about summary refs log tree commit diff
path: root/sysdeps/i386/i686
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/i386/i686')
-rw-r--r--sysdeps/i386/i686/Makefile12
-rw-r--r--sysdeps/i386/i686/add_n.S110
-rw-r--r--sysdeps/i386/i686/bcopy.S3
-rw-r--r--sysdeps/i386/i686/bzero.S4
-rw-r--r--sysdeps/i386/i686/dl-hash.h79
-rw-r--r--sysdeps/i386/i686/ffs.c48
-rw-r--r--sysdeps/i386/i686/fpu/e_log.S29
-rw-r--r--sysdeps/i386/i686/fpu/e_logf.S30
-rw-r--r--sysdeps/i386/i686/fpu/e_logl.S94
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/Makefile4
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S22
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S325
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/e_expf.c37
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/libm-test-ulps2188
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name1
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S553
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_cosf.c29
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S586
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_sincosf.c30
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S566
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_sinf.c28
-rw-r--r--sysdeps/i386/i686/fpu/s_fmax.S39
-rw-r--r--sysdeps/i386/i686/fpu/s_fmaxf.S39
-rw-r--r--sysdeps/i386/i686/fpu/s_fmaxl.S58
-rw-r--r--sysdeps/i386/i686/fpu/s_fmin.S37
-rw-r--r--sysdeps/i386/i686/fpu/s_fminf.S37
-rw-r--r--sysdeps/i386/i686/fpu/s_fminl.S58
-rw-r--r--sysdeps/i386/i686/hp-timing.h42
-rw-r--r--sysdeps/i386/i686/init-arch.h19
-rw-r--r--sysdeps/i386/i686/memcmp.S408
-rw-r--r--sysdeps/i386/i686/memcpy.S98
-rw-r--r--sysdeps/i386/i686/memmove.S120
-rw-r--r--sysdeps/i386/i686/mempcpy.S65
-rw-r--r--sysdeps/i386/i686/memset.S100
-rw-r--r--sysdeps/i386/i686/memusage.h21
-rw-r--r--sysdeps/i386/i686/multiarch/Makefile44
-rw-r--r--sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S4
-rw-r--r--sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S4
-rw-r--r--sysdeps/i386/i686/multiarch/bcopy-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/bcopy.S59
-rw-r--r--sysdeps/i386/i686/multiarch/bzero-sse2-rep.S3
-rw-r--r--sysdeps/i386/i686/multiarch/bzero-sse2.S3
-rw-r--r--sysdeps/i386/i686/multiarch/bzero.S62
-rw-r--r--sysdeps/i386/i686/multiarch/ifunc-impl-list.c376
-rw-r--r--sysdeps/i386/i686/multiarch/locale-defines.sym11
-rw-r--r--sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S502
-rw-r--r--sysdeps/i386/i686/multiarch/memchr-sse2.S709
-rw-r--r--sysdeps/i386/i686/multiarch/memchr.S65
-rw-r--r--sysdeps/i386/i686/multiarch/memcmp-sse4.S1225
-rw-r--r--sysdeps/i386/i686/multiarch/memcmp-ssse3.S2157
-rw-r--r--sysdeps/i386/i686/multiarch/memcmp.S62
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S681
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S1809
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy-ssse3.S3162
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy.S78
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy_chk.S50
-rw-r--r--sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S4
-rw-r--r--sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S4
-rw-r--r--sysdeps/i386/i686/multiarch/memmove-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/memmove.S89
-rw-r--r--sysdeps/i386/i686/multiarch/memmove_chk.S94
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S4
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S4
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy.S81
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy_chk.S50
-rw-r--r--sysdeps/i386/i686/multiarch/memrchr-c.c7
-rw-r--r--sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S417
-rw-r--r--sysdeps/i386/i686/multiarch/memrchr-sse2.S724
-rw-r--r--sysdeps/i386/i686/multiarch/memrchr.S45
-rw-r--r--sysdeps/i386/i686/multiarch/memset-sse2-rep.S811
-rw-r--r--sysdeps/i386/i686/multiarch/memset-sse2.S860
-rw-r--r--sysdeps/i386/i686/multiarch/memset.S75
-rw-r--r--sysdeps/i386/i686/multiarch/memset_chk.S82
-rw-r--r--sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S3
-rw-r--r--sysdeps/i386/i686/multiarch/rawmemchr-sse2.S3
-rw-r--r--sysdeps/i386/i686/multiarch/rawmemchr.S65
-rw-r--r--sysdeps/i386/i686/multiarch/rtld-strnlen.c1
-rw-r--r--sysdeps/i386/i686/multiarch/s_fma-fma.c27
-rw-r--r--sysdeps/i386/i686/multiarch/s_fma.c34
-rw-r--r--sysdeps/i386/i686/multiarch/s_fmaf-fma.c27
-rw-r--r--sysdeps/i386/i686/multiarch/s_fmaf.c34
-rw-r--r--sysdeps/i386/i686/multiarch/sched_cpucount.c1
-rw-r--r--sysdeps/i386/i686/multiarch/stpcpy-sse2.S3
-rw-r--r--sysdeps/i386/i686/multiarch/stpcpy-ssse3.S3
-rw-r--r--sysdeps/i386/i686/multiarch/stpcpy.S9
-rw-r--r--sysdeps/i386/i686/multiarch/stpncpy-sse2.S4
-rw-r--r--sysdeps/i386/i686/multiarch/stpncpy-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/stpncpy.S8
-rw-r--r--sysdeps/i386/i686/multiarch/strcasecmp-c.c12
-rw-r--r--sysdeps/i386/i686/multiarch/strcasecmp.S39
-rw-r--r--sysdeps/i386/i686/multiarch/strcasecmp_l-c.c13
-rw-r--r--sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S2
-rw-r--r--sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S2
-rw-r--r--sysdeps/i386/i686/multiarch/strcasecmp_l.S7
-rw-r--r--sysdeps/i386/i686/multiarch/strcat-sse2.S1245
-rw-r--r--sysdeps/i386/i686/multiarch/strcat-ssse3.S572
-rw-r--r--sysdeps/i386/i686/multiarch/strcat.S92
-rw-r--r--sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S158
-rw-r--r--sysdeps/i386/i686/multiarch/strchr-sse2.S348
-rw-r--r--sysdeps/i386/i686/multiarch/strchr.S57
-rw-r--r--sysdeps/i386/i686/multiarch/strcmp-sse4.S804
-rw-r--r--sysdeps/i386/i686/multiarch/strcmp-ssse3.S2810
-rw-r--r--sysdeps/i386/i686/multiarch/strcmp.S95
-rw-r--r--sysdeps/i386/i686/multiarch/strcpy-sse2.S2250
-rw-r--r--sysdeps/i386/i686/multiarch/strcpy-ssse3.S3901
-rw-r--r--sysdeps/i386/i686/multiarch/strcpy.S116
-rw-r--r--sysdeps/i386/i686/multiarch/strcspn-c.c2
-rw-r--r--sysdeps/i386/i686/multiarch/strcspn.S75
-rw-r--r--sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S125
-rw-r--r--sysdeps/i386/i686/multiarch/strlen-sse2.S695
-rw-r--r--sysdeps/i386/i686/multiarch/strlen.S60
-rw-r--r--sysdeps/i386/i686/multiarch/strncase-c.c8
-rw-r--r--sysdeps/i386/i686/multiarch/strncase.S39
-rw-r--r--sysdeps/i386/i686/multiarch/strncase_l-c.c13
-rw-r--r--sysdeps/i386/i686/multiarch/strncase_l-sse4.S2
-rw-r--r--sysdeps/i386/i686/multiarch/strncase_l-ssse3.S2
-rw-r--r--sysdeps/i386/i686/multiarch/strncase_l.S7
-rw-r--r--sysdeps/i386/i686/multiarch/strncat-c.c8
-rw-r--r--sysdeps/i386/i686/multiarch/strncat-sse2.S4
-rw-r--r--sysdeps/i386/i686/multiarch/strncat-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/strncat.S5
-rw-r--r--sysdeps/i386/i686/multiarch/strncmp-c.c8
-rw-r--r--sysdeps/i386/i686/multiarch/strncmp-sse4.S5
-rw-r--r--sysdeps/i386/i686/multiarch/strncmp-ssse3.S5
-rw-r--r--sysdeps/i386/i686/multiarch/strncmp.S5
-rw-r--r--sysdeps/i386/i686/multiarch/strncpy-c.c8
-rw-r--r--sysdeps/i386/i686/multiarch/strncpy-sse2.S3
-rw-r--r--sysdeps/i386/i686/multiarch/strncpy-ssse3.S3
-rw-r--r--sysdeps/i386/i686/multiarch/strncpy.S5
-rw-r--r--sysdeps/i386/i686/multiarch/strnlen-c.c10
-rw-r--r--sysdeps/i386/i686/multiarch/strnlen-sse2.S3
-rw-r--r--sysdeps/i386/i686/multiarch/strnlen.S37
-rw-r--r--sysdeps/i386/i686/multiarch/strpbrk-c.c2
-rw-r--r--sysdeps/i386/i686/multiarch/strpbrk.S5
-rw-r--r--sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S282
-rw-r--r--sysdeps/i386/i686/multiarch/strrchr-sse2.S708
-rw-r--r--sysdeps/i386/i686/multiarch/strrchr.S57
-rw-r--r--sysdeps/i386/i686/multiarch/strspn-c.c2
-rw-r--r--sysdeps/i386/i686/multiarch/strspn.S56
-rw-r--r--sysdeps/i386/i686/multiarch/test-multiarch.c1
-rw-r--r--sysdeps/i386/i686/multiarch/varshift.c1
-rw-r--r--sysdeps/i386/i686/multiarch/varshift.h1
-rw-r--r--sysdeps/i386/i686/multiarch/wcschr-c.c22
-rw-r--r--sysdeps/i386/i686/multiarch/wcschr-sse2.S219
-rw-r--r--sysdeps/i386/i686/multiarch/wcschr.S36
-rw-r--r--sysdeps/i386/i686/multiarch/wcscmp-c.c14
-rw-r--r--sysdeps/i386/i686/multiarch/wcscmp-sse2.S1018
-rw-r--r--sysdeps/i386/i686/multiarch/wcscmp.S39
-rw-r--r--sysdeps/i386/i686/multiarch/wcscpy-c.c5
-rw-r--r--sysdeps/i386/i686/multiarch/wcscpy-ssse3.S600
-rw-r--r--sysdeps/i386/i686/multiarch/wcscpy.S36
-rw-r--r--sysdeps/i386/i686/multiarch/wcslen-c.c9
-rw-r--r--sysdeps/i386/i686/multiarch/wcslen-sse2.S193
-rw-r--r--sysdeps/i386/i686/multiarch/wcslen.S37
-rw-r--r--sysdeps/i386/i686/multiarch/wcsrchr-c.c5
-rw-r--r--sysdeps/i386/i686/multiarch/wcsrchr-sse2.S354
-rw-r--r--sysdeps/i386/i686/multiarch/wcsrchr.S35
-rw-r--r--sysdeps/i386/i686/multiarch/wmemcmp-c.c9
-rw-r--r--sysdeps/i386/i686/multiarch/wmemcmp-sse4.S4
-rw-r--r--sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/wmemcmp.S40
-rw-r--r--sysdeps/i386/i686/nptl/tls.h35
-rw-r--r--sysdeps/i386/i686/pthread_spin_trylock.S20
-rw-r--r--sysdeps/i386/i686/stack-aliasing.h23
-rw-r--r--sysdeps/i386/i686/strcmp.S52
-rw-r--r--sysdeps/i386/i686/tst-stack-align.h44
167 files changed, 0 insertions, 38206 deletions
diff --git a/sysdeps/i386/i686/Makefile b/sysdeps/i386/i686/Makefile
deleted file mode 100644
index 311042787b..0000000000
--- a/sysdeps/i386/i686/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# So that we can test __m128's alignment
-stack-align-test-flags += -msse
-
-CFLAGS-.o += -Wa,-mtune=i686
-CFLAGS-.os += -Wa,-mtune=i686
-CFLAGS-.op += -Wa,-mtune=i686
-CFLAGS-.oS += -Wa,-mtune=i686
-
-ASFLAGS-.o += -Wa,-mtune=i686
-ASFLAGS-.os += -Wa,-mtune=i686
-ASFLAGS-.op += -Wa,-mtune=i686
-ASFLAGS-.oS += -Wa,-mtune=i686
diff --git a/sysdeps/i386/i686/add_n.S b/sysdeps/i386/i686/add_n.S
deleted file mode 100644
index 4afa648ceb..0000000000
--- a/sysdeps/i386/i686/add_n.S
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Add two limb vectors of the same length > 0 and store sum in a third
-   limb vector.
-   Copyright (C) 1992-2017 Free Software Foundation, Inc.
-   This file is part of the GNU MP Library.
-
-   The GNU MP Library is free software; you can redistribute it and/or modify
-   it under the terms of the GNU Lesser General Public License as published by
-   the Free Software Foundation; either version 2.1 of the License, or (at your
-   option) any later version.
-
-   The GNU MP Library is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-   License for more details.
-
-   You should have received a copy of the GNU Lesser General Public License
-   along with the GNU MP Library; see the file COPYING.LIB.  If not,
-   see <http://www.gnu.org/licenses/>.  */
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-#define PARMS	4+8		/* space for 2 saved regs */
-#define RES	PARMS
-#define S1	RES+4
-#define S2	S1+4
-#define SIZE	S2+4
-
-	.text
-#ifdef PIC
-L(1):	addl    (%esp), %eax
-	ret
-#endif
-ENTRY (__mpn_add_n)
-
-	pushl %edi
-	cfi_adjust_cfa_offset (4)
-	pushl %esi
-	cfi_adjust_cfa_offset (4)
-
-	movl	RES(%esp),%edi
-	cfi_rel_offset (edi, 4)
-	movl	S1(%esp),%esi
-	cfi_rel_offset (esi, 0)
-	movl	S2(%esp),%edx
-	movl	SIZE(%esp),%ecx
-	movl	%ecx,%eax
-	shrl	$3,%ecx			/* compute count for unrolled loop */
-	negl	%eax
-	andl	$7,%eax			/* get index where to start loop */
-	jz	L(oop)			/* necessary special case for 0 */
-	incl	%ecx			/* adjust loop count */
-	shll	$2,%eax			/* adjustment for pointers... */
-	subl	%eax,%edi		/* ... since they are offset ... */
-	subl	%eax,%esi		/* ... by a constant when we ... */
-	subl	%eax,%edx		/* ... enter the loop */
-	shrl	$2,%eax			/* restore previous value */
-#ifdef PIC
-/* Calculate start address in loop for PIC.  */
-	leal	(L(oop)-L(0)-3)(%eax,%eax,8),%eax
-	call	L(1)
-L(0):
-#else
-/* Calculate start address in loop for non-PIC.  */
- 	leal	(L(oop) - 3)(%eax,%eax,8),%eax
-#endif
-	jmp	*%eax			/* jump into loop */
-	ALIGN (3)
-L(oop):	movl	(%esi),%eax
-	adcl	(%edx),%eax
-	movl	%eax,(%edi)
-	movl	4(%esi),%eax
-	adcl	4(%edx),%eax
-	movl	%eax,4(%edi)
-	movl	8(%esi),%eax
-	adcl	8(%edx),%eax
-	movl	%eax,8(%edi)
-	movl	12(%esi),%eax
-	adcl	12(%edx),%eax
-	movl	%eax,12(%edi)
-	movl	16(%esi),%eax
-	adcl	16(%edx),%eax
-	movl	%eax,16(%edi)
-	movl	20(%esi),%eax
-	adcl	20(%edx),%eax
-	movl	%eax,20(%edi)
-	movl	24(%esi),%eax
-	adcl	24(%edx),%eax
-	movl	%eax,24(%edi)
-	movl	28(%esi),%eax
-	adcl	28(%edx),%eax
-	movl	%eax,28(%edi)
-	leal	32(%edi),%edi
-	leal	32(%esi),%esi
-	leal	32(%edx),%edx
-	decl	%ecx
-	jnz	L(oop)
-
-	sbbl	%eax,%eax
-	negl	%eax
-
-	popl %esi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (esi)
-	popl %edi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (edi)
-
-	ret
-END (__mpn_add_n)
diff --git a/sysdeps/i386/i686/bcopy.S b/sysdeps/i386/i686/bcopy.S
deleted file mode 100644
index 15ef9419a4..0000000000
--- a/sysdeps/i386/i686/bcopy.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_BCOPY
-#define memmove bcopy
-#include <sysdeps/i386/i686/memmove.S>
diff --git a/sysdeps/i386/i686/bzero.S b/sysdeps/i386/i686/bzero.S
deleted file mode 100644
index c7898f18e0..0000000000
--- a/sysdeps/i386/i686/bzero.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_BZERO
-#define memset __bzero
-#include <sysdeps/i386/i686/memset.S>
-weak_alias (__bzero, bzero)
diff --git a/sysdeps/i386/i686/dl-hash.h b/sysdeps/i386/i686/dl-hash.h
deleted file mode 100644
index ceda785b32..0000000000
--- a/sysdeps/i386/i686/dl-hash.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* Compute hash value for given string according to ELF standard.
-   Copyright (C) 1998-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _DL_HASH_H
-#define _DL_HASH_H	1
-
-
-/* This is the hashing function specified by the ELF ABI.  It is highly
-   optimized for the PII processors.  Though it will run on i586 it
-   would be much slower than the generic C implementation.  So don't
-   use it.  */
-static unsigned int
-__attribute__ ((unused))
-_dl_elf_hash (const char *name)
-{
-  unsigned int result;
-  unsigned int temp0;
-  unsigned int temp1;
-
-  __asm__ __volatile__
-    ("movzbl (%1),%2\n\t"
-     "testl %2, %2\n\t"
-     "jz 1f\n\t"
-     "movl %2, %0\n\t"
-     "movzbl 1(%1), %2\n\t"
-     "jecxz 1f\n\t"
-     "shll $4, %0\n\t"
-     "addl %2, %0\n\t"
-     "movzbl 2(%1), %2\n\t"
-     "jecxz 1f\n\t"
-     "shll $4, %0\n\t"
-     "addl %2, %0\n\t"
-     "movzbl 3(%1), %2\n\t"
-     "jecxz 1f\n\t"
-     "shll $4, %0\n\t"
-     "addl %2, %0\n\t"
-     "movzbl 4(%1), %2\n\t"
-     "jecxz 1f\n\t"
-     "shll $4, %0\n\t"
-     "addl $5, %1\n\t"
-     "addl %2, %0\n\t"
-     "movzbl (%1), %2\n\t"
-     "jecxz 1f\n"
-     "2:\t"
-     "shll $4, %0\n\t"
-     "movl $0xf0000000, %3\n\t"
-     "incl %1\n\t"
-     "addl %2, %0\n\t"
-     "andl %0, %3\n\t"
-     "andl $0x0fffffff, %0\n\t"
-     "shrl $24, %3\n\t"
-     "movzbl (%1), %2\n\t"
-     "xorl %3, %0\n\t"
-     "testl %2, %2\n\t"
-     "jnz 2b\n"
-     "1:\t"
-     : "=&r" (result), "=r" (name), "=&c" (temp0), "=&r" (temp1)
-     : "0" (0), "1" ((const unsigned char *) name));
-
-  return result;
-}
-
-#endif /* dl-hash.h */
diff --git a/sysdeps/i386/i686/ffs.c b/sysdeps/i386/i686/ffs.c
deleted file mode 100644
index cbe36ff873..0000000000
--- a/sysdeps/i386/i686/ffs.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* ffs -- find first set bit in a word, counted from least significant end.
-   For Intel 80x86, x>=6.
-   This file is part of the GNU C Library.
-   Copyright (C) 1991-2017 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define ffsl __something_else
-#include <string.h>
-
-#undef	ffs
-
-#ifdef	__GNUC__
-
-int
-__ffs (int x)
-{
-  int cnt;
-  int tmp;
-
-  asm ("bsfl %2,%0\n"		/* Index of lowest set bit of X goes to %0 (cnt).  */
-       "cmovel %1,%0\n"		/* If X was zero, use the -1 held in %1 instead.  */
-       : "=&r" (cnt), "=r" (tmp) : "rm" (x), "1" (-1));
-
-  return cnt + 1;
-}
-weak_alias (__ffs, ffs)
-libc_hidden_def (__ffs)
-libc_hidden_builtin_def (ffs)
-#undef ffsl
-weak_alias (__ffs, ffsl)
-
-#else
-#include <string/ffs.c>
-#endif
diff --git a/sysdeps/i386/i686/fpu/e_log.S b/sysdeps/i386/i686/fpu/e_log.S
deleted file mode 100644
index 73060b088c..0000000000
--- a/sysdeps/i386/i686/fpu/e_log.S
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- *
- * Adapted for i686 instructions.
- */
-
-#include <machine/asm.h>
-
-
-	.text
-ENTRY(__ieee754_log)
-	fldln2			// log(2)
-	fldl	4(%esp)		// x : log(2)
-	fucomi	%st
-	jp	3f
-	fyl2x			// log(x)
-	ret
-
-3:	fstp	%st(1)
-	ret
-END (__ieee754_log)
-
-ENTRY(__log_finite)
-	fldln2			// log(2)
-	fldl	4(%esp)		// x : log(2)
-	fyl2x			// log(x)
-	ret
-END(__log_finite)
diff --git a/sysdeps/i386/i686/fpu/e_logf.S b/sysdeps/i386/i686/fpu/e_logf.S
deleted file mode 100644
index 6fd39d50d3..0000000000
--- a/sysdeps/i386/i686/fpu/e_logf.S
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- * Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
- *
- * Adapted for i686 instructions.
- */
-
-#include <machine/asm.h>
-
-
-	.text
-ENTRY(__ieee754_logf)
-	fldln2			// log(2)
-	flds	4(%esp)		// x : log(2)
-	fucomi	%st
-	jp	3f
-	fyl2x			// log(x)
-	ret
-
-3:	fstp	%st(1)
-	ret
-END (__ieee754_logf)
-
-ENTRY(__logf_finite)
-	fldln2			// log(2)
-	flds	4(%esp)		// x : log(2)
-	fyl2x			// log(x)
-	ret
-END(__logf_finite)
diff --git a/sysdeps/i386/i686/fpu/e_logl.S b/sysdeps/i386/i686/fpu/e_logl.S
deleted file mode 100644
index 7e3bc8d817..0000000000
--- a/sysdeps/i386/i686/fpu/e_logl.S
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- *
- * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
- * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
- * Adapted for i686 instructions.
- */
-
-#include <machine/asm.h>
-
-	.section .rodata.cst8,"aM",@progbits,8
-
-	.p2align 3
-	.type one,@object
-one:	.double 1.0
-	ASM_SIZE_DIRECTIVE(one)
-	/* It is not important that this constant is precise.  It is only
-	   a value which is known to be on the safe side for using the
-	   fyl2xp1 instruction.  */
-	.type limit,@object
-limit:	.double 0.29
-	ASM_SIZE_DIRECTIVE(limit)
-
-
-#ifdef PIC
-# define MO(op) op##@GOTOFF(%edx)
-#else
-# define MO(op) op
-#endif
-
-	.text
-ENTRY(__ieee754_logl)
-	fldln2			// log(2)
-	fldt	4(%esp)		// x : log(2)
-	fucomi	%st
-	jp	3f
-#ifdef PIC
-	LOAD_PIC_REG (dx)
-#endif
-	fld	%st		// x : x : log(2)
-	movzwl	4+8(%esp), %eax
-	cmpl	$0xc000, %eax
-	jae	5f		// x <= -2, avoid overflow from -LDBL_MAX - 1.
-	fsubl	MO(one)		// x-1 : x : log(2)
-5:	fld	%st		// x-1 : x-1 : x : log(2)
-	fabs			// |x-1| : x-1 : x : log(2)
-	fld	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
-	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
-	fstp	%st(0)		// x-1 : x : log(2)
-	jc	2f
-	fxam
-	fnstsw
-	andb	$0x45, %ah
-	cmpb	$0x40, %ah
-	jne	4f
-	fabs			// log(1) is +0 in all rounding modes.
-4:	fstp	%st(1)		// x-1 : log(2)
-	fyl2xp1			// log(x)
-	ret
-
-2:	fstp	%st(0)		// x : log(2)
-	fyl2x			// log(x)
-	ret
-
-3:	fstp	%st(1)
-	fadd	%st(0)
-	ret
-END (__ieee754_logl)
-
-ENTRY(__logl_finite)
-	fldln2			// log(2)
-	fldt	4(%esp)		// x : log(2)
-#ifdef PIC
-	LOAD_PIC_REG (dx)
-#endif
-	fld	%st		// x : x : log(2)
-	fsubl	MO(one)		// x-1 : x : log(2)
-	fld	%st		// x-1 : x-1 : x : log(2)
-	fabs			// |x-1| : x-1 : x : log(2)
-	fld	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
-	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
-	fstp	%st(0)		// x-1 : x : log(2)
-	jc	2b
-	fxam
-	fnstsw
-	andb	$0x45, %ah
-	cmpb	$0x40, %ah
-	jne	6f
-	fabs			// log(1) is +0 in all rounding modes.
-6:	fstp	%st(1)		// x-1 : log(2)
-	fyl2xp1			// log(x)
-	ret
-END(__logl_finite)
diff --git a/sysdeps/i386/i686/fpu/multiarch/Makefile b/sysdeps/i386/i686/fpu/multiarch/Makefile
deleted file mode 100644
index 7d9089232f..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-ifeq ($(subdir),math)
-libm-sysdep_routines += e_expf-sse2 e_expf-ia32 s_sinf-sse2 s_cosf-sse2 \
-                        s_sincosf-sse2
-endif
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S b/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S
deleted file mode 100644
index b486b4d1ca..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
-   Copyright (C) 2012-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define __ieee754_expf __ieee754_expf_ia32
-#define __expf_finite __expf_finite_ia32
-
-#include <sysdeps/i386/fpu/e_expf.S>
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S
deleted file mode 100644
index e6bb6fa289..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S
+++ /dev/null
@@ -1,325 +0,0 @@
-/* SSE2 version of __ieee754_expf and __expf_finite
-   Copyright (C) 2012-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-
-#include <sysdep.h>
-
-/* Short algorithm description:
- *
- *  Let K = 64 (table size).
- *       e^x  = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
- *  where
- *       x = m*log(2)/K + y,    y in [0.0..log(2)/K]
- *       m = n*K + j,           m,n,j - signed integer, j in [0..K-1]
- *       values of 2^(j/K) are tabulated as T[j].
- *
- *       P(y) is a minimax polynomial approximation of expf(y)-1
- *       on small interval [0.0..log(2)/K].
- *
- *       P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as
- *       z = y*y;    P(y) = (P3*z + P1)*z + (P2*z + P0)*y
- *
- * Special cases:
- *  __ieee754_expf_sse2(NaN) = NaN
- *  __ieee754_expf_sse2(+INF) = +INF
- *  __ieee754_expf_sse2(-INF) = 0
- *  __ieee754_expf_sse2(x) = 1 for subnormals
- *  for finite argument, only __ieee754_expf_sse2(0)=1 is exact
- *  __ieee754_expf_sse2(x) overflows if x>700
- *  __ieee754_expf_sse2(x) underflows if x<-700
- *
- * Note:
- *  For |x|<700, __ieee754_expf_sse2 computes result in double precision,
- *  with accuracy a bit more than needed for expf, and does not round it
- *  to single precision.
- */
-
-
-#ifdef	PIC
-# define MO1(symbol)			L(symbol)##@GOTOFF(%edx)
-# define MO2(symbol,reg2,_scale)	L(symbol)##@GOTOFF(%edx,reg2,_scale)
-#else
-# define MO1(symbol)			L(symbol)
-# define MO2(symbol,reg2,_scale)	L(symbol)(,reg2,_scale)
-#endif
-
-	.text
-ENTRY(__ieee754_expf_sse2)
-	/* Input: single precision x on stack at address 4(%esp) */
-
-#ifdef	PIC
-	LOAD_PIC_REG(dx)
-#endif
-
-	cvtss2sd	4(%esp), %xmm1	/* Convert x to double precision */
-	mov	4(%esp), %ecx		/* Copy x */
-	movsd	MO1(DP_KLN2), %xmm2	/* DP K/log(2) */
-	movsd	MO1(DP_P2), %xmm3	/* DP P2 */
-	movl	%ecx, %eax		/* x */
-	mulsd	%xmm1, %xmm2		/* DP x*K/log(2) */
-	andl	$0x7fffffff, %ecx	/* |x| */
-	cmpl	$0x442f0000, %ecx	/* |x|<700 ? */
-	movsd	MO1(DP_P3), %xmm4	/* DP P3 */
-	addsd	MO1(DP_RS), %xmm2	/* DP x*K/log(2)+RS */
-	jae	L(special_paths)
-
-	/* Here if |x|<700 */
-	cmpl	$0x31800000, %ecx	/* |x|<2^(-28) ? */
-	jb	L(small_arg)
-
-	/* Main path: here if 2^(-28)<=|x|<700 */
-	cvtsd2ss	%xmm2, %xmm2	/* SP x*K/log(2)+RS */
-	movd	%xmm2, %eax		/* bits of n*K+j with trash */
-	subss	MO1(SP_RS), %xmm2	/* SP t=round(x*K/log(2)) */
-	movl	%eax, %ecx		/* n*K+j with trash */
-	cvtss2sd	%xmm2, %xmm2	/* DP t */
-	andl	$0x3f, %eax		/* bits of j */
-	mulsd	MO1(DP_NLN2K), %xmm2	/* DP -t*log(2)/K */
-	andl	$0xffffffc0, %ecx	/* bits of n */
-#ifdef __AVX__
-	vaddsd	%xmm1, %xmm2, %xmm0	/* DP y=x-t*log(2)/K */
-	vmulsd	%xmm0, %xmm0, %xmm2	/* DP z=y*y */
-#else
-	addsd	%xmm1, %xmm2		/* DP y=x-t*log(2)/K */
-	movaps	%xmm2, %xmm0		/* DP y */
-	mulsd	%xmm2, %xmm2		/* DP z=y*y */
-#endif
-	mulsd	%xmm2, %xmm4		/* DP P3*z */
-	addl	$0xffc0, %ecx		/* bits of n + DP exponent bias */
-	mulsd	%xmm2, %xmm3		/* DP P2*z */
-	shrl	$2, %ecx		/* High 2 bytes of DP 2^n */
-	pxor	%xmm1, %xmm1		/* clear %xmm1 */
-	addsd	MO1(DP_P1), %xmm4	/* DP P3*z+P1 */
-	addsd	MO1(DP_P0), %xmm3	/* DP P2*z+P0 */
-	pinsrw	$3, %ecx, %xmm1		/* DP 2^n */
-	mulsd	%xmm2, %xmm4		/* DP (P3*z+P1)*z */
-	mulsd	%xmm3, %xmm0		/* DP (P2*z+P0)*y */
-	addsd	%xmm4, %xmm0		/* DP P(y) */
-	mulsd	MO2(DP_T,%eax,8), %xmm0	/* DP P(y)*T[j] */
-	addsd	MO2(DP_T,%eax,8), %xmm0	/* DP T[j]*(P(y)+1) */
-	mulsd	%xmm1, %xmm0		/* DP result=2^n*(T[j]*(P(y)+1)) */
-	cvtsd2ss	%xmm0, %xmm1
-
-	lea	-4(%esp), %esp		/* Borrow 4 bytes of stack frame */
-	movss	%xmm1, 0(%esp)		/* Move result from sse... */
-	flds	0(%esp)			/* ...to FPU. */
-	lea	4(%esp), %esp		/* Return back 4 bytes of stack frame */
-	ret
-
-	.p2align	4
-L(small_arg):
-	/* Here if 0<=|x|<2^(-28) */
-	movss	4(%esp), %xmm0		/* load x */
-	addss	MO1(SP_ONE), %xmm0	/* 1.0 + x */
-	/* Return 1.0 with inexact raised, except for x==0 */
-	jmp	L(epilogue)
-
-	.p2align	4
-L(special_paths):
-	/* Here if x is NaN, or Inf, or finite |x|>=700 */
-	movss	4(%esp), %xmm0		/* load x */
-
-	cmpl	$0x7f800000, %ecx	/* |x| is finite ? */
-	jae	L(arg_inf_or_nan)
-
-	/* Here if finite |x|>=700 */
-	testl	$0x80000000, %eax	/* sign of x nonzero ? */
-	je	L(res_overflow)
-
-	/* Here if finite x<=-700 */
-	movss	MO1(SP_SMALL), %xmm0	/* load small value 2^(-100) */
-	mulss	%xmm0, %xmm0		/* Return underflowed result (zero or subnormal) */
-	jmp	L(epilogue)
-
-	.p2align	4
-L(res_overflow):
-	/* Here if finite x>=700 */
-	movss	MO1(SP_LARGE), %xmm0	/* load large value 2^100 */
-	mulss	%xmm0, %xmm0		/* Return overflowed result (Inf or max normal) */
-	jmp	L(epilogue)
-
-	.p2align	4
-L(arg_inf_or_nan):
-	/* Here if |x| is Inf or NAN */
-	jne	L(arg_nan)	/* |x| is Inf ? */
-
-	/* Here if |x| is Inf */
-	shrl	$31, %eax		/* Get sign bit of x */
-	movss	MO2(SP_INF_0,%eax,4), %xmm0/* return zero or Inf, depending on sign of x */
-	jmp	L(epilogue)
-
-	.p2align	4
-L(arg_nan):
-	/* Here if |x| is NaN */
-	addss	%xmm0, %xmm0		/* Return x+x (raise invalid) */
-
-	.p2align	4
-L(epilogue):
-	lea	-4(%esp), %esp		/* Borrow 4 bytes of stack frame */
-	movss	%xmm0, 0(%esp)		/* Move result from sse... */
-	flds	0(%esp)			/* ...to FPU. */
-	lea	4(%esp), %esp		/* Return back 4 bytes of stack frame */
-	ret
-END(__ieee754_expf_sse2)
-
-	.section .rodata, "a"
-	.p2align 3
-L(DP_T): /* table of double precision values 2^(j/K) for j=[0..K-1] */
-	.long	0x00000000, 0x3ff00000
-	.long	0x3e778061, 0x3ff02c9a
-	.long	0xd3158574, 0x3ff059b0
-	.long	0x18759bc8, 0x3ff08745
-	.long	0x6cf9890f, 0x3ff0b558
-	.long	0x32d3d1a2, 0x3ff0e3ec
-	.long	0xd0125b51, 0x3ff11301
-	.long	0xaea92de0, 0x3ff1429a
-	.long	0x3c7d517b, 0x3ff172b8
-	.long	0xeb6fcb75, 0x3ff1a35b
-	.long	0x3168b9aa, 0x3ff1d487
-	.long	0x88628cd6, 0x3ff2063b
-	.long	0x6e756238, 0x3ff2387a
-	.long	0x65e27cdd, 0x3ff26b45
-	.long	0xf51fdee1, 0x3ff29e9d
-	.long	0xa6e4030b, 0x3ff2d285
-	.long	0x0a31b715, 0x3ff306fe
-	.long	0xb26416ff, 0x3ff33c08
-	.long	0x373aa9cb, 0x3ff371a7
-	.long	0x34e59ff7, 0x3ff3a7db
-	.long	0x4c123422, 0x3ff3dea6
-	.long	0x21f72e2a, 0x3ff4160a
-	.long	0x6061892d, 0x3ff44e08
-	.long	0xb5c13cd0, 0x3ff486a2
-	.long	0xd5362a27, 0x3ff4bfda
-	.long	0x769d2ca7, 0x3ff4f9b2
-	.long	0x569d4f82, 0x3ff5342b
-	.long	0x36b527da, 0x3ff56f47
-	.long	0xdd485429, 0x3ff5ab07
-	.long	0x15ad2148, 0x3ff5e76f
-	.long	0xb03a5585, 0x3ff6247e
-	.long	0x82552225, 0x3ff66238
-	.long	0x667f3bcd, 0x3ff6a09e
-	.long	0x3c651a2f, 0x3ff6dfb2
-	.long	0xe8ec5f74, 0x3ff71f75
-	.long	0x564267c9, 0x3ff75feb
-	.long	0x73eb0187, 0x3ff7a114
-	.long	0x36cf4e62, 0x3ff7e2f3
-	.long	0x994cce13, 0x3ff82589
-	.long	0x9b4492ed, 0x3ff868d9
-	.long	0x422aa0db, 0x3ff8ace5
-	.long	0x99157736, 0x3ff8f1ae
-	.long	0xb0cdc5e5, 0x3ff93737
-	.long	0x9fde4e50, 0x3ff97d82
-	.long	0x82a3f090, 0x3ff9c491
-	.long	0x7b5de565, 0x3ffa0c66
-	.long	0xb23e255d, 0x3ffa5503
-	.long	0x5579fdbf, 0x3ffa9e6b
-	.long	0x995ad3ad, 0x3ffae89f
-	.long	0xb84f15fb, 0x3ffb33a2
-	.long	0xf2fb5e47, 0x3ffb7f76
-	.long	0x904bc1d2, 0x3ffbcc1e
-	.long	0xdd85529c, 0x3ffc199b
-	.long	0x2e57d14b, 0x3ffc67f1
-	.long	0xdcef9069, 0x3ffcb720
-	.long	0x4a07897c, 0x3ffd072d
-	.long	0xdcfba487, 0x3ffd5818
-	.long	0x03db3285, 0x3ffda9e6
-	.long	0x337b9b5f, 0x3ffdfc97
-	.long	0xe78b3ff6, 0x3ffe502e
-	.long	0xa2a490da, 0x3ffea4af
-	.long	0xee615a27, 0x3ffefa1b
-	.long	0x5b6e4540, 0x3fff5076
-	.long	0x819e90d8, 0x3fffa7c1
-	.type L(DP_T), @object
-	ASM_SIZE_DIRECTIVE(L(DP_T))
-
-	.section .rodata.cst8,"aM",@progbits,8
-	.p2align 3
-L(DP_KLN2): /* double precision K/log(2) */
-	.long	0x652b82fe, 0x40571547
-	.type L(DP_KLN2), @object
-	ASM_SIZE_DIRECTIVE(L(DP_KLN2))
-
-	.p2align 3
-L(DP_NLN2K): /* double precision -log(2)/K */
-	.long	0xfefa39ef, 0xbf862e42
-	.type L(DP_NLN2K), @object
-	ASM_SIZE_DIRECTIVE(L(DP_NLN2K))
-
-	.p2align 3
-L(DP_RS): /* double precision 2^23+2^22 */
-	.long	0x00000000, 0x41680000
-	.type L(DP_RS), @object
-	ASM_SIZE_DIRECTIVE(L(DP_RS))
-
-	.p2align 3
-L(DP_P3): /* double precision polynomial coefficient P3 */
-	.long	0xeb78fa85, 0x3fa56420
-	.type L(DP_P3), @object
-	ASM_SIZE_DIRECTIVE(L(DP_P3))
-
-	.p2align 3
-L(DP_P1): /* double precision polynomial coefficient P1 */
-	.long	0x008d6118, 0x3fe00000
-	.type L(DP_P1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_P1))
-
-	.p2align 3
-L(DP_P2): /* double precision polynomial coefficient P2 */
-	.long	0xda752d4f, 0x3fc55550
-	.type L(DP_P2), @object
-	ASM_SIZE_DIRECTIVE(L(DP_P2))
-
-	.p2align 3
-L(DP_P0): /* double precision polynomial coefficient P0 */
-	.long	0xffffe7c6, 0x3fefffff
-	.type L(DP_P0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_P0))
-
-	.p2align 2
-L(SP_INF_0):
-	.long	0x7f800000	/* single precision Inf */
-	.long	0		/* single precision zero */
-	.type L(SP_INF_0), @object
-	ASM_SIZE_DIRECTIVE(L(SP_INF_0))
-
-	.section .rodata.cst4,"aM",@progbits,4
-	.p2align 2
-L(SP_RS): /* single precision 2^23+2^22 */
-	.long	0x4b400000
-	.type L(SP_RS), @object
-	ASM_SIZE_DIRECTIVE(L(SP_RS))
-
-	.p2align 2
-L(SP_SMALL): /* single precision small value 2^(-100) */
-	.long	0x0d800000
-	.type L(SP_SMALL), @object
-	ASM_SIZE_DIRECTIVE(L(SP_SMALL))
-
-	.p2align 2
-L(SP_LARGE): /* single precision large value 2^100 */
-	.long	0x71800000
-	.type L(SP_LARGE), @object
-	ASM_SIZE_DIRECTIVE(L(SP_LARGE))
-
-	.p2align 2
-L(SP_ONE): /* single precision 1.0 */
-	.long	0x3f800000
-	.type L(SP_ONE), @object
-	ASM_SIZE_DIRECTIVE(L(SP_ONE))
-
-strong_alias (__ieee754_expf_sse2, __expf_finite_sse2)
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_expf.c b/sysdeps/i386/i686/fpu/multiarch/e_expf.c
deleted file mode 100644
index 388cf98a39..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/e_expf.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of expf
-   Copyright (C) 2012-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <init-arch.h>
-
-extern double __ieee754_expf_sse2 (double);
-extern double __ieee754_expf_ia32 (double);
-
-double __ieee754_expf (double);
-libm_ifunc (__ieee754_expf,
-	    HAS_CPU_FEATURE (SSE2)
-	    ? __ieee754_expf_sse2
-	    : __ieee754_expf_ia32);
-
-extern double __expf_finite_sse2 (double);
-extern double __expf_finite_ia32 (double);
-
-double __expf_finite (double);
-libm_ifunc (__expf_finite,
-	    HAS_CPU_FEATURE (SSE2)
-	    ? __expf_finite_sse2
-	    : __expf_finite_ia32);
diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
deleted file mode 100644
index 04bc23b37b..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
+++ /dev/null
@@ -1,2188 +0,0 @@
-# Begin of automatic generation
-
-# Maximal error of functions:
-Function: "acos":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "acos_downward":
-ildouble: 2
-ldouble: 2
-
-Function: "acos_towardzero":
-ildouble: 2
-ldouble: 2
-
-Function: "acos_upward":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "acosh":
-double: 1
-idouble: 1
-ildouble: 4
-ldouble: 2
-
-Function: "acosh_downward":
-double: 1
-idouble: 1
-ildouble: 6
-ldouble: 4
-
-Function: "acosh_towardzero":
-double: 1
-idouble: 1
-ildouble: 6
-ldouble: 4
-
-Function: "acosh_upward":
-double: 1
-idouble: 1
-ildouble: 4
-ldouble: 3
-
-Function: "asin":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "asin_downward":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "asin_towardzero":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "asin_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "asinh":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "asinh_downward":
-double: 1
-float: 1
-idouble: 1
-ildouble: 5
-ldouble: 5
-
-Function: "asinh_towardzero":
-double: 1
-float: 1
-idouble: 1
-ildouble: 4
-ldouble: 4
-
-Function: "asinh_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 5
-ldouble: 5
-
-Function: "atan":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan2":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan2_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan2_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan2_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atanh":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "atanh_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 4
-
-Function: "atanh_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 5
-ldouble: 3
-
-Function: "atanh_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 5
-ldouble: 5
-
-Function: "cabs":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "cabs_downward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "cabs_towardzero":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "cabs_upward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "cacos":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "cacos":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "cacos_downward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "cacos_downward":
-double: 5
-float: 3
-idouble: 5
-ifloat: 3
-ildouble: 6
-ldouble: 6
-
-Function: Real part of "cacos_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "cacos_towardzero":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Real part of "cacos_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "cacos_upward":
-double: 7
-float: 7
-idouble: 7
-ifloat: 7
-ildouble: 7
-ldouble: 7
-
-Function: Real part of "cacosh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "cacosh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "cacosh_downward":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "cacosh_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "cacosh_towardzero":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "cacosh_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "cacosh_upward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "cacosh_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: "carg":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "carg_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "carg_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "carg_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "casin":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "casin":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "casin_downward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "casin_downward":
-double: 5
-float: 3
-idouble: 5
-ifloat: 3
-ildouble: 6
-ldouble: 6
-
-Function: Real part of "casin_towardzero":
-double: 3
-float: 1
-idouble: 3
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "casin_towardzero":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Real part of "casin_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "casin_upward":
-double: 7
-float: 7
-idouble: 7
-ifloat: 7
-ildouble: 7
-ldouble: 7
-
-Function: Real part of "casinh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "casinh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "casinh_downward":
-double: 5
-float: 3
-idouble: 5
-ifloat: 3
-ildouble: 6
-ldouble: 6
-
-Function: Imaginary part of "casinh_downward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "casinh_towardzero":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "casinh_towardzero":
-double: 3
-float: 1
-idouble: 3
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "casinh_upward":
-double: 7
-float: 7
-idouble: 7
-ifloat: 7
-ildouble: 7
-ldouble: 7
-
-Function: Imaginary part of "casinh_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "catan":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "catan":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "catan_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "catan_downward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "catan_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "catan_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "catan_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "catan_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "catanh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "catanh":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "catanh_downward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "catanh_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "catanh_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "catanh_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "catanh_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "catanh_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "cbrt":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: "cbrt_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "cbrt_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: "cbrt_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "ccos":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "ccos":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "ccos_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "ccos_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "ccos_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "ccos_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "ccos_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "ccos_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "ccosh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "ccosh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "ccosh_downward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "ccosh_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "ccosh_towardzero":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "ccosh_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "ccosh_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "ccosh_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "cexp":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "cexp":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "cexp_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "cexp_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "cexp_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "cexp_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "cexp_upward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "cexp_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "clog":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "clog":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "clog10":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "clog10":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "clog10_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 8
-ldouble: 8
-
-Function: Imaginary part of "clog10_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "clog10_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 8
-ldouble: 8
-
-Function: Imaginary part of "clog10_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "clog10_upward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 7
-ldouble: 7
-
-Function: Imaginary part of "clog10_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "clog_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "clog_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "clog_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "clog_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "clog_upward":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "clog_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "cos":
-ildouble: 1
-ldouble: 1
-
-Function: "cos_downward":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "cos_towardzero":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "cos_upward":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "cosh":
-double: 1
-float: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "cosh_downward":
-double: 2
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 3
-
-Function: "cosh_towardzero":
-double: 2
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "cosh_upward":
-double: 4
-float: 2
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 3
-
-Function: Real part of "cpow":
-double: 2
-float: 5
-idouble: 2
-ifloat: 5
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "cpow":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "cpow_downward":
-double: 5
-float: 8
-idouble: 5
-ifloat: 8
-ildouble: 7
-ldouble: 7
-
-Function: Imaginary part of "cpow_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "cpow_towardzero":
-double: 5
-float: 8
-idouble: 5
-ifloat: 8
-ildouble: 7
-ldouble: 7
-
-Function: Imaginary part of "cpow_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "cpow_upward":
-double: 4
-float: 1
-idouble: 4
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "cpow_upward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "csin":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "csin":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-
-Function: Real part of "csin_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "csin_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "csin_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "csin_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "csin_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "csin_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "csinh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "csinh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "csinh_downward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "csinh_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "csinh_towardzero":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "csinh_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "csinh_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "csinh_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "csqrt":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "csqrt":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "csqrt_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "csqrt_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "csqrt_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "csqrt_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "csqrt_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "csqrt_upward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "ctan":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "ctan":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "ctan_downward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "ctan_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "ctan_towardzero":
-double: 3
-float: 1
-idouble: 3
-ifloat: 1
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "ctan_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "ctan_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "ctan_upward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "ctanh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "ctanh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "ctanh_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "ctanh_downward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "ctanh_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "ctanh_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "ctanh_upward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "ctanh_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: "erf":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "erf_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "erf_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "erf_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "erfc":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: "erfc_downward":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "erfc_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: "erfc_upward":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: "exp":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "exp10":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "exp10_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "exp10_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "exp10_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "exp2":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "exp2_downward":
-ildouble: 1
-ldouble: 1
-
-Function: "exp2_towardzero":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "exp2_upward":
-ildouble: 1
-ldouble: 1
-
-Function: "exp_downward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "exp_towardzero":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "exp_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "expm1":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "expm1_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "expm1_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "expm1_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "gamma":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "gamma_downward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 7
-ldouble: 7
-
-Function: "gamma_towardzero":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 7
-ldouble: 7
-
-Function: "gamma_upward":
-double: 3
-float: 4
-idouble: 3
-ifloat: 4
-ildouble: 5
-ldouble: 5
-
-Function: "hypot":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "hypot_downward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "hypot_towardzero":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "hypot_upward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "j0":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "j0_downward":
-double: 1
-float: 3
-idouble: 1
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "j0_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 5
-ldouble: 5
-
-Function: "j0_upward":
-double: 1
-float: 3
-idouble: 1
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "j1":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "j1_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: "j1_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "j1_upward":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: "jn":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "jn_downward":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "jn_towardzero":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: "jn_upward":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: "lgamma":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "lgamma_downward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 7
-ldouble: 7
-
-Function: "lgamma_towardzero":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 7
-ldouble: 7
-
-Function: "lgamma_upward":
-double: 3
-float: 4
-idouble: 3
-ifloat: 4
-ildouble: 5
-ldouble: 5
-
-Function: "log":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log10":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log10_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "log10_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "log10_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log1p":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "log1p_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "log1p_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "log1p_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: "log2":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log2_downward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log2_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log2_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log_downward":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "log_towardzero":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "log_upward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "pow":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "pow10":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "pow10_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "pow10_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "pow10_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "pow_downward":
-double: 1
-idouble: 1
-ildouble: 4
-ldouble: 4
-
-Function: "pow_towardzero":
-double: 1
-idouble: 1
-ildouble: 4
-ldouble: 4
-
-Function: "pow_upward":
-double: 1
-idouble: 1
-ildouble: 4
-ldouble: 4
-
-Function: "sin":
-ildouble: 1
-ldouble: 1
-
-Function: "sin_downward":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "sin_towardzero":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "sin_upward":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "sincos":
-ildouble: 1
-ldouble: 1
-
-Function: "sincos_downward":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "sincos_towardzero":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "sincos_upward":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "sinh":
-double: 1
-ildouble: 2
-ldouble: 2
-
-Function: "sinh_downward":
-double: 2
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 5
-
-Function: "sinh_towardzero":
-double: 2
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 4
-
-Function: "sinh_upward":
-double: 4
-float: 2
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 5
-
-Function: "tan":
-float: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "tan_downward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: "tan_towardzero":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: "tan_upward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: "tanh":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "tanh_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 7
-ldouble: 4
-
-Function: "tanh_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: "tanh_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 5
-ldouble: 4
-
-Function: "tgamma":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: "tgamma_downward":
-double: 3
-float: 4
-idouble: 3
-ifloat: 4
-ildouble: 5
-ldouble: 5
-
-Function: "tgamma_towardzero":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 5
-ldouble: 5
-
-Function: "tgamma_upward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 5
-ldouble: 5
-
-Function: "y0":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "y0_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 5
-ldouble: 5
-
-Function: "y0_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 5
-ldouble: 5
-
-Function: "y0_upward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: "y1":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: "y1_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 7
-ldouble: 7
-
-Function: "y1_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 5
-ldouble: 5
-
-Function: "y1_upward":
-double: 1
-float: 3
-idouble: 1
-ifloat: 3
-ildouble: 7
-ldouble: 7
-
-Function: "yn":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "yn_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 5
-ldouble: 5
-
-Function: "yn_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: "yn_upward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-# end of automatic generation
diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name
deleted file mode 100644
index 193dd704b3..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name
+++ /dev/null
@@ -1 +0,0 @@
-i686
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
deleted file mode 100644
index f37850d0b3..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
+++ /dev/null
@@ -1,553 +0,0 @@
-/* Optimized with sse2 version of cosf
-   Copyright (C) 2012-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#define __need_Emath
-#include <bits/errno.h>
-
-/* Short algorithm description:
- *
- *  1) if |x| == 0: return 1.0-|x|.
- *  2) if |x| <  2^-27: return 1.0-|x|.
- *  3) if |x| <  2^-5 : return 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1.
- *  4) if |x| <   Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
- *  5) if |x| < 9*Pi/4:
- *      5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3,
- *           t=|x|-j*Pi/4.
- *      5.2) Reconstruction:
- *          s = (-1.0)^((n>>2)&1)
- *          if(n&2 != 0) {
- *              using cos(t) polynomial for |t|<Pi/4, result is
- *              s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
- *          } else {
- *              using sin(t) polynomial for |t|<Pi/4, result is
- *              s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
- *          }
- *  6) if |x| < 2^23, large args:
- *      6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
- *           t=|x|-j*Pi/4.
- *      6.2) Reconstruction same as (5.2).
- *  7) if |x| >= 2^23, very large args:
- *      7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
- *           t=|x|-j*Pi/4.
- *      7.2) Reconstruction same as (5.2).
- *  8) if x is Inf, return x-x, and set errno=EDOM.
- *  9) if x is NaN, return x-x.
- *
- * Special cases:
- *  cos(+-0) = 1 not raising inexact,
- *  cos(subnormal) raises inexact,
- *  cos(min_normalized) raises inexact,
- *  cos(normalized) raises inexact,
- *  cos(Inf) = NaN, raises invalid, sets errno to EDOM,
- *  cos(NaN) = NaN.
- */
-
-#ifdef	PIC
-# define MO1(symbol)			L(symbol)##@GOTOFF(%ebx)
-# define MO2(symbol,reg2,_scale)	L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define CFI_PUSH(REG)	cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
-# define CFI_POP(REG)	cfi_adjust_cfa_offset(-4); cfi_restore(REG)
-# define PUSH(REG)			pushl REG; CFI_PUSH(REG)
-# define POP(REG)			popl REG; CFI_POP(REG)
-# define ENTRANCE			PUSH(%ebx); LOAD_PIC_REG(bx)
-# define RETURN				POP(%ebx); ret; CFI_PUSH(%ebx)
-# define ARG_X				8(%esp)
-#else
-# define MO1(symbol)			L(symbol)
-# define MO2(symbol,reg2,_scale)	L(symbol)(,reg2,_scale)
-# define ENTRANCE
-# define RETURN				ret
-# define ARG_X				4(%esp)
-#endif
-
-	.text
-ENTRY(__cosf_sse2)
-	/* Input: single precision x on stack at address ARG_X */
-
-	ENTRANCE
-	movl	ARG_X, %eax		/* Bits of x */
-	cvtss2sd ARG_X, %xmm0		/* DP x */
-	andl	$0x7fffffff, %eax	/* |x| */
-
-	cmpl	$0x3f490fdb, %eax	/* |x|<Pi/4?  */
-	jb	L(arg_less_pio4)
-
-	/* Here if |x|>=Pi/4 */
-	movd	%eax, %xmm3		/* SP |x| */
-	andpd	MO1(DP_ABS_MASK),%xmm0	/* DP |x| */
-	movss	MO1(SP_INVPIO4), %xmm2	/* SP 1/(Pi/4) */
-
-	cmpl	$0x40e231d6, %eax	/* |x|<9*Pi/4?  */
-	jae	L(large_args)
-
-	/* Here if Pi/4<=|x|<9*Pi/4 */
-	mulss	%xmm3, %xmm2		/* SP |x|/(Pi/4) */
-	cvttss2si %xmm2, %eax		/* k, number of Pi/4 in x */
-	addl	$1, %eax		/* k+1 */
-	movl	$0x0e, %edx
-	andl	%eax, %edx		/* j = (k+1)&0x0e */
-	addl	$2, %eax		/* n */
-	subsd	MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
-
-L(reconstruction):
-	/* Input: %eax=n, %xmm0=t */
-	testl	$2, %eax		/* n&2 != 0?  */
-	jz	L(sin_poly)
-
-/*L(cos_poly):*/
-	/* Here if cos(x) calculated using cos(t) polynomial for |t|<Pi/4:
-	 * y = t*t; z = y*y;
-	 * s = (-1.0)^((n>>2)&1)
-	 * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
-	 */
-	shrl	$2, %eax		/* n>>2 */
-	mulsd	%xmm0, %xmm0		/* y=t^2 */
-	andl	$1, %eax		/* (n>>2)&1 */
-	movaps	%xmm0, %xmm1		/* y */
-	mulsd	%xmm0, %xmm0		/* z=t^4 */
-
-	movsd	MO1(DP_C4), %xmm4	/* C4 */
-	mulsd	%xmm0, %xmm4		/* z*C4 */
-	movsd	MO1(DP_C3), %xmm3	/* C3 */
-	mulsd	%xmm0, %xmm3		/* z*C3 */
-	addsd	MO1(DP_C2), %xmm4	/* C2+z*C4 */
-	mulsd	%xmm0, %xmm4		/* z*(C2+z*C4) */
-	lea	-8(%esp), %esp		/* Borrow 8 bytes of stack frame */
-	addsd	MO1(DP_C1), %xmm3	/* C1+z*C3 */
-	mulsd	%xmm0, %xmm3		/* z*(C1+z*C3) */
-	addsd	MO1(DP_C0), %xmm4	/* C0+z*(C2+z*C4) */
-	mulsd	%xmm1, %xmm4		/* y*(C0+z*(C2+z*C4)) */
-
-	addsd	%xmm4, %xmm3		/* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
-	/* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
-	addsd	MO1(DP_ONES), %xmm3
-
-	mulsd	MO2(DP_ONES,%eax,8), %xmm3 /* DP result */
-	movsd	%xmm3, 0(%esp)		/* Move result from sse...  */
-	fldl	0(%esp)			/* ...to FPU.  */
-	/* Return back 8 bytes of stack frame */
-	lea	8(%esp), %esp
-	RETURN
-
-	.p2align	4
-L(sin_poly):
-	/* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4:
-	 * y = t*t; z = y*y;
-	 * s = (-1.0)^((n>>2)&1)
-	 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
-	 */
-
-	movaps	%xmm0, %xmm4		/* t */
-	shrl	$2, %eax		/* n>>2 */
-	mulsd	%xmm0, %xmm0		/* y=t^2 */
-	andl	$1, %eax		/* (n>>2)&1 */
-	movaps	%xmm0, %xmm1		/* y */
-	mulsd	%xmm0, %xmm0		/* z=t^4 */
-
-	movsd	MO1(DP_S4), %xmm2	/* S4 */
-	mulsd	%xmm0, %xmm2		/* z*S4 */
-	movsd	MO1(DP_S3), %xmm3	/* S3 */
-	mulsd	%xmm0, %xmm3		/* z*S3 */
-	lea	-8(%esp), %esp		/* Borrow 8 bytes of stack frame */
-	addsd	MO1(DP_S2), %xmm2	/* S2+z*S4 */
-	mulsd	%xmm0, %xmm2		/* z*(S2+z*S4) */
-	addsd	MO1(DP_S1), %xmm3	/* S1+z*S3 */
-	mulsd	%xmm0, %xmm3		/* z*(S1+z*S3) */
-	addsd	MO1(DP_S0), %xmm2	/* S0+z*(S2+z*S4) */
-	mulsd	%xmm1, %xmm2		/* y*(S0+z*(S2+z*S4)) */
-	/* t*s, where s = (-1.0)^((n>>2)&1) */
-	mulsd	MO2(DP_ONES,%eax,8), %xmm4
-	addsd	%xmm2, %xmm3		/* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	/* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	mulsd	%xmm4, %xmm3
-	/* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	addsd	%xmm4, %xmm3
-	movsd	%xmm3, 0(%esp)		/* Move result from sse...   */
-	fldl	0(%esp)			/* ...to FPU.  */
-	/* Return back 8 bytes of stack frame */
-	lea	8(%esp), %esp
-	RETURN
-
-	.p2align	4
-L(large_args):
-	/* Here if |x|>=9*Pi/4 */
-	cmpl	$0x7f800000, %eax	/* x is Inf or NaN?  */
-	jae	L(arg_inf_or_nan)
-
-	/* Here if finite |x|>=9*Pi/4 */
-	cmpl	$0x4b000000, %eax	/* |x|<2^23?  */
-	jae	L(very_large_args)
-
-	/* Here if 9*Pi/4<=|x|<2^23 */
-	movsd	MO1(DP_INVPIO4), %xmm1	/* 1/(Pi/4) */
-	mulsd	%xmm0, %xmm1		/* |x|/(Pi/4) */
-	cvttsd2si %xmm1, %eax		/* k=trunc(|x|/(Pi/4)) */
-	addl	$1, %eax		/* k+1 */
-	movl	%eax, %edx
-	andl	$0xfffffffe, %edx	/* j=(k+1)&0xfffffffe */
-	cvtsi2sdl %edx, %xmm4		/* DP j */
-	movsd	MO1(DP_PIO4HI), %xmm2	/* -PIO4HI = high part of -Pi/4 */
-	mulsd	%xmm4, %xmm2		/* -j*PIO4HI */
-	movsd	MO1(DP_PIO4LO), %xmm3	/* -PIO4LO = low part of -Pi/4 */
-	addsd	%xmm2, %xmm0		/* |x| - j*PIO4HI */
-	addl	$2, %eax		/* n */
-	mulsd	%xmm3, %xmm4		/* -j*PIO4LO */
-	addsd	%xmm4, %xmm0		/* t = |x| - j*PIO4HI - j*PIO4LO */
-	jmp	L(reconstruction)
-
-	.p2align	4
-L(very_large_args):
-	/* Here if finite |x|>=2^23 */
-
-	/* bitpos = (ix>>23) - BIAS_32 + 59; */
-	shrl	$23, %eax		/* eb = biased exponent of x */
-	/* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
-	subl	$68, %eax
-	movl	$28, %ecx		/* %cl=28 */
-	movl	%eax, %edx		/* bitpos copy */
-
-	/* j = bitpos/28; */
-	div	%cl			/* j in register %al=%ax/%cl */
-	movapd	%xmm0, %xmm3		/* |x| */
-	/* clear unneeded remainder from %ah */
-	andl	$0xff, %eax
-
-	imull	$28, %eax, %ecx		/* j*28 */
-	movsd	MO1(DP_HI_MASK), %xmm4	/* DP_HI_MASK */
-	movapd	%xmm0, %xmm5		/* |x| */
-	mulsd	-2*8+MO2(_FPI,%eax,8), %xmm3	/* tmp3 = FPI[j-2]*|x| */
-	movapd	%xmm0, %xmm1		/* |x| */
-	mulsd	-1*8+MO2(_FPI,%eax,8), %xmm5	/* tmp2 = FPI[j-1]*|x| */
-	mulsd	0*8+MO2(_FPI,%eax,8), %xmm0	/* tmp0 = FPI[j]*|x| */
-	addl	$19, %ecx		/* j*28+19 */
-	mulsd	1*8+MO2(_FPI,%eax,8), %xmm1	/* tmp1 = FPI[j+1]*|x| */
-	cmpl	%ecx, %edx		/* bitpos>=j*28+19?  */
-	jl	L(very_large_skip1)
-
-	/* Here if bitpos>=j*28+19 */
-	andpd	%xmm3, %xmm4		/* HI(tmp3) */
-	subsd	%xmm4, %xmm3		/* tmp3 = tmp3 - HI(tmp3) */
-L(very_large_skip1):
-
-	movsd	MO1(DP_2POW52), %xmm6
-	movapd	%xmm5, %xmm2		/* tmp2 copy */
-	addsd	%xmm3, %xmm5		/* tmp5 = tmp3 + tmp2 */
-	movl	$1, %edx
-	addsd	%xmm5, %xmm6		/* tmp6 = tmp5 + 2^52 */
-	movsd	8+MO1(DP_2POW52), %xmm4
-	movd	%xmm6, %eax		/* k = I64_LO(tmp6); */
-	addsd	%xmm6, %xmm4		/* tmp4 = tmp6 - 2^52 */
-	comisd	%xmm5, %xmm4		/* tmp4 > tmp5?  */
-	jbe	L(very_large_skip2)
-
-	/* Here if tmp4 > tmp5 */
-	subl	$1, %eax		/* k-- */
-	addsd	8+MO1(DP_ONES), %xmm4	/* tmp4 -= 1.0 */
-L(very_large_skip2):
-
-	andl	%eax, %edx		/* k&1 */
-	subsd	%xmm4, %xmm3		/* tmp3 -= tmp4 */
-	addsd	MO2(DP_ZERONE,%edx,8), %xmm3 /* t  = DP_ZERONE[k&1] + tmp3 */
-	addsd	%xmm2, %xmm3		/* t += tmp2 */
-	addsd	%xmm3, %xmm0		/* t += tmp0 */
-	addl	$3, %eax		/* n=k+3 */
-	addsd	%xmm1, %xmm0		/* t += tmp1 */
-	mulsd	MO1(DP_PIO4), %xmm0	/* t *= PI04 */
-
-	jmp	L(reconstruction)	/* end of very_large_args path */
-
-	.p2align	4
-L(arg_less_pio4):
-	/* Here if |x|<Pi/4 */
-	cmpl	$0x3d000000, %eax	/* |x|<2^-5?  */
-	jl	L(arg_less_2pn5)
-
-	/* Here if 2^-5<=|x|<Pi/4 */
-	mulsd	%xmm0, %xmm0		/* y=x^2 */
-	movaps	%xmm0, %xmm1		/* y */
-	mulsd	%xmm0, %xmm0		/* z=x^4 */
-	movsd	MO1(DP_C4), %xmm3	/* C4 */
-	mulsd	%xmm0, %xmm3		/* z*C4 */
-	movsd	MO1(DP_C3), %xmm5	/* C3 */
-	mulsd	%xmm0, %xmm5		/* z*C3 */
-	addsd	MO1(DP_C2), %xmm3	/* C2+z*C4 */
-	mulsd	%xmm0, %xmm3		/* z*(C2+z*C4) */
-	addsd	MO1(DP_C1), %xmm5	/* C1+z*C3 */
-	mulsd	%xmm0, %xmm5		/* z*(C1+z*C3) */
-	addsd	MO1(DP_C0), %xmm3	/* C0+z*(C2+z*C4) */
-	mulsd	%xmm1, %xmm3		/* y*(C0+z*(C2+z*C4)) */
-	addsd	%xmm5, %xmm3		/* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
-	/* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
-	addsd	MO1(DP_ONES), %xmm3
-	cvtsd2ss %xmm3, %xmm3		/* SP result */
-
-L(epilogue):
-	lea	-4(%esp), %esp		/* Borrow 4 bytes of stack frame */
-	movss	%xmm3, 0(%esp)		/* Move result from sse...  */
-	flds	0(%esp)			/* ...to FPU.  */
-	/* Return back 4 bytes of stack frame */
-	lea	4(%esp), %esp
-	RETURN
-
-	.p2align	4
-L(arg_less_2pn5):
-	/* Here if |x|<2^-5 */
-	cmpl	$0x32000000, %eax	/* |x|<2^-27?  */
-	jl	L(arg_less_2pn27)
-
-	/* Here if 2^-27<=|x|<2^-5 */
-	mulsd	%xmm0, %xmm0		/* DP x^2 */
-	movsd	MO1(DP_COS2_1), %xmm3	/* DP DP_COS2_1 */
-	mulsd	%xmm0, %xmm3		/* DP x^2*DP_COS2_1 */
-	addsd	MO1(DP_COS2_0), %xmm3	/* DP DP_COS2_0+x^2*DP_COS2_1 */
-	mulsd	%xmm0, %xmm3		/* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */
-	/* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */
-	addsd	MO1(DP_ONES), %xmm3
-	cvtsd2ss %xmm3, %xmm3		/* SP result */
-	jmp	L(epilogue)
-
-	.p2align	4
-L(arg_less_2pn27):
-	/* Here if |x|<2^-27 */
-	movss	ARG_X, %xmm0		/* x */
-	andps	MO1(SP_ABS_MASK),%xmm0	/* |x| */
-	movss	MO1(SP_ONE), %xmm3	/* 1.0 */
-	subss	%xmm0, %xmm3		/* result is 1.0-|x| */
-	jmp	L(epilogue)
-
-	.p2align	4
-L(arg_inf_or_nan):
-	/* Here if |x| is Inf or NAN */
-	jne	L(skip_errno_setting)	/* in case of x is NaN */
-
-	/* Here if x is Inf. Set errno to EDOM.  */
-	call	JUMPTARGET(__errno_location)
-	movl	$EDOM, (%eax)
-
-	.p2align	4
-L(skip_errno_setting):
-	/* Here if |x| is Inf or NAN. Continued.  */
-	movss	ARG_X, %xmm3		/* load x */
-	subss	%xmm3, %xmm3		/* Result is NaN */
-	jmp	L(epilogue)
-END(__cosf_sse2)
-
-	.section .rodata, "a"
-	.p2align 3
-L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
-	.long	0x00000000,0x00000000
-	.long	0x54442d18,0x3fe921fb
-	.long	0x54442d18,0x3ff921fb
-	.long	0x7f3321d2,0x4002d97c
-	.long	0x54442d18,0x400921fb
-	.long	0x2955385e,0x400f6a7a
-	.long	0x7f3321d2,0x4012d97c
-	.long	0xe9bba775,0x4015fdbb
-	.long	0x54442d18,0x401921fb
-	.long	0xbeccb2bb,0x401c463a
-	.long	0x2955385e,0x401f6a7a
-	.type L(PIO4J), @object
-	ASM_SIZE_DIRECTIVE(L(PIO4J))
-
-	.p2align 3
-L(_FPI): /* 4/Pi broken into sum of positive DP values */
-	.long	0x00000000,0x00000000
-	.long	0x6c000000,0x3ff45f30
-	.long	0x2a000000,0x3e3c9c88
-	.long	0xa8000000,0x3c54fe13
-	.long	0xd0000000,0x3aaf47d4
-	.long	0x6c000000,0x38fbb81b
-	.long	0xe0000000,0x3714acc9
-	.long	0x7c000000,0x3560e410
-	.long	0x56000000,0x33bca2c7
-	.long	0xac000000,0x31fbd778
-	.long	0xe0000000,0x300b7246
-	.long	0xe8000000,0x2e5d2126
-	.long	0x48000000,0x2c970032
-	.long	0xe8000000,0x2ad77504
-	.long	0xe0000000,0x290921cf
-	.long	0xb0000000,0x274deb1c
-	.long	0xe0000000,0x25829a73
-	.long	0xbe000000,0x23fd1046
-	.long	0x10000000,0x2224baed
-	.long	0x8e000000,0x20709d33
-	.long	0x80000000,0x1e535a2f
-	.long	0x64000000,0x1cef904e
-	.long	0x30000000,0x1b0d6398
-	.long	0x24000000,0x1964ce7d
-	.long	0x16000000,0x17b908bf
-	.type L(_FPI), @object
-	ASM_SIZE_DIRECTIVE(L(_FPI))
-
-/* Coefficients of polynomial
- for cos(x)~=1.0+x^2*DP_COS2_0+x^4*DP_COS2_1, |x|<2^-5.  */
-	.p2align 3
-L(DP_COS2_0):
-	.long	0xff5cc6fd,0xbfdfffff
-	.type L(DP_COS2_0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_COS2_0))
-
-	.p2align 3
-L(DP_COS2_1):
-	.long	0xb178dac5,0x3fa55514
-	.type L(DP_COS2_1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_COS2_1))
-
-	.p2align 3
-L(DP_ZERONE):
-	.long	0x00000000,0x00000000	/* 0.0 */
-	.long	0x00000000,0xbff00000	/* -1.0 */
-	.type L(DP_ZERONE),@object
-	ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
-
-	.p2align 3
-L(DP_ONES):
-	.long	0x00000000,0x3ff00000	/* +1.0 */
-	.long	0x00000000,0xbff00000	/* -1.0 */
-	.type L(DP_ONES), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ONES))
-
-/* Coefficients of polynomial
- for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.  */
-	.p2align 3
-L(DP_S3):
-	.long	0x64e6b5b4,0x3ec71d72
-	.type L(DP_S3), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S3))
-
-	.p2align 3
-L(DP_S1):
-	.long	0x10c2688b,0x3f811111
-	.type L(DP_S1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S1))
-
-	.p2align 3
-L(DP_S4):
-	.long	0x1674b58a,0xbe5a947e
-	.type L(DP_S4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S4))
-
-	.p2align 3
-L(DP_S2):
-	.long	0x8b4bd1f9,0xbf2a019f
-	.type L(DP_S2), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S2))
-
-	.p2align 3
-L(DP_S0):
-	.long	0x55551cd9,0xbfc55555
-	.type L(DP_S0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S0))
-
-/* Coefficients of polynomial
- for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.  */
-	.p2align 3
-L(DP_C3):
-	.long	0x9ac43cc0,0x3efa00eb
-	.type L(DP_C3), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C3))
-
-	.p2align 3
-L(DP_C1):
-	.long	0x545c50c7,0x3fa55555
-	.type L(DP_C1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C1))
-
-	.p2align 3
-L(DP_C4):
-	.long	0xdd8844d7,0xbe923c97
-	.type L(DP_C4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C4))
-
-	.p2align 3
-L(DP_C2):
-	.long	0x348b6874,0xbf56c16b
-	.type L(DP_C2), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C2))
-
-	.p2align 3
-L(DP_C0):
-	.long	0xfffe98ae,0xbfdfffff
-	.type L(DP_C0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C0))
-
-	.p2align 3
-L(DP_PIO4):
-	.long	0x54442d18,0x3fe921fb	/* Pi/4 */
-	.type L(DP_PIO4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4))
-
-	.p2align 3
-L(DP_2POW52):
-	.long	0x00000000,0x43300000	/* +2^52 */
-	.long	0x00000000,0xc3300000	/* -2^52 */
-	.type L(DP_2POW52), @object
-	ASM_SIZE_DIRECTIVE(L(DP_2POW52))
-
-	.p2align 3
-L(DP_INVPIO4):
-	.long	0x6dc9c883,0x3ff45f30	/* 4/Pi */
-	.type L(DP_INVPIO4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
-
-	.p2align 3
-L(DP_PIO4HI):
-	.long	0x54000000,0xbfe921fb	/* High part of -Pi/4 */
-	.type L(DP_PIO4HI), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
-
-	.p2align 3
-L(DP_PIO4LO):
-	.long	0x11A62633,0xbe010b46	/* Low part of -Pi/4 */
-	.type L(DP_PIO4LO), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
-
-	.p2align 2
-L(SP_INVPIO4):
-	.long	0x3fa2f983		/* 4/Pi */
-	.type L(SP_INVPIO4), @object
-	ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
-
-	.p2align 4
-L(DP_ABS_MASK): /* Mask for getting DP absolute value */
-	.long	0xffffffff,0x7fffffff
-	.long	0xffffffff,0x7fffffff
-	.type L(DP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
-
-	.p2align 3
-L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
-	.long	0x00000000,0xffffffff
-	.type L(DP_HI_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
-
-	.p2align 4
-L(SP_ABS_MASK): /* Mask for getting SP absolute value */
-	.long	0x7fffffff,0x7fffffff
-	.long	0x7fffffff,0x7fffffff
-	.type L(SP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
-
-	.p2align 2
-L(SP_ONE):
-	.long	0x3f800000		/* 1.0 */
-	.type L(SP_ONE), @object
-	ASM_SIZE_DIRECTIVE(L(SP_ONE))
-
-weak_alias (__cosf, cosf)
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf.c b/sysdeps/i386/i686/fpu/multiarch/s_cosf.c
deleted file mode 100644
index af588de9dc..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/s_cosf.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Multiple versions of cosf
-   Copyright (C) 2012-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <init-arch.h>
-
-extern float __cosf_sse2 (float);
-extern float __cosf_ia32 (float);
-float __cosf (float);
-
-libm_ifunc (__cosf, HAS_CPU_FEATURE (SSE2) ? __cosf_sse2 : __cosf_ia32);
-weak_alias (__cosf, cosf);
-
-#define COSF __cosf_ia32
-#include <sysdeps/ieee754/flt-32/s_cosf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S
deleted file mode 100644
index f31a925522..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S
+++ /dev/null
@@ -1,586 +0,0 @@
-/* Optimized with sse2 version of sincosf
-   Copyright (C) 2012-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#define __need_Emath
-#include <bits/errno.h>
-
-/* Short algorithm description:
- *
- *  1) if |x|==0:    sin(x)=x,
- *                   cos(x)=1.
- *  2) if |x|<2^-27: sin(x)=x-x*DP_SMALL, raising underflow only when needed,
- *                   cos(x)=1-|x|.
- *  3) if |x|<2^-5 : sin(x)=x+x*x^2*DP_SIN2_0+x^5*DP_SIN2_1,
- *                   cos(x)=1+1*x^2*DP_COS2_0+x^4*DP_COS2_1
- *  4) if |x|< Pi/4: sin(x)=x+x*x^2*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))),
- *                   cos(x)=1+1*x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
- *  5) if |x| < 9*Pi/4:
- *      5.1) Range reduction:
- *          k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, t=|x|-j*Pi/4.
- *      5.2) Reconstruction:
- *          sign_sin = sign(x) * (-1.0)^(( n   >>2)&1)
- *          sign_cos =           (-1.0)^(((n+2)>>2)&1)
- *          poly_sin = ((((S4*t^2 + S3)*t^2 + S2)*t^2 + S1)*t^2 + S0)*t^2*t+t
- *          poly_cos = ((((C4*t^2 + C3)*t^2 + C2)*t^2 + C1)*t^2 + C0)*t^2*s+s
- *          if(n&2 != 0) {
- *              using cos(t) and sin(t) polynomials for |t|<Pi/4, results are
- *              cos(x) = poly_sin * sign_cos
- *              sin(x) = poly_cos * sign_sin
- *          } else {
- *              sin(x) = poly_sin * sign_sin
- *              cos(x) = poly_cos * sign_cos
- *          }
- *  6) if |x| < 2^23, large args:
- *      6.1) Range reduction:
- *          k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4
- *      6.2) Reconstruction same as (5.2).
- *  7) if |x| >= 2^23, very large args:
- *      7.1) Range reduction:
- *          k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4.
- *      7.2) Reconstruction same as (5.2).
- *  8) if x is Inf, return x-x, and set errno=EDOM.
- *  9) if x is NaN, return x-x.
- *
- * Special cases:
- *  sin/cos(+-0) = +-0/1 not raising inexact/underflow,
- *  sin/cos(subnormal) raises inexact/underflow,
- *  sin/cos(min_normalized) raises inexact/underflow,
- *  sin/cos(normalized) raises inexact,
- *  sin/cos(Inf) = NaN, raises invalid, sets errno to EDOM,
- *  sin/cos(NaN) = NaN.
- */
-
-#ifdef	PIC
-# define MO1(symbol)			L(symbol)##@GOTOFF(%ebx)
-# define MO2(symbol,reg2,_scale)	L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define CFI_PUSH(REG)	cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
-# define CFI_POP(REG)	cfi_adjust_cfa_offset(-4); cfi_restore(REG)
-# define PUSH(REG)			pushl REG; CFI_PUSH(REG)
-# define POP(REG)			popl REG; CFI_POP(REG)
-# define ENTRANCE			PUSH(%ebx); LOAD_PIC_REG(bx)
-# define RETURN				POP(%ebx); ret; CFI_PUSH(%ebx)
-# define ARG_X				8(%esp)
-# define ARG_SIN_PTR			12(%esp)
-# define ARG_COS_PTR			16(%esp)
-#else
-# define MO1(symbol)			L(symbol)
-# define MO2(symbol,reg2,_scale)	L(symbol)(,reg2,_scale)
-# define ENTRANCE
-# define RETURN				ret
-# define ARG_X				4(%esp)
-# define ARG_SIN_PTR			8(%esp)
-# define ARG_COS_PTR			12(%esp)
-#endif
-
-	.text
-ENTRY(__sincosf_sse2)
-	/* Input: single precision x on stack at address ARG_X */
-	/*        pointer to sin result on stack at address ARG_SIN_PTR */
-	/*        pointer to cos result on stack at address ARG_COS_PTR */
-
-	ENTRANCE
-	movl	ARG_X, %eax		/* Bits of x */
-	cvtss2sd ARG_X, %xmm0		/* DP x */
-	andl	$0x7fffffff, %eax	/* |x| */
-
-	cmpl	$0x3f490fdb, %eax	/* |x|<Pi/4 ? */
-	jb	L(arg_less_pio4)
-
-	/* Here if |x|>=Pi/4 */
-	movd	%eax, %xmm3		/* SP |x| */
-	andpd	MO1(DP_ABS_MASK),%xmm0	/* DP |x| */
-	movss	MO1(SP_INVPIO4), %xmm2	/* SP 1/(Pi/4) */
-
-	cmpl	$0x40e231d6, %eax	/* |x|<9*Pi/4 ? */
-	jae	L(large_args)
-
-	/* Here if Pi/4<=|x|<9*Pi/4 */
-	mulss	%xmm3, %xmm2		/* SP |x|/(Pi/4) */
-	movl	ARG_X, %ecx		/* Load x */
-	cvttss2si %xmm2, %eax		/* k, number of Pi/4 in x */
-	shrl	$29, %ecx		/* (sign of x) << 2 */
-	addl	$1, %eax		/* k+1 */
-	movl	$0x0e, %edx
-	andl	%eax, %edx		/* j = (k+1)&0x0e */
-	subsd	MO2(PIO4J,%edx,8), %xmm0/* t = |x| - j * Pi/4 */
-
-L(reconstruction):
-	/* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
-
-	movaps	%xmm0, %xmm4		/* t */
-	movhpd	MO1(DP_ONES), %xmm4	/* 1|t */
-	mulsd	%xmm0, %xmm0		/* y=t^2 */
-	movl	$2, %edx
-	unpcklpd %xmm0, %xmm0		/* y|y */
-	addl	%eax, %edx		/* k+2 */
-	movaps	%xmm0, %xmm1		/* y|y */
-	mulpd	%xmm0, %xmm0		/* z=t^4|z=t^4 */
-
-	movaps	MO1(DP_SC4), %xmm2	/* S4 */
-	mulpd	%xmm0, %xmm2		/* z*S4 */
-	movaps	MO1(DP_SC3), %xmm3	/* S3 */
-	mulpd	%xmm0, %xmm3		/* z*S3 */
-	xorl	%eax, %ecx		/* (sign_x ^ (k>>2))<<2 */
-	addpd	MO1(DP_SC2), %xmm2	/* S2+z*S4 */
-	mulpd	%xmm0, %xmm2		/* z*(S2+z*S4) */
-	shrl	$2, %edx		/* (k+2)>>2 */
-	addpd	MO1(DP_SC1), %xmm3	/* S1+z*S3 */
-	mulpd	%xmm0, %xmm3		/* z*(S1+z*S3) */
-	shrl	$2, %ecx		/* sign_x ^ k>>2 */
-	addpd	MO1(DP_SC0), %xmm2	/* S0+z*(S2+z*S4) */
-	andl	$1, %edx		/* sign_cos = ((k+2)>>2)&1 */
-	mulpd	%xmm1, %xmm2		/* y*(S0+z*(S2+z*S4)) */
-	andl	$1, %ecx		/* sign_sin = sign_x ^ ((k>>2)&1) */
-	addpd	%xmm2, %xmm3		/* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	mulpd	%xmm4, %xmm3		/*t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
-	testl	$2, %eax		/* n&2 != 0 ? */
-	addpd	%xmm4, %xmm3		/*t+t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/
-	jnz	L(sin_result_sin_poly)
-
-/*L(sin_result_cos_poly):*/
-	/*
-	 * Here if
-	 * sin(x) = poly_sin * sign_sin
-	 * cos(x) = poly_cos * sign_cos
-	 */
-	movsd	MO2(DP_ONES,%ecx,8), %xmm4/* 0|sign_sin */
-	movhpd	MO2(DP_ONES,%edx,8), %xmm4/* sign_cos|sign_sin */
-	mulpd	%xmm4, %xmm3		/* result_cos|result_sin */
-	movl	ARG_SIN_PTR, %eax
-	cvtpd2ps %xmm3, %xmm0		/* SP results */
-	movl	ARG_COS_PTR, %ecx
-	movss	%xmm0, (%eax)		/* store sin(x) from xmm0[0] */
-	shufps	$1, %xmm0, %xmm0	/* move cos(x) to xmm0[0] */
-	movss	%xmm0, (%ecx)		/* store cos(x) */
-	RETURN
-
-	.p2align	4
-L(sin_result_sin_poly):
-	/*
-	 * Here if
-	 * cos(x) = poly_sin * sign_cos
-	 * sin(x) = poly_cos * sign_sin
-	 */
-	movsd	MO2(DP_ONES,%edx,8), %xmm4/* 0|sign_cos */
-	movhpd	MO2(DP_ONES,%ecx,8), %xmm4/* sign_sin|sign_cos */
-	mulpd	%xmm4, %xmm3		/* result_sin|result_cos */
-	movl	ARG_SIN_PTR, %eax
-	cvtpd2ps %xmm3, %xmm0		/* SP results */
-	movl	ARG_COS_PTR, %ecx
-	movss	%xmm0, (%ecx)		/* store cos(x) from xmm0[0] */
-	shufps	$1, %xmm0, %xmm0	/* move sin(x) to xmm0[0] */
-	movss	%xmm0, (%eax)		/* store sin(x) */
-	RETURN
-
-	.p2align	4
-L(large_args):
-	/* Here if |x|>=9*Pi/4 */
-	cmpl	$0x7f800000, %eax	/* x is Inf or NaN ? */
-	jae	L(arg_inf_or_nan)
-
-	/* Here if finite |x|>=9*Pi/4 */
-	cmpl	$0x4b000000, %eax	/* |x|<2^23 ? */
-	jae	L(very_large_args)
-
-	/* Here if 9*Pi/4<=|x|<2^23 */
-	movsd	MO1(DP_INVPIO4), %xmm1	/* 1/(Pi/4) */
-	mulsd	%xmm0, %xmm1		/* |x|/(Pi/4) */
-	cvttsd2si %xmm1, %eax		/* k=trunc(|x|/(Pi/4)) */
-	addl	$1, %eax		/* k+1 */
-	movl	%eax, %edx
-	andl	$0xfffffffe, %edx	/* j=(k+1)&0xfffffffe */
-	cvtsi2sdl %edx, %xmm4		/* DP j */
-	movl	ARG_X, %ecx		/* Load x */
-	movsd	MO1(DP_PIO4HI), %xmm2	/* -PIO4HI = high part of -Pi/4 */
-	shrl	$29, %ecx		/* (sign of x) << 2 */
-	mulsd	%xmm4, %xmm2		/* -j*PIO4HI */
-	movsd	MO1(DP_PIO4LO), %xmm3	/* -PIO4LO = low part of -Pi/4 */
-	addsd	%xmm2, %xmm0		/* |x| - j*PIO4HI */
-	mulsd	%xmm3, %xmm4		/* -j*PIO4LO */
-	addsd	%xmm4, %xmm0		/* t = |x| - j*PIO4HI - j*PIO4LO */
-	jmp	L(reconstruction)
-
-	.p2align	4
-L(very_large_args):
-	/* Here if finite |x|>=2^23 */
-
-	/* bitpos = (ix>>23) - BIAS_32 + 59; */
-	shrl	$23, %eax		/* eb = biased exponent of x */
-	subl	$68, %eax		/* bitpos=eb-0x7f+59, where 0x7f */
-							/*is exponent bias */
-	movl	$28, %ecx		/* %cl=28 */
-	movl	%eax, %edx		/* bitpos copy */
-
-	/* j = bitpos/28; */
-	div	%cl			/* j in register %al=%ax/%cl */
-	movapd	%xmm0, %xmm3		/* |x| */
-	andl	$0xff, %eax		/* clear unneeded remainder from %ah*/
-
-	imull	$28, %eax, %ecx		/* j*28 */
-	movsd	MO1(DP_HI_MASK), %xmm4	/* DP_HI_MASK */
-	movapd	%xmm0, %xmm5		/* |x| */
-	mulsd	-2*8+MO2(_FPI,%eax,8), %xmm3/* tmp3 = FPI[j-2]*|x| */
-	movapd	%xmm0, %xmm1		/* |x| */
-	mulsd	-1*8+MO2(_FPI,%eax,8), %xmm5/* tmp2 = FPI[j-1]*|x| */
-	mulsd	0*8+MO2(_FPI,%eax,8), %xmm0/* tmp0 = FPI[j]*|x| */
-	addl	$19, %ecx		/* j*28+19 */
-	mulsd	1*8+MO2(_FPI,%eax,8), %xmm1/* tmp1 = FPI[j+1]*|x| */
-	cmpl	%ecx, %edx		/* bitpos>=j*28+19 ? */
-	jl	L(very_large_skip1)
-
-	/* Here if bitpos>=j*28+19 */
-	andpd	%xmm3, %xmm4		/* HI(tmp3) */
-	subsd	%xmm4, %xmm3		/* tmp3 = tmp3 - HI(tmp3) */
-L(very_large_skip1):
-
-	movsd	MO1(DP_2POW52), %xmm6
-	movapd	%xmm5, %xmm2		/* tmp2 copy */
-	addsd	%xmm3, %xmm5		/* tmp5 = tmp3 + tmp2 */
-	movl	$1, %edx
-	addsd	%xmm5, %xmm6		/* tmp6 = tmp5 + 2^52 */
-	movsd	8+MO1(DP_2POW52), %xmm4
-	movd	%xmm6, %eax		/* k = I64_LO(tmp6); */
-	addsd	%xmm6, %xmm4		/* tmp4 = tmp6 - 2^52 */
-	movl	ARG_X, %ecx		/* Load x */
-	comisd	%xmm5, %xmm4		/* tmp4 > tmp5 ? */
-	jbe	L(very_large_skip2)
-
-	/* Here if tmp4 > tmp5 */
-	subl	$1, %eax		/* k-- */
-	addsd	8+MO1(DP_ONES), %xmm4	/* tmp4 -= 1.0 */
-L(very_large_skip2):
-
-	andl	%eax, %edx		/* k&1 */
-	subsd	%xmm4, %xmm3		/* tmp3 -= tmp4 */
-	addsd	MO2(DP_ZERONE,%edx,8), %xmm3/* t  = DP_ZERONE[k&1] + tmp3 */
-	addsd	%xmm2, %xmm3		/* t += tmp2 */
-	shrl	$29, %ecx		/* (sign of x) << 2 */
-	addsd	%xmm3, %xmm0		/* t += tmp0 */
-	addl	$1, %eax		/* n=k+1 */
-	addsd	%xmm1, %xmm0		/* t += tmp1 */
-	mulsd	MO1(DP_PIO4), %xmm0	/* t *= PI04 */
-
-	jmp	L(reconstruction)	/* end of very_large_args path */
-
-	.p2align	4
-L(arg_less_pio4):
-	/* Here if |x|<Pi/4 */
-	cmpl	$0x3d000000, %eax	/* |x|<2^-5 ? */
-	jl	L(arg_less_2pn5)
-
-	/* Here if 2^-5<=|x|<Pi/4 */
-	movaps	%xmm0, %xmm3		/* DP x */
-	movhpd	MO1(DP_ONES), %xmm3	/* DP 1|x */
-	mulsd	%xmm0, %xmm0		/* DP y=x^2 */
-	unpcklpd %xmm0, %xmm0		/* DP y|y */
-	movaps	%xmm0, %xmm1		/* y|y */
-	mulpd	%xmm0, %xmm0		/* z=x^4|z=x^4 */
-
-	movapd	MO1(DP_SC4), %xmm4	/* S4 */
-	mulpd	%xmm0, %xmm4		/* z*S4 */
-	movapd	MO1(DP_SC3), %xmm5	/* S3 */
-	mulpd	%xmm0, %xmm5		/* z*S3 */
-	addpd	MO1(DP_SC2), %xmm4	/* S2+z*S4 */
-	mulpd	%xmm0, %xmm4		/* z*(S2+z*S4) */
-	addpd	MO1(DP_SC1), %xmm5	/* S1+z*S3 */
-	mulpd	%xmm0, %xmm5		/* z*(S1+z*S3) */
-	addpd	MO1(DP_SC0), %xmm4	/* S0+z*(S2+z*S4) */
-	mulpd	%xmm1, %xmm4		/* y*(S0+z*(S2+z*S4)) */
-	mulpd	%xmm3, %xmm5		/* x*z*(S1+z*S3) */
-	mulpd	%xmm3, %xmm4		/* x*y*(S0+z*(S2+z*S4)) */
-	addpd	%xmm5, %xmm4		/*x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
-	movl	ARG_SIN_PTR, %eax
-	addpd	%xmm4, %xmm3		/*x+x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/
-	movl	ARG_COS_PTR, %ecx
-	cvtpd2ps %xmm3, %xmm0		/* SP results */
-	movss	%xmm0, (%eax)		/* store sin(x) from xmm0[0] */
-	shufps	$1, %xmm0, %xmm0	/* move cos(x) to xmm0[0] */
-	movss	%xmm0, (%ecx)		/* store cos(x) */
-	RETURN
-
-	.p2align	4
-L(arg_less_2pn5):
-	/* Here if |x|<2^-5 */
-	cmpl	$0x32000000, %eax	/* |x|<2^-27 ? */
-	jl	L(arg_less_2pn27)
-
-	/* Here if 2^-27<=|x|<2^-5 */
-	movaps	%xmm0, %xmm1		/* DP x */
-	movhpd	MO1(DP_ONES), %xmm1	/* DP 1|x */
-	mulsd	%xmm0, %xmm0		/* DP x^2 */
-	unpcklpd %xmm0, %xmm0		/* DP x^2|x^2 */
-
-	movaps	MO1(DP_SINCOS2_1), %xmm3/* DP DP_SIN2_1 */
-	mulpd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_1 */
-	addpd	MO1(DP_SINCOS2_0), %xmm3/* DP DP_SIN2_0+x^2*DP_SIN2_1 */
-	mulpd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
-	mulpd	%xmm1, %xmm3		/* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
-	addpd	%xmm1, %xmm3		/* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
-	movl	ARG_SIN_PTR, %eax
-	cvtpd2ps %xmm3, %xmm0		/* SP results */
-	movl	ARG_COS_PTR, %ecx
-	movss	%xmm0, (%eax)		/* store sin(x) from xmm0[0] */
-	shufps	$1, %xmm0, %xmm0	/* move cos(x) to xmm0[0] */
-	movss	%xmm0, (%ecx)		/* store cos(x) */
-	RETURN
-
-	.p2align	4
-L(arg_less_2pn27):
-	movss	ARG_X, %xmm7		/* SP x */
-	cmpl	$0, %eax		/* x=0 ? */
-	je	L(arg_zero)		/* in case x=0 return sin(+-0)==+-0 */
-	/* Here if |x|<2^-27 */
-	/*
-	 * Special cases here:
-	 *  sin(subnormal) raises inexact/underflow
-	 *  sin(min_normalized) raises inexact/underflow
-	 *  sin(normalized) raises inexact
-	 *  cos(here)=1-|x| (raising inexact)
-	 */
-	movaps	%xmm0, %xmm3		/* DP x */
-	mulsd	MO1(DP_SMALL), %xmm0	/* DP x*DP_SMALL */
-	subsd	%xmm0, %xmm3		/* DP sin result is x-x*DP_SMALL */
-	andps	MO1(SP_ABS_MASK), %xmm7	/* SP |x| */
-	cvtsd2ss %xmm3, %xmm0		/* sin(x) */
-	movl	ARG_SIN_PTR, %eax
-	movss	MO1(SP_ONE), %xmm1	/* SP 1.0 */
-	movss	%xmm0, (%eax)		/* sin(x) store */
-	movl	ARG_COS_PTR, %ecx
-	subss	%xmm7, %xmm1		/* cos(x) */
-	movss	%xmm1, (%ecx)		/* cos(x) store */
-	RETURN
-
-	.p2align	4
-L(arg_zero):
-	movss	MO1(SP_ONE), %xmm0	/* 1.0 */
-	movl	ARG_SIN_PTR, %eax
-	movl	ARG_COS_PTR, %ecx
-	movss	%xmm7, (%eax)		/* sin(+-0)==x */
-	movss	%xmm0, (%ecx)		/* cos(+-0)==1 */
-	RETURN
-
-	.p2align	4
-L(arg_inf_or_nan):
-	movss	ARG_X, %xmm7		/* SP x */
-	/* Here if |x| is Inf or NAN */
-	jne	L(skip_errno_setting)	/* in case of x is NaN */
-
-	/* Here if x is Inf. Set errno to EDOM.  */
-	call	JUMPTARGET(__errno_location)
-	movl	$EDOM, (%eax)
-
-	.p2align	4
-L(skip_errno_setting):
-	/* Here if |x| is Inf or NAN. Continued. */
-	subss	%xmm7, %xmm7		/* x-x, result is NaN */
-	movl	ARG_SIN_PTR, %eax
-	movl	ARG_COS_PTR, %ecx
-	movss	%xmm7, (%eax)
-	movss	%xmm7, (%ecx)
-	RETURN
-END(__sincosf_sse2)
-
-	.section .rodata, "a"
-	.p2align 3
-L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
-	.long	0x00000000,0x00000000
-	.long	0x54442d18,0x3fe921fb
-	.long	0x54442d18,0x3ff921fb
-	.long	0x7f3321d2,0x4002d97c
-	.long	0x54442d18,0x400921fb
-	.long	0x2955385e,0x400f6a7a
-	.long	0x7f3321d2,0x4012d97c
-	.long	0xe9bba775,0x4015fdbb
-	.long	0x54442d18,0x401921fb
-	.long	0xbeccb2bb,0x401c463a
-	.long	0x2955385e,0x401f6a7a
-	.type L(PIO4J), @object
-	ASM_SIZE_DIRECTIVE(L(PIO4J))
-
-	.p2align 3
-L(_FPI): /* 4/Pi broken into sum of positive DP values */
-	.long	0x00000000,0x00000000
-	.long	0x6c000000,0x3ff45f30
-	.long	0x2a000000,0x3e3c9c88
-	.long	0xa8000000,0x3c54fe13
-	.long	0xd0000000,0x3aaf47d4
-	.long	0x6c000000,0x38fbb81b
-	.long	0xe0000000,0x3714acc9
-	.long	0x7c000000,0x3560e410
-	.long	0x56000000,0x33bca2c7
-	.long	0xac000000,0x31fbd778
-	.long	0xe0000000,0x300b7246
-	.long	0xe8000000,0x2e5d2126
-	.long	0x48000000,0x2c970032
-	.long	0xe8000000,0x2ad77504
-	.long	0xe0000000,0x290921cf
-	.long	0xb0000000,0x274deb1c
-	.long	0xe0000000,0x25829a73
-	.long	0xbe000000,0x23fd1046
-	.long	0x10000000,0x2224baed
-	.long	0x8e000000,0x20709d33
-	.long	0x80000000,0x1e535a2f
-	.long	0x64000000,0x1cef904e
-	.long	0x30000000,0x1b0d6398
-	.long	0x24000000,0x1964ce7d
-	.long	0x16000000,0x17b908bf
-	.type L(_FPI), @object
-	ASM_SIZE_DIRECTIVE(L(_FPI))
-
-/* Coefficients of polynomials for */
-/* sin(x)~=x+x*x^2*(DP_SIN2_0+x^2*DP_SIN2_1) in low  DP part, */
-/* cos(x)~=1+1*x^2*(DP_COS2_0+x^2*DP_COS2_1) in high DP part, */
-/* for |x|<2^-5. */
-	.p2align 4
-L(DP_SINCOS2_0):
-	.long	0x5543d49d,0xbfc55555
-	.long	0xff5cc6fd,0xbfdfffff
-	.type L(DP_SINCOS2_0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_0))
-
-	.p2align 4
-L(DP_SINCOS2_1):
-	.long	0x75cec8c5,0x3f8110f4
-	.long	0xb178dac5,0x3fa55514
-	.type L(DP_SINCOS2_1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_1))
-
-	.p2align 3
-L(DP_ZERONE):
-	.long	0x00000000,0x00000000	/* 0.0 */
-	.long	0x00000000,0xbff00000	/* -1.0 */
-	.type L(DP_ZERONE), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
-
-	.p2align 3
-L(DP_ONES):
-	.long	0x00000000,0x3ff00000	/* +1.0 */
-	.long	0x00000000,0xbff00000	/* -1.0 */
-	.type L(DP_ONES), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ONES))
-
-/* Coefficients of polynomials for */
-/* sin(t)~=t+t*t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))) in low  DP part, */
-/* cos(t)~=1+1*t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))) in high DP part, */
-/* for |t|<Pi/4. */
-	.p2align 4
-L(DP_SC4):
-	.long	0x1674b58a,0xbe5a947e
-	.long	0xdd8844d7,0xbe923c97
-	.type L(DP_SC4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SC4))
-
-	.p2align 4
-L(DP_SC3):
-	.long	0x64e6b5b4,0x3ec71d72
-	.long	0x9ac43cc0,0x3efa00eb
-	.type L(DP_SC3), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SC3))
-
-	.p2align 4
-L(DP_SC2):
-	.long	0x8b4bd1f9,0xbf2a019f
-	.long	0x348b6874,0xbf56c16b
-	.type L(DP_SC2), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SC2))
-
-	.p2align 4
-L(DP_SC1):
-	.long	0x10c2688b,0x3f811111
-	.long	0x545c50c7,0x3fa55555
-	.type L(DP_SC1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SC1))
-
-	.p2align 4
-L(DP_SC0):
-	.long	0x55551cd9,0xbfc55555
-	.long	0xfffe98ae,0xbfdfffff
-	.type L(DP_SC0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SC0))
-
-	.p2align 3
-L(DP_SMALL):
-	.long	0x00000000,0x3cd00000	/* 2^(-50) */
-	.type L(DP_SMALL), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SMALL))
-
-	.p2align 3
-L(DP_PIO4):
-	.long	0x54442d18,0x3fe921fb	/* Pi/4 */
-	.type L(DP_PIO4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4))
-
-	.p2align 3
-L(DP_2POW52):
-	.long	0x00000000,0x43300000	/* +2^52 */
-	.long	0x00000000,0xc3300000	/* -2^52 */
-	.type L(DP_2POW52), @object
-	ASM_SIZE_DIRECTIVE(L(DP_2POW52))
-
-	.p2align 3
-L(DP_INVPIO4):
-	.long	0x6dc9c883,0x3ff45f30	/* 4/Pi */
-	.type L(DP_INVPIO4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
-
-	.p2align 3
-L(DP_PIO4HI):
-	.long	0x54000000,0xbfe921fb	/* High part of -Pi/4 */
-	.type L(DP_PIO4HI), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
-
-	.p2align 3
-L(DP_PIO4LO):
-	.long	0x11A62633,0xbe010b46	/* Low part of -Pi/4 */
-	.type L(DP_PIO4LO), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
-
-	.p2align 2
-L(SP_INVPIO4):
-	.long	0x3fa2f983		/* 4/Pi */
-	.type L(SP_INVPIO4), @object
-	ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
-
-	.p2align 4
-L(DP_ABS_MASK): /* Mask for getting DP absolute value */
-	.long	0xffffffff,0x7fffffff
-	.long	0xffffffff,0x7fffffff
-	.type L(DP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
-
-	.p2align 3
-L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
-	.long	0x00000000,0xffffffff
-	.type L(DP_HI_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
-
-	.p2align 4
-L(SP_ABS_MASK): /* Mask for getting SP absolute value */
-	.long	0x7fffffff,0x7fffffff
-	.long	0x7fffffff,0x7fffffff
-	.type L(SP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
-
-	.p2align 2
-L(SP_ONE):
-	.long	0x3f800000		/* 1.0 */
-	.type L(SP_ONE), @object
-	ASM_SIZE_DIRECTIVE(L(SP_ONE))
-
-weak_alias(__sincosf, sincosf)
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c b/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c
deleted file mode 100644
index 9428f9b4ea..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Multiple versions of sincosf
-   Copyright (C) 2012-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <init-arch.h>
-
-extern void __sincosf_sse2 (float, float *, float *);
-extern void __sincosf_ia32 (float, float *, float *);
-void __sincosf (float, float *, float *);
-
-libm_ifunc (__sincosf,
-	    HAS_CPU_FEATURE (SSE2) ? __sincosf_sse2 : __sincosf_ia32);
-weak_alias (__sincosf, sincosf);
-
-#define SINCOSF __sincosf_ia32
-#include <sysdeps/ieee754/flt-32/s_sincosf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
deleted file mode 100644
index ee96018061..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
+++ /dev/null
@@ -1,566 +0,0 @@
-/* SSE2-optimized version of sinf
-   Copyright (C) 2012-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#define __need_Emath
-#include <bits/errno.h>
-
-/* Short algorithm description:
- *
- *  1) if |x| == 0: return x.
- *  2) if |x| <  2^-27: return x-x*DP_SMALL, raise underflow only when needed.
- *  3) if |x| <  2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1.
- *  4) if |x| <   Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).
- *  5) if |x| < 9*Pi/4:
- *      5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1,
- *           t=|x|-j*Pi/4.
- *      5.2) Reconstruction:
- *          s = sign(x) * (-1.0)^((n>>2)&1)
- *          if(n&2 != 0) {
- *              using cos(t) polynomial for |t|<Pi/4, result is
- *              s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
- *          } else {
- *              using sin(t) polynomial for |t|<Pi/4, result is
- *              s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
- *          }
- *  6) if |x| < 2^23, large args:
- *      6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
- *           t=|x|-j*Pi/4.
- *      6.2) Reconstruction same as (5.2).
- *  7) if |x| >= 2^23, very large args:
- *      7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
- *           t=|x|-j*Pi/4.
- *      7.2) Reconstruction same as (5.2).
- *  8) if x is Inf, return x-x, and set errno=EDOM.
- *  9) if x is NaN, return x-x.
- *
- * Special cases:
- *  sin(+-0) = +-0 not raising inexact/underflow,
- *  sin(subnormal) raises inexact/underflow,
- *  sin(min_normalized) raises inexact/underflow,
- *  sin(normalized) raises inexact,
- *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
- *  sin(NaN) = NaN.
- */
-
-#ifdef	PIC
-# define MO1(symbol)			L(symbol)##@GOTOFF(%ebx)
-# define MO2(symbol,reg2,_scale)	L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define CFI_PUSH(REG)	cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
-# define CFI_POP(REG)	cfi_adjust_cfa_offset(-4); cfi_restore(REG)
-# define PUSH(REG)			pushl REG; CFI_PUSH(REG)
-# define POP(REG)			popl REG; CFI_POP(REG)
-# define ENTRANCE			PUSH(%ebx); LOAD_PIC_REG(bx)
-# define RETURN				POP(%ebx); ret; CFI_PUSH(%ebx)
-# define ARG_X				8(%esp)
-#else
-# define MO1(symbol)			L(symbol)
-# define MO2(symbol,reg2,_scale)	L(symbol)(,reg2,_scale)
-# define ENTRANCE
-# define RETURN				ret
-# define ARG_X				4(%esp)
-#endif
-
-	.text
-ENTRY(__sinf_sse2)
-	/* Input: single precision x on stack at address ARG_X */
-
-	ENTRANCE
-	movl	ARG_X, %eax		/* Bits of x */
-	cvtss2sd ARG_X, %xmm0		/* DP x */
-	andl	$0x7fffffff, %eax	/* |x| */
-
-	cmpl	$0x3f490fdb, %eax	/* |x|<Pi/4?  */
-	jb	L(arg_less_pio4)
-
-	/* Here if |x|>=Pi/4 */
-	movd	%eax, %xmm3		/* SP |x| */
-	andpd	MO1(DP_ABS_MASK),%xmm0	/* DP |x| */
-	movss	MO1(SP_INVPIO4), %xmm2	/* SP 1/(Pi/4) */
-
-	cmpl	$0x40e231d6, %eax	/* |x|<9*Pi/4?  */
-	jae	L(large_args)
-
-	/* Here if Pi/4<=|x|<9*Pi/4 */
-	mulss	%xmm3, %xmm2		/* SP |x|/(Pi/4) */
-	movl	ARG_X, %ecx		/* Load x */
-	cvttss2si %xmm2, %eax		/* k, number of Pi/4 in x */
-	shrl	$31, %ecx		/* sign of x */
-	addl	$1, %eax		/* k+1 */
-	movl	$0x0e, %edx
-	andl	%eax, %edx		/* j = (k+1)&0x0e */
-	subsd	MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
-
-L(reconstruction):
-	/* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
-	testl	$2, %eax		/* n&2 != 0?  */
-	jz	L(sin_poly)
-
-/*L(cos_poly):*/
-	/* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4:
-	 * y = t*t; z = y*y;
-	 * s = sign(x) * (-1.0)^((n>>2)&1)
-	 * result = s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
-	 */
-	shrl	$2, %eax		/* n>>2 */
-	mulsd	%xmm0, %xmm0		/* y=t^2 */
-	andl	$1, %eax		/* (n>>2)&1 */
-	movaps	%xmm0, %xmm1		/* y */
-	mulsd	%xmm0, %xmm0		/* z=t^4 */
-
-	movsd	MO1(DP_C4), %xmm4	/* C4 */
-	mulsd	%xmm0, %xmm4		/* z*C4 */
-	xorl	%eax, %ecx		/* (-1.0)^((n>>2)&1) XOR sign(x) */
-	movsd	MO1(DP_C3), %xmm3	/* C3 */
-	mulsd	%xmm0, %xmm3		/* z*C3 */
-	addsd	MO1(DP_C2), %xmm4	/* C2+z*C4 */
-	mulsd	%xmm0, %xmm4		/* z*(C2+z*C4) */
-	lea	-8(%esp), %esp		/* Borrow 8 bytes of stack frame */
-	addsd	MO1(DP_C1), %xmm3	/* C1+z*C3 */
-	mulsd	%xmm0, %xmm3		/* z*(C1+z*C3) */
-	addsd	MO1(DP_C0), %xmm4	/* C0+z*(C2+z*C4) */
-	mulsd	%xmm1, %xmm4		/* y*(C0+z*(C2+z*C4)) */
-
-	addsd	%xmm4, %xmm3		/* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
-	/* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
-	addsd	MO1(DP_ONES), %xmm3
-
-	mulsd	MO2(DP_ONES,%ecx,8), %xmm3 /* DP result */
-	movsd	%xmm3, 0(%esp)		/* Move result from sse...  */
-	fldl	0(%esp)			/* ...to FPU.  */
-	/* Return back 8 bytes of stack frame */
-	lea	8(%esp), %esp
-	RETURN
-
-	.p2align	4
-L(sin_poly):
-	/* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4:
-	 * y = t*t; z = y*y;
-	 * s = sign(x) * (-1.0)^((n>>2)&1)
-	 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
-	 */
-
-	movaps	%xmm0, %xmm4		/* t */
-	shrl	$2, %eax		/* n>>2 */
-	mulsd	%xmm0, %xmm0		/* y=t^2 */
-	andl	$1, %eax		/* (n>>2)&1 */
-	movaps	%xmm0, %xmm1		/* y */
-	xorl	%eax, %ecx		/* (-1.0)^((n>>2)&1) XOR sign(x) */
-	mulsd	%xmm0, %xmm0		/* z=t^4 */
-
-	movsd	MO1(DP_S4), %xmm2	/* S4 */
-	mulsd	%xmm0, %xmm2		/* z*S4 */
-	movsd	MO1(DP_S3), %xmm3	/* S3 */
-	mulsd	%xmm0, %xmm3		/* z*S3 */
-	lea	-8(%esp), %esp		/* Borrow 8 bytes of stack frame */
-	addsd	MO1(DP_S2), %xmm2	/* S2+z*S4 */
-	mulsd	%xmm0, %xmm2		/* z*(S2+z*S4) */
-	addsd	MO1(DP_S1), %xmm3	/* S1+z*S3 */
-	mulsd	%xmm0, %xmm3		/* z*(S1+z*S3) */
-	addsd	MO1(DP_S0), %xmm2	/* S0+z*(S2+z*S4) */
-	mulsd	%xmm1, %xmm2		/* y*(S0+z*(S2+z*S4)) */
-	/* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
-	mulsd	MO2(DP_ONES,%ecx,8), %xmm4
-	addsd	%xmm2, %xmm3		/* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	/* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	mulsd	%xmm4, %xmm3
-	/* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	addsd	%xmm4, %xmm3
-	movsd	%xmm3, 0(%esp)		/* Move result from sse...  */
-	fldl	0(%esp)			/* ...to FPU.  */
-	/* Return back 8 bytes of stack frame */
-	lea	8(%esp), %esp
-	RETURN
-
-	.p2align	4
-L(large_args):
-	/* Here if |x|>=9*Pi/4 */
-	cmpl	$0x7f800000, %eax	/* x is Inf or NaN?  */
-	jae	L(arg_inf_or_nan)
-
-	/* Here if finite |x|>=9*Pi/4 */
-	cmpl	$0x4b000000, %eax	/* |x|<2^23?  */
-	jae	L(very_large_args)
-
-	/* Here if 9*Pi/4<=|x|<2^23 */
-	movsd	MO1(DP_INVPIO4), %xmm1	/* 1/(Pi/4) */
-	mulsd	%xmm0, %xmm1		/* |x|/(Pi/4) */
-	cvttsd2si %xmm1, %eax		/* k=trunc(|x|/(Pi/4)) */
-	addl	$1, %eax		/* k+1 */
-	movl	%eax, %edx
-	andl	$0xfffffffe, %edx	/* j=(k+1)&0xfffffffe */
-	cvtsi2sdl %edx, %xmm4		/* DP j */
-	movl	ARG_X, %ecx		/* Load x */
-	movsd	MO1(DP_PIO4HI), %xmm2	/* -PIO4HI = high part of -Pi/4 */
-	shrl	$31, %ecx		/* sign bit of x */
-	mulsd	%xmm4, %xmm2		/* -j*PIO4HI */
-	movsd	MO1(DP_PIO4LO), %xmm3	/* -PIO4LO = low part of -Pi/4 */
-	addsd	%xmm2, %xmm0		/* |x| - j*PIO4HI */
-	mulsd	%xmm3, %xmm4		/* -j*PIO4LO */
-	addsd	%xmm4, %xmm0		/* t = |x| - j*PIO4HI - j*PIO4LO */
-	jmp	L(reconstruction)
-
-	.p2align	4
-L(very_large_args):
-	/* Here if finite |x|>=2^23 */
-
-	/* bitpos = (ix>>23) - BIAS_32 + 59; */
-	shrl	$23, %eax		/* eb = biased exponent of x */
-	/* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
-	subl	$68, %eax
-	movl	$28, %ecx		/* %cl=28 */
-	movl	%eax, %edx		/* bitpos copy */
-
-	/* j = bitpos/28; */
-	div	%cl			/* j in register %al=%ax/%cl */
-	movapd	%xmm0, %xmm3		/* |x| */
-	/* clear unneeded remainder from %ah */
-	andl	$0xff, %eax
-
-	imull	$28, %eax, %ecx		/* j*28 */
-	movsd	MO1(DP_HI_MASK), %xmm4	/* DP_HI_MASK */
-	movapd	%xmm0, %xmm5		/* |x| */
-	mulsd	-2*8+MO2(_FPI,%eax,8), %xmm3	/* tmp3 = FPI[j-2]*|x| */
-	movapd	%xmm0, %xmm1		/* |x| */
-	mulsd	-1*8+MO2(_FPI,%eax,8), %xmm5	/* tmp2 = FPI[j-1]*|x| */
-	mulsd	0*8+MO2(_FPI,%eax,8), %xmm0	/* tmp0 = FPI[j]*|x| */
-	addl	$19, %ecx		/* j*28+19 */
-	mulsd	1*8+MO2(_FPI,%eax,8), %xmm1	/* tmp1 = FPI[j+1]*|x| */
-	cmpl	%ecx, %edx		/* bitpos>=j*28+19?   */
-	jl	L(very_large_skip1)
-
-	/* Here if bitpos>=j*28+19 */
-	andpd	%xmm3, %xmm4		/* HI(tmp3) */
-	subsd	%xmm4, %xmm3		/* tmp3 = tmp3 - HI(tmp3) */
-L(very_large_skip1):
-
-	movsd	MO1(DP_2POW52), %xmm6
-	movapd	%xmm5, %xmm2		/* tmp2 copy */
-	addsd	%xmm3, %xmm5		/* tmp5 = tmp3 + tmp2 */
-	movl	$1, %edx
-	addsd	%xmm5, %xmm6		/* tmp6 = tmp5 + 2^52 */
-	movsd	8+MO1(DP_2POW52), %xmm4
-	movd	%xmm6, %eax		/* k = I64_LO(tmp6); */
-	addsd	%xmm6, %xmm4		/* tmp4 = tmp6 - 2^52 */
-	movl	ARG_X, %ecx		/* Load x */
-	comisd	%xmm5, %xmm4		/* tmp4 > tmp5?  */
-	jbe	L(very_large_skip2)
-
-	/* Here if tmp4 > tmp5 */
-	subl	$1, %eax		/* k-- */
-	addsd	8+MO1(DP_ONES), %xmm4	/* tmp4 -= 1.0 */
-L(very_large_skip2):
-
-	andl	%eax, %edx		/* k&1 */
-	subsd	%xmm4, %xmm3		/* tmp3 -= tmp4 */
-	addsd	MO2(DP_ZERONE,%edx,8), %xmm3 /* t  = DP_ZERONE[k&1] + tmp3 */
-	addsd	%xmm2, %xmm3		/* t += tmp2 */
-	shrl	$31, %ecx		/* sign of x */
-	addsd	%xmm3, %xmm0		/* t += tmp0 */
-	addl	$1, %eax		/* n=k+1 */
-	addsd	%xmm1, %xmm0		/* t += tmp1 */
-	mulsd	MO1(DP_PIO4), %xmm0	/* t *= PI04 */
-
-	jmp	L(reconstruction)	/* end of very_large_args path */
-
-	.p2align	4
-L(arg_less_pio4):
-	/* Here if |x|<Pi/4 */
-	cmpl	$0x3d000000, %eax	/* |x|<2^-5?  */
-	jl	L(arg_less_2pn5)
-
-	/* Here if 2^-5<=|x|<Pi/4 */
-	movaps	%xmm0, %xmm3		/* x */
-	mulsd	%xmm0, %xmm0		/* y=x^2 */
-	movaps	%xmm0, %xmm1		/* y */
-	mulsd	%xmm0, %xmm0		/* z=x^4 */
-	movsd	MO1(DP_S4), %xmm4	/* S4 */
-	mulsd	%xmm0, %xmm4		/* z*S4 */
-	movsd	MO1(DP_S3), %xmm5	/* S3 */
-	mulsd	%xmm0, %xmm5		/* z*S3 */
-	addsd	MO1(DP_S2), %xmm4	/* S2+z*S4 */
-	mulsd	%xmm0, %xmm4		/* z*(S2+z*S4) */
-	addsd	MO1(DP_S1), %xmm5	/* S1+z*S3 */
-	mulsd	%xmm0, %xmm5		/* z*(S1+z*S3) */
-	addsd	MO1(DP_S0), %xmm4	/* S0+z*(S2+z*S4) */
-	mulsd	%xmm1, %xmm4		/* y*(S0+z*(S2+z*S4)) */
-	mulsd	%xmm3, %xmm5		/* x*z*(S1+z*S3) */
-	mulsd	%xmm3, %xmm4		/* x*y*(S0+z*(S2+z*S4)) */
-	/* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	addsd	%xmm5, %xmm4
-	/* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	addsd	%xmm4, %xmm3
-	cvtsd2ss %xmm3, %xmm3		/* SP result */
-
-L(epilogue):
-	lea	-4(%esp), %esp		/* Borrow 4 bytes of stack frame */
-	movss	%xmm3, 0(%esp)		/* Move result from sse...  */
-	flds	0(%esp)			/* ...to FPU.  */
-	/* Return back 4 bytes of stack frame */
-	lea	4(%esp), %esp
-	RETURN
-
-	.p2align	4
-L(arg_less_2pn5):
-	/* Here if |x|<2^-5 */
-	cmpl	$0x32000000, %eax	/* |x|<2^-27?  */
-	jl	L(arg_less_2pn27)
-
-	/* Here if 2^-27<=|x|<2^-5 */
-	movaps	%xmm0, %xmm1		/* DP x */
-	mulsd	%xmm0, %xmm0		/* DP x^2 */
-	movsd	MO1(DP_SIN2_1), %xmm3	/* DP DP_SIN2_1 */
-	mulsd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_1 */
-	addsd	MO1(DP_SIN2_0), %xmm3	/* DP DP_SIN2_0+x^2*DP_SIN2_1 */
-	mulsd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
-	mulsd	%xmm1, %xmm3		/* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
-	addsd	%xmm1, %xmm3		/* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
-	cvtsd2ss %xmm3, %xmm3		/* SP result */
-	jmp	L(epilogue)
-
-	.p2align	4
-L(arg_less_2pn27):
-	movss	ARG_X, %xmm3		/* SP x */
-	cmpl	$0, %eax		/* x=0?  */
-	je	L(epilogue)		/* in case x=0 return sin(+-0)==+-0 */
-	/* Here if |x|<2^-27 */
-	/*
-	 * Special cases here:
-	 *  sin(subnormal) raises inexact/underflow
-	 *  sin(min_normalized) raises inexact/underflow
-	 *  sin(normalized) raises inexact
-	 */
-	movaps	%xmm0, %xmm3		/* Copy of DP x */
-	mulsd	MO1(DP_SMALL), %xmm0	/* x*DP_SMALL */
-	subsd	%xmm0, %xmm3		/* Result is x-x*DP_SMALL */
-	cvtsd2ss %xmm3, %xmm3		/* Result converted to SP */
-	jmp	L(epilogue)
-
-	.p2align	4
-L(arg_inf_or_nan):
-	/* Here if |x| is Inf or NAN */
-	jne	L(skip_errno_setting)	/* in case x is NaN */
-
-	/* Here if x is Inf. Set errno to EDOM.  */
-	call	JUMPTARGET(__errno_location)
-	movl	$EDOM, (%eax)
-
-	.p2align	4
-L(skip_errno_setting):
-	/* Here if |x| is Inf or NAN. Continued.  */
-	movss	ARG_X, %xmm3		/* load x */
-	subss	%xmm3, %xmm3		/* Result is NaN */
-	jmp	L(epilogue)
-END(__sinf_sse2)
-
-	.section .rodata, "a"
-	.p2align 3
-L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
-	.long	0x00000000,0x00000000
-	.long	0x54442d18,0x3fe921fb
-	.long	0x54442d18,0x3ff921fb
-	.long	0x7f3321d2,0x4002d97c
-	.long	0x54442d18,0x400921fb
-	.long	0x2955385e,0x400f6a7a
-	.long	0x7f3321d2,0x4012d97c
-	.long	0xe9bba775,0x4015fdbb
-	.long	0x54442d18,0x401921fb
-	.long	0xbeccb2bb,0x401c463a
-	.long	0x2955385e,0x401f6a7a
-	.type L(PIO4J), @object
-	ASM_SIZE_DIRECTIVE(L(PIO4J))
-
-	.p2align 3
-L(_FPI): /* 4/Pi broken into sum of positive DP values */
-	.long	0x00000000,0x00000000
-	.long	0x6c000000,0x3ff45f30
-	.long	0x2a000000,0x3e3c9c88
-	.long	0xa8000000,0x3c54fe13
-	.long	0xd0000000,0x3aaf47d4
-	.long	0x6c000000,0x38fbb81b
-	.long	0xe0000000,0x3714acc9
-	.long	0x7c000000,0x3560e410
-	.long	0x56000000,0x33bca2c7
-	.long	0xac000000,0x31fbd778
-	.long	0xe0000000,0x300b7246
-	.long	0xe8000000,0x2e5d2126
-	.long	0x48000000,0x2c970032
-	.long	0xe8000000,0x2ad77504
-	.long	0xe0000000,0x290921cf
-	.long	0xb0000000,0x274deb1c
-	.long	0xe0000000,0x25829a73
-	.long	0xbe000000,0x23fd1046
-	.long	0x10000000,0x2224baed
-	.long	0x8e000000,0x20709d33
-	.long	0x80000000,0x1e535a2f
-	.long	0x64000000,0x1cef904e
-	.long	0x30000000,0x1b0d6398
-	.long	0x24000000,0x1964ce7d
-	.long	0x16000000,0x17b908bf
-	.type L(_FPI), @object
-	ASM_SIZE_DIRECTIVE(L(_FPI))
-
-/* Coefficients of polynomial
-   for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5.  */
-	.p2align 3
-L(DP_SIN2_0):
-	.long	0x5543d49d,0xbfc55555
-	.type L(DP_SIN2_0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SIN2_0))
-
-	.p2align 3
-L(DP_SIN2_1):
-	.long	0x75cec8c5,0x3f8110f4
-	.type L(DP_SIN2_1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SIN2_1))
-
-	.p2align 3
-L(DP_ZERONE):
-	.long	0x00000000,0x00000000	/* 0.0 */
-	.long	0x00000000,0xbff00000	/* -1.0 */
-	.type L(DP_ZERONE), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
-
-	.p2align 3
-L(DP_ONES):
-	.long	0x00000000,0x3ff00000	/* +1.0 */
-	.long	0x00000000,0xbff00000	/* -1.0 */
-	.type L(DP_ONES), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ONES))
-
-/* Coefficients of polynomial
-   for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.  */
-	.p2align 3
-L(DP_S3):
-	.long	0x64e6b5b4,0x3ec71d72
-	.type L(DP_S3), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S3))
-
-	.p2align 3
-L(DP_S1):
-	.long	0x10c2688b,0x3f811111
-	.type L(DP_S1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S1))
-
-	.p2align 3
-L(DP_S4):
-	.long	0x1674b58a,0xbe5a947e
-	.type L(DP_S4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S4))
-
-	.p2align 3
-L(DP_S2):
-	.long	0x8b4bd1f9,0xbf2a019f
-	.type L(DP_S2), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S2))
-
-	.p2align 3
-L(DP_S0):
-	.long	0x55551cd9,0xbfc55555
-	.type L(DP_S0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S0))
-
-	.p2align 3
-L(DP_SMALL):
-	.long	0x00000000,0x3cd00000	/* 2^(-50) */
-	.type L(DP_SMALL), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SMALL))
-
-/* Coefficients of polynomial
-   for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.  */
-	.p2align 3
-L(DP_C3):
-	.long	0x9ac43cc0,0x3efa00eb
-	.type L(DP_C3), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C3))
-
-	.p2align 3
-L(DP_C1):
-	.long	0x545c50c7,0x3fa55555
-	.type L(DP_C1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C1))
-
-	.p2align 3
-L(DP_C4):
-	.long	0xdd8844d7,0xbe923c97
-	.type L(DP_C4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C4))
-
-	.p2align 3
-L(DP_C2):
-	.long	0x348b6874,0xbf56c16b
-	.type L(DP_C2), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C2))
-
-	.p2align 3
-L(DP_C0):
-	.long	0xfffe98ae,0xbfdfffff
-	.type L(DP_C0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C0))
-
-	.p2align 3
-L(DP_PIO4):
-	.long	0x54442d18,0x3fe921fb	/* Pi/4 */
-	.type L(DP_PIO4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4))
-
-	.p2align 3
-L(DP_2POW52):
-	.long	0x00000000,0x43300000	/* +2^52 */
-	.long	0x00000000,0xc3300000	/* -2^52 */
-	.type L(DP_2POW52), @object
-	ASM_SIZE_DIRECTIVE(L(DP_2POW52))
-
-	.p2align 3
-L(DP_INVPIO4):
-	.long	0x6dc9c883,0x3ff45f30	/* 4/Pi */
-	.type L(DP_INVPIO4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
-
-	.p2align 3
-L(DP_PIO4HI):
-	.long	0x54000000,0xbfe921fb	/* High part of -Pi/4 */
-	.type L(DP_PIO4HI), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
-
-	.p2align 3
-L(DP_PIO4LO):
-	.long	0x11A62633,0xbe010b46	/* Low part of -Pi/4 */
-	.type L(DP_PIO4LO), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
-
-	.p2align 2
-L(SP_INVPIO4):
-	.long	0x3fa2f983		/* 4/Pi */
-	.type L(SP_INVPIO4), @object
-	ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
-
-	.p2align 4
-L(DP_ABS_MASK): /* Mask for getting DP absolute value */
-	.long	0xffffffff,0x7fffffff
-	.long	0xffffffff,0x7fffffff
-	.type L(DP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
-
-	.p2align 3
-L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
-	.long	0x00000000,0xffffffff
-	.type L(DP_HI_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
-
-weak_alias (__sinf, sinf)
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf.c b/sysdeps/i386/i686/fpu/multiarch/s_sinf.c
deleted file mode 100644
index 8ccdd2f34d..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/s_sinf.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Multiple versions of sinf
-   Copyright (C) 2012-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <init-arch.h>
-
-extern float __sinf_sse2 (float);
-extern float __sinf_ia32 (float);
-float __sinf (float);
-
-libm_ifunc (__sinf, HAS_CPU_FEATURE (SSE2) ? __sinf_sse2 : __sinf_ia32);
-weak_alias (__sinf, sinf);
-#define SINF __sinf_ia32
-#include <sysdeps/ieee754/flt-32/s_sinf.c>
diff --git a/sysdeps/i386/i686/fpu/s_fmax.S b/sysdeps/i386/i686/fpu/s_fmax.S
deleted file mode 100644
index ace8db9410..0000000000
--- a/sysdeps/i386/i686/fpu/s_fmax.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Compute maximum of two numbers, regarding NaN as missing argument.
-   Copyright (C) 1997-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-	.text
-ENTRY(__fmax)
-	fldl	4(%esp)		// x
-	fldl	12(%esp)	// x : y
-
-	fucomi	%st(0), %st
-	fcmovu	%st(1), %st	// now %st contains y if not NaN, x otherwise
-
-	fxch
-
-	fucomi	%st(1), %st
-	fcmovb	%st(1), %st
-
-	fstp	%st(1)
-
-	ret
-END(__fmax)
-weak_alias (__fmax, fmax)
diff --git a/sysdeps/i386/i686/fpu/s_fmaxf.S b/sysdeps/i386/i686/fpu/s_fmaxf.S
deleted file mode 100644
index 3a25951a09..0000000000
--- a/sysdeps/i386/i686/fpu/s_fmaxf.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Compute maximum of two numbers, regarding NaN as missing argument.
-   Copyright (C) 1997-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-	.text
-ENTRY(__fmaxf)
-	flds	4(%esp)		// x
-	flds	8(%esp)		// x : y
-
-	fucomi	%st(0), %st
-	fcmovu	%st(1), %st	// now %st contains y if not NaN, x otherwise
-
-	fxch
-
-	fucomi	%st(1), %st
-	fcmovb	%st(1), %st
-
-	fstp	%st(1)
-
-	ret
-END(__fmaxf)
-weak_alias (__fmaxf, fmaxf)
diff --git a/sysdeps/i386/i686/fpu/s_fmaxl.S b/sysdeps/i386/i686/fpu/s_fmaxl.S
deleted file mode 100644
index 3f6c21c63d..0000000000
--- a/sysdeps/i386/i686/fpu/s_fmaxl.S
+++ /dev/null
@@ -1,58 +0,0 @@
-/* Compute maximum of two numbers, regarding NaN as missing argument.
-   Copyright (C) 1997-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-	.text
-ENTRY(__fmaxl)
-	fldt	4(%esp)		// x
-	fldt	16(%esp)	// x : y
-
-	fucomi	%st(1), %st
-	jp	2f
-	fcmovb	%st(1), %st
-
-	fstp	%st(1)
-
-	ret
-
-2:	// Unordered.
-	fucomi	%st(0), %st
-	jp	3f
-	// st(1) is a NaN; st(0) is not.  Test if st(1) is signaling.
-	testb	$0x40, 11(%esp)
-	jz	4f
-	fstp	%st(1)
-	ret
-
-3:	// st(0) is a NaN; st(1) may or may not be.
-	fxch
-	fucomi	%st(0), %st
-	jp	4f
-	// st(1) is a NaN; st(0) is not.  Test if st(1) is signaling.
-	testb	$0x40, 23(%esp)
-	jz	4f
-	fstp	%st(1)
-	ret
-
-4:	// Both arguments are NaNs, or one is a signaling NaN.
-	faddp
-	ret
-END(__fmaxl)
-weak_alias (__fmaxl, fmaxl)
diff --git a/sysdeps/i386/i686/fpu/s_fmin.S b/sysdeps/i386/i686/fpu/s_fmin.S
deleted file mode 100644
index 72d306fd79..0000000000
--- a/sysdeps/i386/i686/fpu/s_fmin.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Compute minimum of two numbers, regarding NaN as missing argument.
-   Copyright (C) 1997-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-	.text
-ENTRY(__fmin)
-	fldl	4(%esp)		// x
-	fldl	12(%esp)	// x : y
-
-	fucomi	%st(0), %st
-	fcmovu	%st(1), %st	// now %st contains y if not NaN, x otherwise
-
-	fucomi	%st(1), %st
-	fcmovnb	%st(1), %st
-
-	fstp	%st(1)
-
-	ret
-END(__fmin)
-weak_alias (__fmin, fmin)
diff --git a/sysdeps/i386/i686/fpu/s_fminf.S b/sysdeps/i386/i686/fpu/s_fminf.S
deleted file mode 100644
index 52ea892bad..0000000000
--- a/sysdeps/i386/i686/fpu/s_fminf.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Compute minimum of two numbers, regarding NaN as missing argument.
-   Copyright (C) 1997-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-	.text
-ENTRY(__fminf)			// fminf (x, y) on floats; result is left in %st(0)
-	flds	4(%esp)		// x
-	flds	8(%esp)		// x : y
-
-	fucomi	%st(0), %st	// y against itself: unordered only when y is a NaN
-	fcmovu	%st(1), %st	// now %st contains y if not NaN, x otherwise
-
-	fucomi	%st(1), %st	// compare %st with x; CF is set if %st < x or unordered
-	fcmovnb	%st(1), %st	// CF clear (%st >= x): x is the minimum, copy it to %st
-
-	fstp	%st(1)		// overwrite the x slot with the result and pop
-
-	ret
-END(__fminf)
-weak_alias (__fminf, fminf)
diff --git a/sysdeps/i386/i686/fpu/s_fminl.S b/sysdeps/i386/i686/fpu/s_fminl.S
deleted file mode 100644
index e1cb83fed7..0000000000
--- a/sysdeps/i386/i686/fpu/s_fminl.S
+++ /dev/null
@@ -1,58 +0,0 @@
-/* Compute minimum of two numbers, regarding NaN as missing argument.
-   Copyright (C) 1997-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-	.text
-ENTRY(__fminl)			// fminl (x, y) on long doubles; result is left in %st(0)
-	fldt	4(%esp)		// x
-	fldt	16(%esp)	// x : y
-
-	fucomi	%st(1), %st	// compare y with x; PF is set if either one is a NaN
-	jp	2f
-	fcmovnb	%st(1), %st	// ordered and y >= x: x is the minimum, copy it to %st
-
-	fstp	%st(1)		// overwrite the x slot with the result and pop
-
-	ret
-
-2:	// Unordered.
-	fucomi	%st(0), %st	// y against itself: unordered only when y is a NaN
-	jp	3f
-	// st(1) is a NaN; st(0) is not.  Test if st(1) is signaling.
-	testb	$0x40, 11(%esp)	// bit 62 of x (byte 7 of the value stored at 4(%esp))
-	jz	4f		// bit clear: x is signaling
-	fstp	%st(1)		// x is a quiet NaN: return y
-	ret
-
-3:	// st(0) is a NaN; st(1) may or may not be.
-	fxch			// x : y becomes y : x, so x is now in %st
-	fucomi	%st(0), %st	// x against itself: unordered only when x is a NaN
-	jp	4f
-	// st(1) is a NaN; st(0) is not.  Test if st(1) is signaling.
-	testb	$0x40, 23(%esp)	// bit 62 of y (byte 7 of the value stored at 16(%esp))
-	jz	4f		// bit clear: y is signaling
-	fstp	%st(1)		// y is a quiet NaN: return x
-	ret
-
-4:	// Both arguments are NaNs, or one is a signaling NaN.
-	faddp			// return the sum of both, popping down to one stack entry
-	ret
-END(__fminl)
-weak_alias (__fminl, fminl)
diff --git a/sysdeps/i386/i686/hp-timing.h b/sysdeps/i386/i686/hp-timing.h
deleted file mode 100644
index 1b11410feb..0000000000
--- a/sysdeps/i386/i686/hp-timing.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* High precision, low overhead timing functions.  i686 version.
-   Copyright (C) 1998-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _HP_TIMING_H
-#define _HP_TIMING_H	1
-
-/* We always assume having the timestamp register.  */
-#define HP_TIMING_AVAIL		(1)
-#define HP_SMALL_TIMING_AVAIL	(1)
-
-/* We indeed have inlined functions.  */
-#define HP_TIMING_INLINE	(1)
-
-/* We use 64bit values for the times.  */
-typedef unsigned long long int hp_timing_t;
-
-/* That's quite simple.  Use the `rdtsc' instruction; "=A" stores its 64-bit
-   result (EDX:EAX on i386, per GCC's constraint docs) in Var.  The value
-   might not be 100% accurate since other instructions may still be running
-   at this moment.  A barrier like `cpuid' right before the `rdtsc'
-   instruction would change that, but exact cycle counts are not needed.  */
-#define HP_TIMING_NOW(Var)	__asm__ __volatile__ ("rdtsc" : "=A" (Var))
-
-#include <hp-timing-common.h>
-
-#endif	/* hp-timing.h */
diff --git a/sysdeps/i386/i686/init-arch.h b/sysdeps/i386/i686/init-arch.h
deleted file mode 100644
index f55f80efa0..0000000000
--- a/sysdeps/i386/i686/init-arch.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* Copyright (C) 2015-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define MINIMUM_ISA 686	/* set ahead of the include below; presumably read there -- TODO confirm */
-#include <sysdeps/x86/init-arch.h>
diff --git a/sysdeps/i386/i686/memcmp.S b/sysdeps/i386/i686/memcmp.S
deleted file mode 100644
index 5140ee2145..0000000000
--- a/sysdeps/i386/i686/memcmp.S
+++ /dev/null
@@ -1,408 +0,0 @@
-/* Compare two memory blocks for differences in the first COUNT bytes.
-   Copyright (C) 2004-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS		4+4	/* Preserve EBX.  */
-#define BLK1		PARMS
-#define BLK2		BLK1+4
-#define LEN		BLK2+4
-#define ENTRANCE	pushl %ebx; cfi_adjust_cfa_offset (4); \
-			cfi_rel_offset (ebx, 0)
-#define RETURN		popl %ebx; cfi_adjust_cfa_offset (-4); \
-			cfi_restore (ebx); ret
-
-/* Load an entry in a jump table into EBX.  TABLE is a jump table
-   with relative offsets.  INDEX is a register that contains the index
-   into the jump table.  */
-#define LOAD_JUMP_TABLE_ENTRY(TABLE, INDEX) \
-  /* We first load PC into EBX.  */					      \
-  SETUP_PIC_REG(bx);							      \
-  /* Get the address of the jump table.  */				      \
-  addl	$(TABLE - .), %ebx;						      \
-  /* Get the entry and convert the relative offset to the		      \
-     absolute address.  */						      \
-  addl	(%ebx,INDEX,4), %ebx
-
-        .text
-	ALIGN (4)
-ENTRY (memcmp)			/* memcmp (blk1, blk2, len): returns -1, 0 or 1 in EAX */
-	ENTRANCE
-
-	movl	BLK1(%esp), %eax
-	movl	BLK2(%esp), %edx
-	movl	LEN(%esp), %ecx
-
-	cmpl 	$1, %ecx
-	jne	L(not_1)
-	movzbl	(%eax), %ecx		/* LEN == 1  */
-	cmpb	(%edx), %cl		/* CF is set if blk1's byte is below blk2's */
-	jne	L(neq)
-L(bye):
-	xorl	%eax, %eax		/* blocks are equal: return 0 */
-	RETURN
-
-	cfi_adjust_cfa_offset (4)	/* unwind info for the code below, entered with EBX still pushed */
-	cfi_rel_offset (ebx, 0)
-L(neq):
-	sbbl	%eax, %eax		/* EAX = -CF (0 or -1); CF is unchanged */
-	sbbl	$-1, %eax		/* EAX = EAX + 1 - CF: -1 if CF was set, otherwise 1 */
-	RETURN
-
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (ebx, 0)
-L(not_1):
-	jl	L(bye)			/* LEN == 0.  NOTE(review): signed test of the cmpl above, so a LEN with the top bit set also returns 0 */
-
-	pushl	%esi
-	cfi_adjust_cfa_offset (4)
-	movl	%eax, %esi		/* from here on ESI walks blk1 and EDX walks blk2 */
-	cfi_rel_offset (esi, 0)
-	cmpl	$32, %ecx;
-	jge	L(32bytesormore)	/* LEN >= 32  */
-
-	LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx)
-	addl	%ecx, %edx		/* point both registers just past the end of the blocks; */
-	addl	%ecx, %esi		/* the handlers below address the data with negative offsets */
-	jmp	*%ebx
-
-	ALIGN (4)
-L(28bytes):
-	movl	-28(%esi), %eax		/* each handler checks one dword and falls through to the next */
-	movl	-28(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(24bytes):
-	movl	-24(%esi), %eax
-	movl	-24(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(20bytes):
-	movl	-20(%esi), %eax
-	movl	-20(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(16bytes):
-	movl	-16(%esi), %eax
-	movl	-16(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(12bytes):
-	movl	-12(%esi), %eax
-	movl	-12(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(8bytes):
-	movl	-8(%esi), %eax
-	movl	-8(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(4bytes):
-	movl	-4(%esi), %eax
-	movl	-4(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(0bytes):
-	popl	%esi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (esi)
-	xorl	%eax, %eax		/* everything matched: return 0 */
-	RETURN
-
-	cfi_adjust_cfa_offset (8)	/* unwind info for the code below: ESI and EBX are both pushed */
-	cfi_rel_offset (esi, 0)
-	cfi_rel_offset (ebx, 4)
-L(29bytes):
-	movl	-29(%esi), %eax
-	movl	-29(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(25bytes):
-	movl	-25(%esi), %eax
-	movl	-25(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(21bytes):
-	movl	-21(%esi), %eax
-	movl	-21(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(17bytes):
-	movl	-17(%esi), %eax
-	movl	-17(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(13bytes):
-	movl	-13(%esi), %eax
-	movl	-13(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(9bytes):
-	movl	-9(%esi), %eax
-	movl	-9(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(5bytes):
-	movl	-5(%esi), %eax
-	movl	-5(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(1bytes):
-	movzbl	-1(%esi), %eax		/* the single remaining byte */
-	cmpb	-1(%edx), %al
-	jne	L(set)
-	popl	%esi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (esi)
-	xorl	%eax, %eax
-	RETURN
-
-	cfi_adjust_cfa_offset (8)
-	cfi_rel_offset (esi, 0)
-	cfi_rel_offset (ebx, 4)
-L(30bytes):
-	movl	-30(%esi), %eax
-	movl	-30(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(26bytes):
-	movl	-26(%esi), %eax
-	movl	-26(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(22bytes):
-	movl	-22(%esi), %eax
-	movl	-22(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(18bytes):
-	movl	-18(%esi), %eax
-	movl	-18(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(14bytes):
-	movl	-14(%esi), %eax
-	movl	-14(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(10bytes):
-	movl	-10(%esi), %eax
-	movl	-10(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(6bytes):
-	movl	-6(%esi), %eax
-	movl	-6(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(2bytes):
-	movzwl	-2(%esi), %eax		/* the two remaining bytes, zero-extended */
-	movzwl	-2(%edx), %ecx
-	cmpb	%cl, %al		/* lower-addressed byte first */
-	jne	L(set)
-	cmpl	%ecx, %eax		/* low bytes are equal, so the second byte decides */
-	jne	L(set)
-	popl	%esi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (esi)
-	xorl	%eax, %eax
-	RETURN
-
-	cfi_adjust_cfa_offset (8)
-	cfi_rel_offset (esi, 0)
-	cfi_rel_offset (ebx, 4)
-L(31bytes):
-	movl	-31(%esi), %eax
-	movl	-31(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(27bytes):
-	movl	-27(%esi), %eax
-	movl	-27(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(23bytes):
-	movl	-23(%esi), %eax
-	movl	-23(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(19bytes):
-	movl	-19(%esi), %eax
-	movl	-19(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(15bytes):
-	movl	-15(%esi), %eax
-	movl	-15(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(11bytes):
-	movl	-11(%esi), %eax
-	movl	-11(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(7bytes):
-	movl	-7(%esi), %eax
-	movl	-7(%edx), %ecx
-	cmpl	%ecx, %eax
-	jne	L(find_diff)
-L(3bytes):
-	movzwl	-3(%esi), %eax		/* first two of the three remaining bytes */
-	movzwl	-3(%edx), %ecx
-	cmpb	%cl, %al		/* lower-addressed byte first */
-	jne	L(set)
-	cmpl	%ecx, %eax		/* low bytes are equal, so the second byte decides */
-	jne	L(set)
-	movzbl	-1(%esi), %eax		/* then the final byte */
-	cmpb	-1(%edx), %al
-	jne	L(set)
-	popl	%esi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (esi)
-	xorl	%eax, %eax
-	RETURN
-
-	cfi_adjust_cfa_offset (8)
-	cfi_rel_offset (esi, 0)
-	cfi_rel_offset (ebx, 4)
-	ALIGN (4)
-/* ECX >= 32.  */
-L(32bytesormore):
-	subl	$32, %ecx		/* account for the 32 bytes checked in this round */
-
-	movl	(%esi), %eax
-	cmpl	(%edx), %eax
-	jne	L(load_ecx)
-
-	movl	4(%esi), %eax
-	cmpl	4(%edx), %eax
-	jne	L(load_ecx_4)
-
-	movl	8(%esi), %eax
-	cmpl	8(%edx), %eax
-	jne	L(load_ecx_8)
-
-	movl	12(%esi), %eax
-	cmpl	12(%edx), %eax
-	jne	L(load_ecx_12)
-
-	movl	16(%esi), %eax
-	cmpl	16(%edx), %eax
-	jne	L(load_ecx_16)
-
-	movl	20(%esi), %eax
-	cmpl	20(%edx), %eax
-	jne	L(load_ecx_20)
-
-	movl	24(%esi), %eax
-	cmpl	24(%edx), %eax
-	jne	L(load_ecx_24)
-
-	movl	28(%esi), %eax
-	cmpl	28(%edx), %eax
-	jne	L(load_ecx_28)
-
-	addl	$32, %esi
-	addl	$32, %edx
-	cmpl	$32, %ecx
-	jge	L(32bytesormore)	/* at least another 32 bytes left */
-
-	LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx)
-	addl	%ecx, %edx		/* fewer than 32 bytes left: same tail dispatch as above */
-	addl	%ecx, %esi
-	jmp	*%ebx
-
-L(load_ecx_28):
-	addl	$0x4, %edx		/* each label adds 4 and falls through, so EDX ends up at the mismatching dword */
-L(load_ecx_24):
-	addl	$0x4, %edx
-L(load_ecx_20):
-	addl	$0x4, %edx
-L(load_ecx_16):
-	addl	$0x4, %edx
-L(load_ecx_12):
-	addl	$0x4, %edx
-L(load_ecx_8):
-	addl	$0x4, %edx
-L(load_ecx_4):
-	addl	$0x4, %edx
-L(load_ecx):
-	movl	(%edx), %ecx		/* blk2's dword; EAX already holds blk1's */
-
-L(find_diff):
-	cmpb	%cl, %al		/* byte 0, the lowest address */
-	jne	L(set)
-	cmpb	%ch, %ah		/* byte 1 */
-	jne	L(set)
-	shrl	$16,%eax		/* bring bytes 2 and 3 down into the low half */
-	shrl	$16,%ecx
-	cmpb	%cl, %al		/* byte 2 */
-	jne	L(set)
-	/* We get here only if we already know there is a
-	   difference.  */
-	cmpl	%ecx, %eax		/* bytes 0-2 are equal, so byte 3 decides */
-L(set):
-	sbbl	%eax, %eax		/* EAX = -CF (0 or -1); CF is unchanged */
-	sbbl	$-1, %eax		/* EAX = EAX + 1 - CF: -1 if blk1's byte was below, otherwise 1 */
-	popl	%esi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (esi)
-	RETURN
-END (memcmp)
-
-	.section	.rodata
-	ALIGN (2)
-L(table_32bytes) :	/* entry N is the offset of L(Nbytes) from the start of this table */
-	.long	L(0bytes) - L(table_32bytes)
-	.long	L(1bytes) - L(table_32bytes)
-	.long	L(2bytes) - L(table_32bytes)
-	.long	L(3bytes) - L(table_32bytes)
-	.long	L(4bytes) - L(table_32bytes)
-	.long	L(5bytes) - L(table_32bytes)
-	.long	L(6bytes) - L(table_32bytes)
-	.long	L(7bytes) - L(table_32bytes)
-	.long	L(8bytes) - L(table_32bytes)
-	.long	L(9bytes) - L(table_32bytes)
-	.long	L(10bytes) - L(table_32bytes)
-	.long	L(11bytes) - L(table_32bytes)
-	.long	L(12bytes) - L(table_32bytes)
-	.long	L(13bytes) - L(table_32bytes)
-	.long	L(14bytes) - L(table_32bytes)
-	.long	L(15bytes) - L(table_32bytes)
-	.long	L(16bytes) - L(table_32bytes)
-	.long	L(17bytes) - L(table_32bytes)
-	.long	L(18bytes) - L(table_32bytes)
-	.long	L(19bytes) - L(table_32bytes)
-	.long	L(20bytes) - L(table_32bytes)
-	.long	L(21bytes) - L(table_32bytes)
-	.long	L(22bytes) - L(table_32bytes)
-	.long	L(23bytes) - L(table_32bytes)
-	.long	L(24bytes) - L(table_32bytes)
-	.long	L(25bytes) - L(table_32bytes)
-	.long	L(26bytes) - L(table_32bytes)
-	.long	L(27bytes) - L(table_32bytes)
-	.long	L(28bytes) - L(table_32bytes)
-	.long	L(29bytes) - L(table_32bytes)
-	.long	L(30bytes) - L(table_32bytes)
-	.long	L(31bytes) - L(table_32bytes)
-
-
-#undef bcmp
-weak_alias (memcmp, bcmp)
-libc_hidden_builtin_def (memcmp)
diff --git a/sysdeps/i386/i686/memcpy.S b/sysdeps/i386/i686/memcpy.S
deleted file mode 100644
index 1d61447430..0000000000
--- a/sysdeps/i386/i686/memcpy.S
+++ /dev/null
@@ -1,98 +0,0 @@
-/* Copy memory block and return pointer to beginning of destination block
-   For Intel 80x86, x>=6.
-   This file is part of the GNU C Library.
-   Copyright (C) 1999-2017 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS	4		/* no space for saved regs */
-#define RTN	PARMS
-#define DEST	RTN
-#define SRC	DEST+4
-#define LEN	SRC+4
-
-	.text
-#if defined PIC && IS_IN (libc)
-ENTRY_CHK (__memcpy_chk)
-	movl	12(%esp), %eax		/* third argument: the same stack slot as LEN */
-	cmpl	%eax, 16(%esp)		/* fourth argument (presumably the destination size) against it */
-	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* fourth argument < length, unsigned */
-END_CHK (__memcpy_chk)		/* no ret above: presumably falls through into memcpy -- TODO confirm END_CHK */
-#endif
-ENTRY (memcpy)			/* memcpy (dest, src, len): returns dest in EAX */
-
-	movl	%edi, %eax		/* keep the caller's EDI in EAX */
-	movl	DEST(%esp), %edi
-	movl	%esi, %edx		/* keep the caller's ESI in EDX */
-	movl	SRC(%esp), %esi
-
-	movl	%edi, %ecx
-	xorl	%esi, %ecx
-	andl	$3, %ecx		/* nonzero if DEST and SRC differ in their low two bits */
-	movl	LEN(%esp), %ecx		/* movl and cld leave the flags of the andl untouched */
-	cld
-	jne	.Lunaligned
-
-	cmpl	$3, %ecx
-	jbe	.Lunaligned		/* 3 bytes or fewer */
-
-	testl	$3, %esi		/* copy single bytes (at most 3) until ESI is 4-byte aligned */
-	je	1f
-	movsb
-	decl	%ecx
-	testl	$3, %esi
-	je	1f
-	movsb
-	decl	%ecx
-	testl	$3, %esi
-	je	1f
-	movsb
-	decl	%ecx
-1:	pushl	%eax			/* EAX is needed as scratch: park the saved EDI on the stack */
-	movl	%ecx, %eax
-	shrl	$2, %ecx		/* number of whole dwords */
-	andl	$3, %eax		/* 0-3 trailing bytes */
-	rep
-	movsl
-	movl	%eax, %ecx
-	rep
-	movsb
-	popl	%eax
-
-.Lend:	movl	%eax, %edi		/* restore the caller's EDI and ESI */
-	movl	%edx, %esi
-	movl	DEST(%esp), %eax	/* return value: the destination pointer */
-
-	ret
-
-	/* When we come here the pointers do not have the same
-	   alignment or the length is too short.  No need to optimize for
-	   aligned memory accesses. */
-.Lunaligned:
-	shrl	$1, %ecx		/* CF = bit 0 of the length: one odd byte */
-	jnc	1f
-	movsb
-1:	shrl	$1, %ecx		/* CF = bit 1 of the length; ECX is now LEN / 4 */
-	jnc	2f
-	movsw
-2:	rep
-	movsl
-	jmp	.Lend
-END (memcpy)
-libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S
deleted file mode 100644
index f60c3d501b..0000000000
--- a/sysdeps/i386/i686/memmove.S
+++ /dev/null
@@ -1,120 +0,0 @@
-/* Copy memory block and return pointer to beginning of destination block
-   For Intel 80x86, x>=6.
-   This file is part of the GNU C Library.
-   Copyright (C) 2003-2017 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 2003.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS	4+4	/* one spilled register */
-#define RTN	PARMS
-
-	.text
-
-#ifdef USE_AS_BCOPY
-# define SRC	RTN		/* bcopy build: the source is the first argument */
-# define DEST	SRC+4
-# define LEN	DEST+4
-#else
-# define DEST	RTN		/* memmove build: the destination is the first argument */
-# define SRC	DEST+4
-# define LEN	SRC+4
-
-# if defined PIC && IS_IN (libc)
-ENTRY_CHK (__memmove_chk)
-	movl	12(%esp), %eax		/* third argument (the length; EDI is not pushed yet) */
-	cmpl	%eax, 16(%esp)		/* fourth argument (presumably the destination size) against it */
-	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* fourth argument < length, unsigned */
-END_CHK (__memmove_chk)		/* no ret above: presumably falls through into memmove -- TODO confirm END_CHK */
-# endif
-#endif
-
-ENTRY (memmove)
-
-	pushl	%edi
-	cfi_adjust_cfa_offset (4)
-
-	movl	LEN(%esp), %ecx
-	movl	DEST(%esp), %edi
-	cfi_rel_offset (edi, 0)
-	movl	%esi, %edx		/* keep the caller's ESI in EDX */
-	movl	SRC(%esp), %esi
-	cfi_register (esi, edx)
-
-	movl	%edi, %eax
-	subl	%esi, %eax		/* EAX = DEST - SRC */
-	cmpl	%eax, %ecx
-	ja	3f			/* LEN > DEST - SRC (unsigned): DEST lies within [SRC, SRC+LEN) */
-
-	cld
-	shrl	$1, %ecx		/* CF = bit 0 of the length: one odd byte */
-	jnc	1f
-	movsb
-1:	shrl	$1, %ecx		/* CF = bit 1 of the length; ECX is now LEN / 4 */
-	jnc	2f
-	movsw
-2:	rep
-	movsl
-	movl	%edx, %esi		/* restore the caller's ESI */
-	cfi_restore (esi)
-#ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax	/* return value: the destination pointer */
-#endif
-
-	popl	%edi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (edi)
-
-	ret
-
-	cfi_adjust_cfa_offset (4)	/* unwind info for the backward path, entered with EDI still pushed */
-	cfi_rel_offset (edi, 0)
-	cfi_register (esi, edx)
-
-	/* Backward copying.  */
-3:	std				/* string instructions now step downwards */
-	leal	-1(%edi, %ecx), %edi	/* last byte of the destination */
-	leal	-1(%esi, %ecx), %esi	/* last byte of the source */
-	shrl	$1, %ecx		/* CF = bit 0 of the length: one odd byte */
-	jnc	1f
-	movsb
-1:	subl	$1, %edi		/* move from the last byte to the start of the last 2-byte unit */
-	subl	$1, %esi
-	shrl	$1, %ecx		/* CF = bit 1 of the length; ECX is now LEN / 4 */
-	jnc	2f
-	movsw
-2:	subl	$2, %edi		/* move on to the start of the last 4-byte unit */
-	subl	$2, %esi
-	rep
-	movsl
-	movl	%edx, %esi		/* restore the caller's ESI */
-	cfi_restore (esi)
-#ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax	/* return value: the destination pointer */
-#endif
-
-	cld				/* clear the direction flag again before returning */
-	popl	%edi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (edi)
-
-	ret
-END (memmove)
-#ifndef USE_AS_BCOPY
-libc_hidden_builtin_def (memmove)
-#endif
diff --git a/sysdeps/i386/i686/mempcpy.S b/sysdeps/i386/i686/mempcpy.S
deleted file mode 100644
index 31cb4efdb2..0000000000
--- a/sysdeps/i386/i686/mempcpy.S
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Copy memory block and return pointer to following byte.
-   For Intel 80x86, x>=6.
-   This file is part of the GNU C Library.
-   Copyright (C) 1998-2017 Free Software Foundation, Inc.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS	4		/* no space for saved regs */
-#define RTN	PARMS
-#define DEST	RTN
-#define SRC	DEST+4
-#define LEN	SRC+4
-
-	.text
-#if defined PIC && IS_IN (libc)
-ENTRY_CHK (__mempcpy_chk)
-	movl	12(%esp), %eax		/* third argument: the same stack slot as LEN */
-	cmpl	%eax, 16(%esp)		/* fourth argument (presumably the destination size) against it */
-	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* fourth argument < length, unsigned */
-END_CHK (__mempcpy_chk)		/* no ret above: presumably falls through into __mempcpy -- TODO confirm END_CHK */
-#endif
-ENTRY (__mempcpy)		/* mempcpy (dest, src, len): returns dest + len in EAX */
-
-	movl	LEN(%esp), %ecx
-	movl	%edi, %eax		/* keep the caller's EDI in EAX */
-	cfi_register (edi, eax)
-	movl	DEST(%esp), %edi
-	movl	%esi, %edx		/* keep the caller's ESI in EDX */
-	cfi_register (esi, edx)
-	movl	SRC(%esp), %esi
-	cld
-	shrl	$1, %ecx		/* CF = bit 0 of the length: one odd byte */
-	jnc	1f
-	movsb
-1:	shrl	$1, %ecx		/* CF = bit 1 of the length; ECX is now LEN / 4 */
-	jnc	2f
-	movsw
-2:	rep
-	movsl
-	xchgl	%edi, %eax		/* EDI gets its saved value back; EAX = one past the last byte written */
-	cfi_restore (edi)
-	movl	%edx, %esi		/* restore the caller's ESI */
-	cfi_restore (esi)
-
-	ret
-END (__mempcpy)
-libc_hidden_def (__mempcpy)
-weak_alias (__mempcpy, mempcpy)
-libc_hidden_builtin_def (mempcpy)
diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
deleted file mode 100644
index 24d06178d2..0000000000
--- a/sysdeps/i386/i686/memset.S
+++ /dev/null
@@ -1,100 +0,0 @@
-/* memset/bzero -- set memory area to CH/0
-   Highly optimized version for ix86, x>=6.
-   Copyright (C) 1999-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS	4+4	/* space for 1 saved reg */
-#ifdef USE_AS_BZERO
-# define DEST	PARMS
-# define LEN	DEST+4
-#else
-# define RTN	PARMS
-# define DEST	RTN
-# define CHR	DEST+4
-# define LEN	CHR+4
-#endif
-
-        .text
-#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
-ENTRY_CHK (__memset_chk)
-	movl	12(%esp), %eax		/* third argument (the length; EDI is not pushed yet) */
-	cmpl	%eax, 16(%esp)		/* fourth argument (presumably the destination size) against it */
-	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* fourth argument < length, unsigned */
-END_CHK (__memset_chk)		/* no ret above: presumably falls through into memset -- TODO confirm END_CHK */
-#endif
-ENTRY (memset)
-
-	cld
-	pushl	%edi
-	cfi_adjust_cfa_offset (4)
-	movl	DEST(%esp), %edx
-	movl	LEN(%esp), %ecx
-#ifdef USE_AS_BZERO
-	xorl	%eax, %eax	/* fill with 0 */
-#else
-	movzbl	CHR(%esp), %eax	/* fill byte, zero-extended into EAX */
-#endif
-	jecxz	1f		/* LEN == 0: nothing to store */
-	movl	%edx, %edi
-	cfi_rel_offset (edi, 0)
-	andl	$3, %edx	/* EDX = DEST mod 4; sets ZF and PF for the two jumps below */
-	jz	2f	/* aligned */
-	jp	3f	/* misaligned at 3, store just one byte below */
-	stosb		/* misaligned at 1 or 2, store two bytes */
-	decl	%ecx
-	jz	1f
-3:	stosb
-	decl	%ecx
-	jz	1f
-	xorl	$1, %edx	/* zero only if EDX was 1 */
-	jnz	2f	/* was misaligned at 2 or 3, now aligned */
-	stosb		/* was misaligned at 1, store third byte */
-	decl	%ecx	/* no zero test here: the rep stores below do nothing when ECX is 0 */
-2:	movl	%ecx, %edx
-	shrl	$2, %ecx	/* number of whole dwords */
-	andl	$3, %edx	/* 0-3 trailing bytes */
-#ifndef USE_AS_BZERO
-	imul	$0x01010101, %eax	/* replicate the fill byte into all four bytes of EAX */
-#endif
-	rep
-	stosl
-	movl	%edx, %ecx
-	rep
-	stosb
-
-1:
-#ifndef USE_AS_BZERO
-	movl DEST(%esp), %eax	/* start address of destination is result */
-#endif
-	popl	%edi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (edi)
-
-	ret
-END (memset)
-libc_hidden_builtin_def (memset)
-
-#if defined SHARED && IS_IN (libc) && !defined __memset_chk \
-    && !defined USE_AS_BZERO
-strong_alias (__memset_chk, __memset_zero_constant_len_parameter)	/* second name for __memset_chk, tied to the warning text below */
-	.section .gnu.warning.__memset_zero_constant_len_parameter
-	.string "memset used with constant zero length parameter; this could be due to transposed parameters"
-#endif
diff --git a/sysdeps/i386/i686/memusage.h b/sysdeps/i386/i686/memusage.h
deleted file mode 100644
index 77a020d7c0..0000000000
--- a/sysdeps/i386/i686/memusage.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define GETSP() ({ register uintptr_t stack_ptr asm ("esp"); stack_ptr; })	/* yields the value of a register variable bound to %esp */
-#define GETTIME(low,high) asm ("rdtsc" : "=a" (low), "=d" (high))	/* rdtsc: EAX to low, EDX to high.  NOTE(review): asm is not volatile, so GCC may merge or move repeated reads -- confirm that is acceptable */
-
-#include <sysdeps/generic/memusage.h>
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
deleted file mode 100644
index 4a0c20c051..0000000000
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ /dev/null
@@ -1,44 +0,0 @@
-ifeq ($(subdir),csu)
-tests += test-multiarch
-endif
-
-ifeq ($(subdir),string)
-gen-as-const-headers += locale-defines.sym
-sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
-		   memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \
-		   memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
-		   memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
-		   strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
-		   memcmp-ssse3 memcmp-sse4 varshift \
-		   strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \
-		   strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \
-		   strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
-		   strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \
-		   strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \
-		   memchr-sse2 memchr-sse2-bsf \
-		   memrchr-sse2 memrchr-sse2-bsf memrchr-c \
-		   rawmemchr-sse2 rawmemchr-sse2-bsf \
-		   strnlen-sse2 strnlen-c \
-		   strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \
-		   strncase_l-c strncase-c strncase_l-ssse3 \
-		   strcasecmp_l-sse4 strncase_l-sse4 \
-		   bcopy-sse2-unaligned memcpy-sse2-unaligned \
-		   mempcpy-sse2-unaligned memmove-sse2-unaligned \
-		   strcspn-c strpbrk-c strspn-c
-CFLAGS-varshift.c += -msse4
-CFLAGS-strcspn-c.c += -msse4
-CFLAGS-strpbrk-c.c += -msse4
-CFLAGS-strspn-c.c += -msse4
-endif
-
-ifeq ($(subdir),wcsmbs)
-sysdep_routines += wcscmp-sse2 wcscmp-c wcslen-sse2 wcslen-c \
-		   wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcschr-sse2 \
-		   wcschr-c wcsrchr-sse2 wcsrchr-c wcscpy-ssse3 wcscpy-c
-endif
-
-ifeq ($(subdir),math)
-libm-sysdep_routines += s_fma-fma s_fmaf-fma
-CFLAGS-s_fma-fma.c += -mavx -mfpmath=sse
-CFLAGS-s_fmaf-fma.c += -mavx -mfpmath=sse
-endif
diff --git a/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S
deleted file mode 100644
index efef2a10dd..0000000000
--- a/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMMOVE
-#define USE_AS_BCOPY
-#define MEMCPY		__bcopy_sse2_unaligned
-#include "memcpy-sse2-unaligned.S"
diff --git a/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S
deleted file mode 100644
index cbc8b420e8..0000000000
--- a/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMMOVE
-#define USE_AS_BCOPY
-#define MEMCPY		__bcopy_ssse3_rep
-#include "memcpy-ssse3-rep.S"
diff --git a/sysdeps/i386/i686/multiarch/bcopy-ssse3.S b/sysdeps/i386/i686/multiarch/bcopy-ssse3.S
deleted file mode 100644
index 36aac44b9c..0000000000
--- a/sysdeps/i386/i686/multiarch/bcopy-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMMOVE
-#define USE_AS_BCOPY
-#define MEMCPY		__bcopy_ssse3
-#include "memcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/bcopy.S
deleted file mode 100644
index 877f82c28f..0000000000
--- a/sysdeps/i386/i686/multiarch/bcopy.S
+++ /dev/null
@@ -1,59 +0,0 @@
-/* Multiple versions of bcopy
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib.  */
-#if IS_IN (libc)
-	.text
-ENTRY(bcopy)
-	.type	bcopy, @gnu_indirect_function	/* bcopy is an IFUNC: the code below only picks an implementation */
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__bcopy_ia32)	/* default choice */
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f	/* presumably taken when SSE2 is absent (ZF set by the macro) -- TODO confirm; keeps __bcopy_ia32 */
-	LOAD_FUNC_GOT_EAX (__bcopy_sse2_unaligned)
-	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz	2f	/* same assumption: Fast_Unaligned_Load present, keep __bcopy_sse2_unaligned */
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f	/* same assumption: no SSSE3, keep __bcopy_sse2_unaligned */
-	LOAD_FUNC_GOT_EAX (__bcopy_ssse3)
-	HAS_ARCH_FEATURE (Fast_Rep_String)
-	jz	2f	/* same assumption: no Fast_Rep_String, keep __bcopy_ssse3 */
-	LOAD_FUNC_GOT_EAX (__bcopy_ssse3_rep)
-2:	ret		/* the selection made above is in EAX, going by the macro name */
-END(bcopy)
-
-# undef ENTRY		/* from here on ENTRY/END ignore their argument and always emit the hidden symbol __bcopy_ia32 */
-# define ENTRY(name) \
-	.type __bcopy_ia32, @function; \
-	.p2align 4; \
-	.globl __bcopy_ia32; \
-	.hidden __bcopy_ia32; \
-	__bcopy_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __bcopy_ia32, .-__bcopy_ia32
-
-#endif
-
-#include "../bcopy.S"	/* presumably the plain i686 bcopy; inside libc its ENTRY/END use the overrides above */
diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S b/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S
deleted file mode 100644
index 507b288bb3..0000000000
--- a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_BZERO
-#define __memset_sse2_rep __bzero_sse2_rep
-#include "memset-sse2-rep.S"
diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2.S b/sysdeps/i386/i686/multiarch/bzero-sse2.S
deleted file mode 100644
index 8d04512e4e..0000000000
--- a/sysdeps/i386/i686/multiarch/bzero-sse2.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_BZERO
-#define __memset_sse2 __bzero_sse2
-#include "memset-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/bzero.S b/sysdeps/i386/i686/multiarch/bzero.S
deleted file mode 100644
index 9dac490aa2..0000000000
--- a/sysdeps/i386/i686/multiarch/bzero.S
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Multiple versions of bzero
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib.  */
-#if IS_IN (libc)
-	.text
-ENTRY(__bzero)
-	.type	__bzero, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__bzero_ia32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX ( __bzero_sse2)
-	HAS_ARCH_FEATURE (Fast_Rep_String)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__bzero_sse2_rep)
-2:	ret
-END(__bzero)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __bzero_ia32, @function; \
-	.p2align 4; \
-	.globl __bzero_ia32; \
-	.hidden __bzero_ia32; \
-	__bzero_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __bzero_ia32, .-__bzero_ia32
-
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-#  define libc_hidden_builtin_def(name) \
-	.globl __GI___bzero; __GI___bzero = __bzero_ia32
-# endif
-#endif
-
-#include "../bzero.S"
diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
deleted file mode 100644
index e8026a2a78..0000000000
--- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
+++ /dev/null
@@ -1,376 +0,0 @@
-/* Enumerate available IFUNC implementations of a function.  i686 version.
-   Copyright (C) 2012-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <assert.h>
-#include <string.h>
-#include <wchar.h>
-#include <ifunc-impl-list.h>
-#include "init-arch.h"
-
-/* Maximum number of IFUNC implementations.  */
-#define MAX_IFUNC	4
-
-/* Fill ARRAY of MAX elements with IFUNC implementations for function
-   NAME and return the number of valid entries.  */
-
-size_t
-__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
-			size_t max)
-{
-  assert (max >= MAX_IFUNC);
-
-  size_t i = 0;
-
-  /* Support sysdeps/i386/i686/multiarch/bcopy.S.  */
-  IFUNC_IMPL (i, name, bcopy,
-	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3),
-			      __bcopy_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3),
-			      __bcopy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSE2),
-			      __bcopy_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/bzero.S.  */
-  IFUNC_IMPL (i, name, bzero,
-	      IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2),
-			      __bzero_sse2_rep)
-	      IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2),
-			      __bzero_sse2)
-	      IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/memchr.S.  */
-  IFUNC_IMPL (i, name, memchr,
-	      IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2),
-			      __memchr_sse2_bsf)
-	      IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2),
-			      __memchr_sse2)
-	      IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/memcmp.S.  */
-  IFUNC_IMPL (i, name, memcmp,
-	      IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_2),
-			      __memcmp_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3),
-			      __memcmp_ssse3)
-	      IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/memmove_chk.S.  */
-  IFUNC_IMPL (i, name, __memmove_chk,
-	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
-			      HAS_CPU_FEATURE (SSSE3),
-			      __memmove_chk_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
-			      HAS_CPU_FEATURE (SSSE3),
-			      __memmove_chk_ssse3)
-	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
-			      HAS_CPU_FEATURE (SSE2),
-			      __memmove_chk_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
-			      __memmove_chk_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/memmove.S.  */
-  IFUNC_IMPL (i, name, memmove,
-	      IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
-			      __memmove_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
-			      __memmove_ssse3)
-	      IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSE2),
-			      __memmove_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/memrchr.S.  */
-  IFUNC_IMPL (i, name, memrchr,
-	      IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2),
-			      __memrchr_sse2_bsf)
-	      IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2),
-			      __memrchr_sse2)
-	      IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/memset_chk.S.  */
-  IFUNC_IMPL (i, name, __memset_chk,
-	      IFUNC_IMPL_ADD (array, i, __memset_chk,
-			      HAS_CPU_FEATURE (SSE2),
-			      __memset_chk_sse2_rep)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk,
-			      HAS_CPU_FEATURE (SSE2),
-			      __memset_chk_sse2)
-	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
-			      __memset_chk_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/memset.S.  */
-  IFUNC_IMPL (i, name, memset,
-	      IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2),
-			      __memset_sse2_rep)
-	      IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2),
-			      __memset_sse2)
-	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/rawmemchr.S.  */
-  IFUNC_IMPL (i, name, rawmemchr,
-	      IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2),
-			      __rawmemchr_sse2_bsf)
-	      IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2),
-			      __rawmemchr_sse2)
-	      IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/stpncpy.S.  */
-  IFUNC_IMPL (i, name, stpncpy,
-	      IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSSE3),
-			      __stpncpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSE2),
-			      __stpncpy_sse2)
-	      IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/stpcpy.S.  */
-  IFUNC_IMPL (i, name, stpcpy,
-	      IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSSE3),
-			      __stpcpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSE2),
-			      __stpcpy_sse2)
-	      IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strcasecmp.S.  */
-  IFUNC_IMPL (i, name, strcasecmp,
-	      IFUNC_IMPL_ADD (array, i, strcasecmp,
-			      HAS_CPU_FEATURE (SSE4_2),
-			      __strcasecmp_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, strcasecmp,
-			      HAS_CPU_FEATURE (SSSE3),
-			      __strcasecmp_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strcasecmp_l.S.  */
-  IFUNC_IMPL (i, name, strcasecmp_l,
-	      IFUNC_IMPL_ADD (array, i, strcasecmp_l,
-			      HAS_CPU_FEATURE (SSE4_2),
-			      __strcasecmp_l_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, strcasecmp_l,
-			      HAS_CPU_FEATURE (SSSE3),
-			      __strcasecmp_l_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1,
-			      __strcasecmp_l_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strcat.S.  */
-  IFUNC_IMPL (i, name, strcat,
-	      IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3),
-			      __strcat_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSE2),
-			      __strcat_sse2)
-	      IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strchr.S.  */
-  IFUNC_IMPL (i, name, strchr,
-	      IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2),
-			      __strchr_sse2_bsf)
-	      IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2),
-			      __strchr_sse2)
-	      IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strcmp.S.  */
-  IFUNC_IMPL (i, name, strcmp,
-	      IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2),
-			      __strcmp_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3),
-			      __strcmp_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strcpy.S.  */
-  IFUNC_IMPL (i, name, strcpy,
-	      IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3),
-			      __strcpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSE2),
-			      __strcpy_sse2)
-	      IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strcspn.S.  */
-  IFUNC_IMPL (i, name, strcspn,
-	      IFUNC_IMPL_ADD (array, i, strcspn, HAS_CPU_FEATURE (SSE4_2),
-			      __strcspn_sse42)
-	      IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strncase.S.  */
-  IFUNC_IMPL (i, name, strncasecmp,
-	      IFUNC_IMPL_ADD (array, i, strncasecmp,
-			      HAS_CPU_FEATURE (SSE4_2),
-			      __strncasecmp_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, strncasecmp,
-			      HAS_CPU_FEATURE (SSSE3),
-			      __strncasecmp_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strncasecmp, 1,
-			      __strncasecmp_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strncase_l.S.  */
-  IFUNC_IMPL (i, name, strncasecmp_l,
-	      IFUNC_IMPL_ADD (array, i, strncasecmp_l,
-			      HAS_CPU_FEATURE (SSE4_2),
-			      __strncasecmp_l_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, strncasecmp_l,
-			      HAS_CPU_FEATURE (SSSE3),
-			      __strncasecmp_l_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1,
-			      __strncasecmp_l_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strncat.S.  */
-  IFUNC_IMPL (i, name, strncat,
-	      IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3),
-			      __strncat_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSE2),
-			      __strncat_sse2)
-	      IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strncpy.S.  */
-  IFUNC_IMPL (i, name, strncpy,
-	      IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3),
-			      __strncpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSE2),
-			      __strncpy_sse2)
-	      IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strnlen.S.  */
-  IFUNC_IMPL (i, name, strnlen,
-	      IFUNC_IMPL_ADD (array, i, strnlen, HAS_CPU_FEATURE (SSE2),
-			      __strnlen_sse2)
-	      IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strpbrk.S.  */
-  IFUNC_IMPL (i, name, strpbrk,
-	      IFUNC_IMPL_ADD (array, i, strpbrk, HAS_CPU_FEATURE (SSE4_2),
-			      __strpbrk_sse42)
-	      IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strrchr.S.  */
-  IFUNC_IMPL (i, name, strrchr,
-	      IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2),
-			      __strrchr_sse2_bsf)
-	      IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2),
-			      __strrchr_sse2)
-	      IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strspn.S.  */
-  IFUNC_IMPL (i, name, strspn,
-	      IFUNC_IMPL_ADD (array, i, strspn, HAS_CPU_FEATURE (SSE4_2),
-			      __strspn_sse42)
-	      IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/wcschr.S.  */
-  IFUNC_IMPL (i, name, wcschr,
-	      IFUNC_IMPL_ADD (array, i, wcschr, HAS_CPU_FEATURE (SSE2),
-			      __wcschr_sse2)
-	      IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/wcscmp.S.  */
-  IFUNC_IMPL (i, name, wcscmp,
-	      IFUNC_IMPL_ADD (array, i, wcscmp, HAS_CPU_FEATURE (SSE2),
-			      __wcscmp_sse2)
-	      IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/wcscpy.S.  */
-  IFUNC_IMPL (i, name, wcscpy,
-	      IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3),
-			      __wcscpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/wcslen.S.  */
-  IFUNC_IMPL (i, name, wcslen,
-	      IFUNC_IMPL_ADD (array, i, wcslen, HAS_CPU_FEATURE (SSE2),
-			      __wcslen_sse2)
-	      IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/wcsrchr.S.  */
-  IFUNC_IMPL (i, name, wcsrchr,
-	      IFUNC_IMPL_ADD (array, i, wcsrchr, HAS_CPU_FEATURE (SSE2),
-			      __wcsrchr_sse2)
-	      IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/wmemcmp.S.  */
-  IFUNC_IMPL (i, name, wmemcmp,
-	      IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_2),
-			      __wmemcmp_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3),
-			      __wmemcmp_ssse3)
-	      IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_ia32))
-
-#ifdef SHARED
-  /* Support sysdeps/i386/i686/multiarch/memcpy_chk.S.  */
-  IFUNC_IMPL (i, name, __memcpy_chk,
-	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-			      HAS_CPU_FEATURE (SSSE3),
-			      __memcpy_chk_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-			      HAS_CPU_FEATURE (SSSE3),
-			      __memcpy_chk_ssse3)
-	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
-			      HAS_CPU_FEATURE (SSE2),
-			      __memcpy_chk_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
-			      __memcpy_chk_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/memcpy.S.  */
-  IFUNC_IMPL (i, name, memcpy,
-	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
-			      __memcpy_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
-			      __memcpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSE2),
-			      __memcpy_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S.  */
-  IFUNC_IMPL (i, name, __mempcpy_chk,
-	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-			      HAS_CPU_FEATURE (SSSE3),
-			      __mempcpy_chk_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-			      HAS_CPU_FEATURE (SSSE3),
-			      __mempcpy_chk_ssse3)
-	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
-			      HAS_CPU_FEATURE (SSE2),
-			      __mempcpy_chk_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
-			      __mempcpy_chk_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/mempcpy.S.  */
-  IFUNC_IMPL (i, name, mempcpy,
-	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
-			      __mempcpy_ssse3_rep)
-	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
-			      __mempcpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSE2),
-			      __mempcpy_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strlen.S.  */
-  IFUNC_IMPL (i, name, strlen,
-	      IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2),
-			      __strlen_sse2_bsf)
-	      IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2),
-			      __strlen_sse2)
-	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_ia32))
-
-  /* Support sysdeps/i386/i686/multiarch/strncmp.S.  */
-  IFUNC_IMPL (i, name, strncmp,
-	      IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2),
-			      __strncmp_sse4_2)
-	      IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3),
-			      __strncmp_ssse3)
-	      IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_ia32))
-#endif
-
-  return i;
-}
diff --git a/sysdeps/i386/i686/multiarch/locale-defines.sym b/sysdeps/i386/i686/multiarch/locale-defines.sym
deleted file mode 100644
index aebff9a4f9..0000000000
--- a/sysdeps/i386/i686/multiarch/locale-defines.sym
+++ /dev/null
@@ -1,11 +0,0 @@
-#include <locale/localeinfo.h>
-#include <langinfo.h>
-#include <stddef.h>
-
---
-
-LOCALE_T___LOCALES		offsetof (struct __locale_struct, __locales)
-LC_CTYPE
-_NL_CTYPE_NONASCII_CASE
-LOCALE_DATA_VALUES		offsetof (struct __locale_data, values)
-SIZEOF_VALUES			sizeof (((struct __locale_data *) 0)->values[0])
diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
deleted file mode 100644
index dd316486e6..0000000000
--- a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
+++ /dev/null
@@ -1,502 +0,0 @@
-/* Optimized memchr with sse2
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS  4
-# define STR1  PARMS
-# define STR2  STR1+4
-
-# ifndef USE_AS_RAWMEMCHR
-#  define LEN   STR2+4
-#  define RETURN  POP(%edi); ret; CFI_PUSH(%edi);
-# endif
-
-# ifndef MEMCHR
-#  define MEMCHR __memchr_sse2_bsf
-# endif
-
-	.text
-ENTRY (MEMCHR)
-
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-
-# ifndef USE_AS_RAWMEMCHR
-	mov	LEN(%esp), %edx
-	test	%edx, %edx
-	jz	L(return_null_1)
-# endif
-	mov	%ecx, %eax
-
-	punpcklbw %xmm1, %xmm1
-	punpcklbw %xmm1, %xmm1
-
-	and	$63, %ecx
-	pshufd	$0, %xmm1, %xmm1
-
-	cmp	$48, %ecx
-	ja	L(crosscache)
-
-	movdqu	(%eax), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-/* Check if there is a match.  */
-	pmovmskb %xmm0, %ecx
-	test	%ecx, %ecx
-	je	L(unaligned_no_match_1)
-/* Check which byte is a match.  */
-	bsf	%ecx, %ecx
-
-# ifndef USE_AS_RAWMEMCHR
-	sub	%ecx, %edx
-	jbe	L(return_null_1)
-# endif
-	add	%ecx, %eax
-	ret
-
-	.p2align 4
-L(unaligned_no_match_1):
-# ifndef USE_AS_RAWMEMCHR
-	sub	$16, %edx
-	jbe	L(return_null_1)
-	PUSH	(%edi)
-	lea	16(%eax), %edi
-	and	$15, %eax
-	and	$-16, %edi
-	add	%eax, %edx
-# else
-	lea	16(%eax), %edx
-	and	$-16, %edx
-# endif
-	jmp	L(loop_prolog)
-
-	.p2align 4
-L(return_null_1):
-	xor	%eax, %eax
-	ret
-
-# ifndef USE_AS_RAWMEMCHR
-	CFI_POP	(%edi)
-# endif
-
-	.p2align 4
-L(crosscache):
-/* Handle unaligned string.  */
-
-# ifndef USE_AS_RAWMEMCHR
-	PUSH	(%edi)
-	mov	%eax, %edi
-	and	$15, %ecx
-	and	$-16, %edi
-	movdqa	(%edi), %xmm0
-# else
-	mov	%eax, %edx
-	and	$15, %ecx
-	and	$-16, %edx
-	movdqa	(%edx), %xmm0
-# endif
-	pcmpeqb	%xmm1, %xmm0
-/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-/* Remove the leading bytes.  */
-	sar	%cl, %eax
-	test	%eax, %eax
-	je	L(unaligned_no_match)
-/* Check which byte is a match.  */
-	bsf	%eax, %eax
-
-# ifndef USE_AS_RAWMEMCHR
-	sub	%eax, %edx
-	jbe	L(return_null)
-	add	%edi, %eax
-	add	%ecx, %eax
-	RETURN
-# else
-	add	%edx, %eax
-	add	%ecx, %eax
-	ret
-# endif
-
-	.p2align 4
-L(unaligned_no_match):
-# ifndef USE_AS_RAWMEMCHR
-        /* Calculate the last acceptable address and check for possible
-           addition overflow by using satured math:
-           edx = ecx + edx
-           edx |= -(edx < ecx)  */
-	add	%ecx, %edx
-	sbb	%eax, %eax
-	or	%eax, %edx
-	sub	$16, %edx
-	jbe	L(return_null)
-	add	$16, %edi
-# else
-	add	$16, %edx
-# endif
-
-	.p2align 4
-/* Loop start on aligned string.  */
-L(loop_prolog):
-# ifndef USE_AS_RAWMEMCHR
-	sub	$64, %edx
-	jbe	L(exit_loop)
-	movdqa	(%edi), %xmm0
-# else
-	movdqa	(%edx), %xmm0
-# endif
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	16(%edi), %xmm2
-# else
-	movdqa	16(%edx), %xmm2
-# endif
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	32(%edi), %xmm3
-# else
-	movdqa	32(%edx), %xmm3
-# endif
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	48(%edi), %xmm4
-# else
-	movdqa	48(%edx), %xmm4
-# endif
-	pcmpeqb	%xmm1, %xmm4
-
-# ifndef USE_AS_RAWMEMCHR
-	add	$64, %edi
-# else
-	add	$64, %edx
-# endif
-	pmovmskb %xmm4, %eax
-	test	%eax, %eax
-	jnz	L(matches0)
-
-# ifndef USE_AS_RAWMEMCHR
-	test	$0x3f, %edi
-# else
-	test	$0x3f, %edx
-# endif
-	jz	L(align64_loop)
-
-# ifndef USE_AS_RAWMEMCHR
-	sub	$64, %edx
-	jbe	L(exit_loop)
-	movdqa	(%edi), %xmm0
-# else
-	movdqa	(%edx), %xmm0
-# endif
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	16(%edi), %xmm2
-# else
-	movdqa	16(%edx), %xmm2
-# endif
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	32(%edi), %xmm3
-# else
-	movdqa	32(%edx), %xmm3
-# endif
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	48(%edi), %xmm3
-# else
-	movdqa	48(%edx), %xmm3
-# endif
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-
-# ifndef USE_AS_RAWMEMCHR
-	add	$64, %edi
-# else
-	add	$64, %edx
-# endif
-	test	%eax, %eax
-	jnz	L(matches0)
-
-# ifndef USE_AS_RAWMEMCHR
-	mov	%edi, %ecx
-	and	$-64, %edi
-	and	$63, %ecx
-	add	%ecx, %edx
-# else
-	and	$-64, %edx
-# endif
-
-	.p2align 4
-L(align64_loop):
-# ifndef USE_AS_RAWMEMCHR
-	sub	$64, %edx
-	jbe	L(exit_loop)
-	movdqa	(%edi), %xmm0
-	movdqa	16(%edi), %xmm2
-	movdqa	32(%edi), %xmm3
-	movdqa	48(%edi), %xmm4
-# else
-	movdqa	(%edx), %xmm0
-	movdqa	16(%edx), %xmm2
-	movdqa	32(%edx), %xmm3
-	movdqa	48(%edx), %xmm4
-# endif
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm1, %xmm2
-	pcmpeqb	%xmm1, %xmm3
-	pcmpeqb	%xmm1, %xmm4
-
-	pmaxub	%xmm0, %xmm3
-	pmaxub	%xmm2, %xmm4
-	pmaxub	%xmm3, %xmm4
-	pmovmskb %xmm4, %eax
-
-# ifndef USE_AS_RAWMEMCHR
-	add	$64, %edi
-# else
-	add	$64, %edx
-# endif
-
-	test	%eax, %eax
-	jz	L(align64_loop)
-
-# ifndef USE_AS_RAWMEMCHR
-	sub	$64, %edi
-# else
-	sub	$64, %edx
-# endif
-
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	32(%edi), %xmm3
-# else
-	movdqa	32(%edx), %xmm3
-# endif
-
-	pcmpeqb	%xmm1, %xmm3
-
-# ifndef USE_AS_RAWMEMCHR
-	pcmpeqb	48(%edi), %xmm1
-# else
-	pcmpeqb	48(%edx), %xmm1
-# endif
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	pmovmskb %xmm1, %eax
-	bsf	%eax, %eax
-
-# ifndef USE_AS_RAWMEMCHR
-	lea	48(%edi, %eax), %eax
-	RETURN
-# else
-	lea	48(%edx, %eax), %eax
-	ret
-# endif
-
-# ifndef USE_AS_RAWMEMCHR
-	.p2align 4
-L(exit_loop):
-	add	$64, %edx
-	cmp	$32, %edx
-	jbe	L(exit_loop_32)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-
-	movdqa	16(%edi), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-	movdqa	32(%edi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches32_1)
-	cmp	$48, %edx
-	jbe	L(return_null)
-
-	pcmpeqb	48(%edi), %xmm1
-	pmovmskb %xmm1, %eax
-	test	%eax, %eax
-	jnz	L(matches48_1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(exit_loop_32):
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches_1)
-	cmp	$16, %edx
-	jbe	L(return_null)
-
-	pcmpeqb	16(%edi), %xmm1
-	pmovmskb %xmm1, %eax
-	test	%eax, %eax
-	jnz	L(matches16_1)
-	xor	%eax, %eax
-	RETURN
-# endif
-	.p2align 4
-L(matches0):
-	bsf	%eax, %eax
-# ifndef USE_AS_RAWMEMCHR
-	lea	-16(%eax, %edi), %eax
-	RETURN
-# else
-	lea	-16(%eax, %edx), %eax
-	ret
-# endif
-
-	.p2align 4
-L(matches):
-	bsf	%eax, %eax
-# ifndef USE_AS_RAWMEMCHR
-	add	%edi, %eax
-	RETURN
-# else
-	add	%edx, %eax
-	ret
-# endif
-
-	.p2align 4
-L(matches16):
-	bsf	%eax, %eax
-# ifndef USE_AS_RAWMEMCHR
-	lea	16(%eax, %edi), %eax
-	RETURN
-# else
-	lea	16(%eax, %edx), %eax
-	ret
-# endif
-
-	.p2align 4
-L(matches32):
-	bsf	%eax, %eax
-# ifndef USE_AS_RAWMEMCHR
-	lea	32(%eax, %edi), %eax
-	RETURN
-# else
-	lea	32(%eax, %edx), %eax
-	ret
-# endif
-
-# ifndef USE_AS_RAWMEMCHR
-	.p2align 4
-L(matches_1):
-	bsf	%eax, %eax
-	sub	%eax, %edx
-	jbe	L(return_null)
-
-	add	%edi, %eax
-	RETURN
-
-	.p2align 4
-L(matches16_1):
-	sub	$16, %edx
-	bsf	%eax, %eax
-	sub	%eax, %edx
-	jbe	L(return_null)
-
-	lea	16(%edi, %eax), %eax
-	RETURN
-
-	.p2align 4
-L(matches32_1):
-	sub	$32, %edx
-	bsf	%eax, %eax
-	sub	%eax, %edx
-	jbe	L(return_null)
-
-	lea	32(%edi, %eax), %eax
-	RETURN
-
-	.p2align 4
-L(matches48_1):
-	sub	$48, %edx
-	bsf	%eax, %eax
-	sub	%eax, %edx
-	jbe	L(return_null)
-
-	lea	48(%edi, %eax), %eax
-	RETURN
-# endif
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-# ifndef USE_AS_RAWMEMCHR
-	RETURN
-# else
-	ret
-# endif
-
-END (MEMCHR)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2.S b/sysdeps/i386/i686/multiarch/memchr-sse2.S
deleted file mode 100644
index 172d70de13..0000000000
--- a/sysdeps/i386/i686/multiarch/memchr-sse2.S
+++ /dev/null
@@ -1,709 +0,0 @@
-/* Optimized memchr with sse2 without bsf
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)	/* Unwind info for a 4-byte push of REG.  */
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)	/* Unwind info for the matching pop.  */
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)	/* pushl plus its unwind info.  */
-# define POP(REG) popl REG; CFI_POP (REG)	/* popl plus its unwind info.  */
-
-# ifndef USE_AS_RAWMEMCHR	/* memchr build: %edi is saved on entry.  */
-#  define ENTRANCE PUSH(%edi);
-#  define PARMS  8	/* Return address + saved %edi precede the arguments.  */
-#  define RETURN  POP(%edi); ret; CFI_PUSH(%edi);	/* The CFI_PUSH after ret re-declares %edi as saved for the code that follows.  */
-# else	/* rawmemchr build: nothing is pushed.  */
-#  define ENTRANCE
-#  define PARMS  4	/* Only the return address precedes the arguments.  */
-# endif
-
-# define STR1  PARMS	/* %esp offset of argument 1 (loaded as the pointer).  */
-# define STR2  STR1+4	/* %esp offset of argument 2 (loaded as the byte to find).  */
-
-# ifndef USE_AS_RAWMEMCHR
-#  define LEN   STR2+4	/* %esp offset of argument 3 (loaded as the length).  */
-# endif
-
-# ifndef MEMCHR
-#  define MEMCHR __memchr_sse2	/* Default symbol name unless the includer chose one.  */
-# endif
-
-	atom_text_section
-ENTRY (MEMCHR)	/* Arguments: pointer, byte value and (memchr only) length.  */
-	ENTRANCE
-	mov	STR1(%esp), %ecx	/* ecx = start pointer.  */
-	movd	STR2(%esp), %xmm1	/* Low dword of xmm1 = byte value argument.  */
-# ifndef USE_AS_RAWMEMCHR
-	mov	LEN(%esp), %edx	/* edx = length.  */
-	test	%edx, %edx
-	jz	L(return_null)	/* Length 0: nothing to search.  */
-# endif
-
-	punpcklbw %xmm1, %xmm1
-# ifndef USE_AS_RAWMEMCHR
-	mov	%ecx, %edi	/* memchr keeps the scan pointer in edi.  */
-# else
-	mov	%ecx, %edx	/* rawmemchr keeps the scan pointer in edx.  */
-# endif
-	punpcklbw %xmm1, %xmm1
-
-	and	$63, %ecx	/* ecx = start pointer mod 64.  */
-	pshufd	$0, %xmm1, %xmm1	/* With the two punpcklbw: the byte replicated 16 times.  */
-	cmp	$48, %ecx
-	ja	L(crosscache)	/* A 16-byte load at the start would cross a 64-byte boundary.  */
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqu	(%edi), %xmm0	/* Unaligned load of the first 16 bytes.  */
-# else
-	movdqu	(%edx), %xmm0	/* Unaligned load of the first 16 bytes.  */
-# endif
-	pcmpeqb	%xmm1, %xmm0	/* 0xff in every byte equal to the target.  */
-	pmovmskb %xmm0, %eax	/* Bit i of eax is set iff byte i matched.  */
-	test	%eax, %eax
-# ifndef USE_AS_RAWMEMCHR
-	jnz	L(match_case2_prolog)	/* Match found; that path checks it against the length.  */
-
-	sub	$16, %edx
-	jbe	L(return_null)	/* Length <= 16: everything was just checked.  */
-	lea	16(%edi), %edi
-	and	$15, %ecx	/* ecx = start pointer mod 16.  */
-	and	$-16, %edi	/* edi = first 16-byte boundary after the start.  */
-	add	%ecx, %edx	/* edx = bytes left counting from the new edi.  */
-# else
-	jnz	L(match_case1_prolog)
-	lea	16(%edx), %edx
-	and	$-16, %edx	/* edx = first 16-byte boundary after the start.  */
-# endif
-	jmp	L(loop_prolog)
-
-	.p2align 4
-L(crosscache):	/* Reached when (start mod 64) > 48; ecx still holds start mod 64.  */
-	and	$15, %ecx	/* ecx = start pointer mod 16.  */
-# ifndef USE_AS_RAWMEMCHR
-	and	$-16, %edi
-	movdqa	(%edi), %xmm0	/* Aligned load of the 16-byte block containing the start.  */
-# else
-	and	$-16, %edx
-	movdqa	(%edx), %xmm0	/* Aligned load of the 16-byte block containing the start.  */
-# endif
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	sar	%cl, %eax	/* Shift out the mask bits of the bytes before the start.  */
-	test	%eax, %eax
-
-# ifndef USE_AS_RAWMEMCHR
-	jnz	L(match_case2_prolog1)	/* That path adds ecx back to edi and checks the length.  */
-        /* "ecx" is less than 16.  Calculate "edx + ecx - 16" by using
-	   "edx - (16 - ecx)" instead of "(edx + ecx) - 16" to avoid
-	   possible addition overflow.  */
-	neg	%ecx
-	add	$16, %ecx	/* ecx = 16 - (start mod 16) = bytes examined above.  */
-	sub	%ecx, %edx
-	jbe	L(return_null)	/* The length did not reach past this block.  */
-	lea	16(%edi), %edi	/* Advance to the next 16-byte block.  */
-# else
-	jnz	L(match_case1_prolog1)
-	lea	16(%edx), %edx	/* Advance to the next 16-byte block.  */
-# endif	/* Execution continues with the code that follows.  */
-
-	.p2align 4
-L(loop_prolog):	/* Tests up to two 64-byte groups with aligned 16-byte loads.  */
-# ifndef USE_AS_RAWMEMCHR
-	sub	$64, %edx
-	jbe	L(exit_loop)	/* 64 or fewer bytes left: use the length-checked tail.  */
-	movdqa	(%edi), %xmm0
-# else
-	movdqa	(%edx), %xmm0
-# endif
-	pcmpeqb	%xmm1, %xmm0
-	xor	%ecx, %ecx	/* ecx tracks the offset of the 16-byte block under test.  */
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	16(%edi), %xmm2
-# else
-	movdqa	16(%edx), %xmm2
-# endif
-	pcmpeqb	%xmm1, %xmm2
-	lea	16(%ecx), %ecx	/* ecx = 16.  */
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	32(%edi), %xmm3
-# else
-	movdqa	32(%edx), %xmm3
-# endif
-	pcmpeqb	%xmm1, %xmm3
-	lea	16(%ecx), %ecx	/* ecx = 32.  */
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	48(%edi), %xmm4
-# else
-	movdqa	48(%edx), %xmm4
-# endif
-	pcmpeqb	%xmm1, %xmm4
-	lea	16(%ecx), %ecx	/* ecx = 48.  */
-	pmovmskb %xmm4, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
-	lea	64(%edi), %edi	/* First group had no match; step to the second.  */
-	sub	$64, %edx
-	jbe	L(exit_loop)
-
-	movdqa	(%edi), %xmm0
-# else
-	lea	64(%edx), %edx	/* First group had no match; step to the second.  */
-	movdqa	(%edx), %xmm0
-# endif
-	pcmpeqb	%xmm1, %xmm0
-	xor	%ecx, %ecx	/* Block offset restarts at 0.  */
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	16(%edi), %xmm2
-# else
-	movdqa	16(%edx), %xmm2
-# endif
-	pcmpeqb	%xmm1, %xmm2
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	32(%edi), %xmm3
-# else
-	movdqa	32(%edx), %xmm3
-# endif
-	pcmpeqb	%xmm1, %xmm3
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	48(%edi), %xmm4
-# else
-	movdqa	48(%edx), %xmm4
-# endif
-	pcmpeqb	%xmm1, %xmm4
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm4, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
-	lea	64(%edi), %edi
-	mov	%edi, %ecx
-	and	$-64, %edi	/* Round the pointer down to a 64-byte boundary.  */
-	and	$63, %ecx	/* ecx = bytes stepped back over by the rounding.  */
-	add	%ecx, %edx	/* Count those bytes as remaining again.  */
-# else
-	lea	64(%edx), %edx
-	and	$-64, %edx	/* Round the pointer down to a 64-byte boundary.  */
-# endif
-
-	.p2align 4
-L(align64_loop):	/* Main loop: one 64-byte chunk per iteration.  */
-
-# ifndef USE_AS_RAWMEMCHR
-	sub	$64, %edx
-	jbe	L(exit_loop)	/* 64 or fewer bytes left: use the length-checked tail.  */
-	movdqa	(%edi), %xmm0
-	movdqa	16(%edi), %xmm2
-	movdqa	32(%edi), %xmm3
-	movdqa	48(%edi), %xmm4
-# else
-	movdqa	(%edx), %xmm0
-	movdqa	16(%edx), %xmm2
-	movdqa	32(%edx), %xmm3
-	movdqa	48(%edx), %xmm4
-# endif
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm1, %xmm2
-	pcmpeqb	%xmm1, %xmm3
-	pcmpeqb	%xmm1, %xmm4
-
-	pmaxub	%xmm0, %xmm3
-	pmaxub	%xmm2, %xmm4
-	pmaxub	%xmm3, %xmm4	/* xmm4 = bytewise max of all four compare results.  */
-# ifndef USE_AS_RAWMEMCHR
-	add	$64, %edi
-# else
-	add	$64, %edx
-# endif
-	pmovmskb %xmm4, %eax	/* Nonzero iff any of the 64 bytes matched.  */
-
-	test	%eax, %eax
-	jz	L(align64_loop)
-
-# ifndef USE_AS_RAWMEMCHR
-	sub	$64, %edi	/* Point back at the chunk that matched.  */
-# else
-	sub	$64, %edx	/* Point back at the chunk that matched.  */
-# endif
-
-	pmovmskb %xmm0, %eax	/* Now locate the first matching 16-byte block.  */
-	xor	%ecx, %ecx
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-	pmovmskb %xmm2, %eax
-	lea	16(%ecx), %ecx
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
-	movdqa	32(%edi), %xmm3	/* xmm3 was overwritten by pmaxub; reload and recompare.  */
-# else
-	movdqa	32(%edx), %xmm3	/* xmm3 was overwritten by pmaxub; reload and recompare.  */
-# endif
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	lea	16(%ecx), %ecx
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
-	pcmpeqb	48(%edi), %xmm1	/* Overwrites the pattern in xmm1; no compare follows.  */
-# else
-	pcmpeqb	48(%edx), %xmm1	/* Overwrites the pattern in xmm1; no compare follows.  */
-# endif
-	pmovmskb %xmm1, %eax
-	lea	16(%ecx), %ecx	/* ecx = 48; execution continues with the code that follows.  */
-
-	.p2align 4
-L(match_case1):	/* eax = 16-bit match mask, ecx = block offset; no length check on this path.  */
-# ifndef USE_AS_RAWMEMCHR
-	add	%ecx, %edi	/* edi = address of the matching 16-byte block.  */
-# else
-L(match_case1_prolog1):
-	add	%ecx, %edx	/* edx = address the mask in eax is relative to.  */
-L(match_case1_prolog):
-# endif
-	test	%al, %al
-	jz	L(match_case1_high)	/* No match among bytes 0-7.  */
-	mov	%al, %cl
-	and	$15, %cl
-	jz	L(match_case1_8)	/* No match among bytes 0-3.  */
-	test	$0x01, %al
-	jnz	L(ExitCase1_1)
-	test	$0x02, %al
-	jnz	L(ExitCase1_2)
-	test	$0x04, %al
-	jnz	L(ExitCase1_3)
-# ifndef USE_AS_RAWMEMCHR
-	lea	3(%edi), %eax	/* Bits 0-2 clear, so the match is byte 3.  */
-	RETURN
-# else
-	lea	3(%edx), %eax	/* Bits 0-2 clear, so the match is byte 3.  */
-	ret
-# endif
-
-	.p2align 4
-L(match_case1_8):	/* First match is among bytes 4-7.  */
-	test	$0x10, %al
-	jnz	L(ExitCase1_5)
-	test	$0x20, %al
-	jnz	L(ExitCase1_6)
-	test	$0x40, %al
-	jnz	L(ExitCase1_7)
-# ifndef USE_AS_RAWMEMCHR
-	lea	7(%edi), %eax	/* Bits 4-6 clear, so the match is byte 7.  */
-	RETURN
-# else
-	lea	7(%edx), %eax	/* Bits 4-6 clear, so the match is byte 7.  */
-	ret
-# endif
-
-	.p2align 4
-L(match_case1_high):	/* First match is among bytes 8-15 (mask bits in ah).  */
-	mov	%ah, %ch
-	and	$15, %ch
-	jz	L(match_case1_high_8)	/* No match among bytes 8-11.  */
-	test	$0x01, %ah
-	jnz	L(ExitCase1_9)
-	test	$0x02, %ah
-	jnz	L(ExitCase1_10)
-	test	$0x04, %ah
-	jnz	L(ExitCase1_11)
-# ifndef USE_AS_RAWMEMCHR
-	lea	11(%edi), %eax	/* Remaining possibility: byte 11.  */
-	RETURN
-# else
-	lea	11(%edx), %eax	/* Remaining possibility: byte 11.  */
-	ret
-# endif
-
-	.p2align 4
-L(match_case1_high_8):	/* First match is among bytes 12-15.  */
-	test	$0x10, %ah
-	jnz	L(ExitCase1_13)
-	test	$0x20, %ah
-	jnz	L(ExitCase1_14)
-	test	$0x40, %ah
-	jnz	L(ExitCase1_15)
-# ifndef USE_AS_RAWMEMCHR
-	lea	15(%edi), %eax	/* Remaining possibility: byte 15.  */
-	RETURN
-# else
-	lea	15(%edx), %eax	/* Remaining possibility: byte 15.  */
-	ret
-# endif
-
-# ifndef USE_AS_RAWMEMCHR	/* Length-limited tail; memchr only.  */
-	.p2align 4
-L(exit_loop):	/* Reached via "sub $64, %edx; jbe", so at most 64 bytes remain.  */
-	add	$64, %edx	/* Undo that subtraction: edx = bytes left from edi.  */
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	xor	%ecx, %ecx	/* ecx = offset of the 16-byte block under test.  */
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(match_case2)	/* Match; that path checks it against edx.  */
-	cmp	$16, %edx
-	jbe	L(return_null)	/* Nothing left beyond the first block.  */
-
-	movdqa	16(%edi), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(match_case2)
-	cmp	$32, %edx
-	jbe	L(return_null)	/* Nothing left beyond the second block.  */
-
-	movdqa	32(%edi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(match_case2)
-	cmp	$48, %edx
-	jbe	L(return_null)	/* Nothing left beyond the third block.  */
-
-	pcmpeqb	48(%edi), %xmm1	/* Last compare; the pattern in xmm1 is no longer needed.  */
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm1, %eax
-	test	%eax, %eax
-	jnz	L(match_case2)
-
-	xor	%eax, %eax	/* No match in the tail: return NULL.  */
-	RETURN
-# endif
-
-	.p2align 4
-L(ExitCase1_1):	/* Mask bit 0: return block address + 0.  */
-# ifndef USE_AS_RAWMEMCHR
-	mov	%edi, %eax
-	RETURN
-# else
-	mov	%edx, %eax
-	ret
-# endif
-
-	.p2align 4
-L(ExitCase1_2):	/* Mask bit 1: return block address + 1.  */
-# ifndef USE_AS_RAWMEMCHR
-	lea	1(%edi), %eax
-	RETURN
-# else
-	lea	1(%edx), %eax
-	ret
-# endif
-
-	.p2align 4
-L(ExitCase1_3):	/* Mask bit 2: return block address + 2.  */
-# ifndef USE_AS_RAWMEMCHR
-	lea	2(%edi), %eax
-	RETURN
-# else
-	lea	2(%edx), %eax
-	ret
-# endif
-
-	.p2align 4
-L(ExitCase1_5):	/* Mask bit 4: return block address + 4.  */
-# ifndef USE_AS_RAWMEMCHR
-	lea	4(%edi), %eax
-	RETURN
-# else
-	lea	4(%edx), %eax
-	ret
-# endif
-
-	.p2align 4
-L(ExitCase1_6):	/* Mask bit 5: return block address + 5.  */
-# ifndef USE_AS_RAWMEMCHR
-	lea	5(%edi), %eax
-	RETURN
-# else
-	lea	5(%edx), %eax
-	ret
-# endif
-
-	.p2align 4
-L(ExitCase1_7):	/* Mask bit 6: return block address + 6.  */
-# ifndef USE_AS_RAWMEMCHR
-	lea	6(%edi), %eax
-	RETURN
-# else
-	lea	6(%edx), %eax
-	ret
-# endif
-
-	.p2align 4
-L(ExitCase1_9):	/* Mask bit 8: return block address + 8.  */
-# ifndef USE_AS_RAWMEMCHR
-	lea	8(%edi), %eax
-	RETURN
-# else
-	lea	8(%edx), %eax
-	ret
-# endif
-
-	.p2align 4
-L(ExitCase1_10):	/* Mask bit 9: return block address + 9.  */
-# ifndef USE_AS_RAWMEMCHR
-	lea	9(%edi), %eax
-	RETURN
-# else
-	lea	9(%edx), %eax
-	ret
-# endif
-
-	.p2align 4
-L(ExitCase1_11):	/* Mask bit 10: return block address + 10.  */
-# ifndef USE_AS_RAWMEMCHR
-	lea	10(%edi), %eax
-	RETURN
-# else
-	lea	10(%edx), %eax
-	ret
-# endif
-
-	.p2align 4
-L(ExitCase1_13):	/* Mask bit 12: return block address + 12.  */
-# ifndef USE_AS_RAWMEMCHR
-	lea	12(%edi), %eax
-	RETURN
-# else
-	lea	12(%edx), %eax
-	ret
-# endif
-
-	.p2align 4
-L(ExitCase1_14):	/* Mask bit 13: return block address + 13.  */
-# ifndef USE_AS_RAWMEMCHR
-	lea	13(%edi), %eax
-	RETURN
-# else
-	lea	13(%edx), %eax
-	ret
-# endif
-
-	.p2align 4
-L(ExitCase1_15):	/* Mask bit 14: return block address + 14.  */
-# ifndef USE_AS_RAWMEMCHR
-	lea	14(%edi), %eax
-	RETURN
-# else
-	lea	14(%edx), %eax
-	ret
-# endif
-
-# ifndef USE_AS_RAWMEMCHR	/* Length-checked match decoding; memchr only.  */
-	.p2align 4
-L(match_case2):	/* eax = 16-bit match mask, ecx = block offset, edx = bytes left.  */
-	sub	%ecx, %edx	/* edx = bytes left counting from the matching block.  */
-L(match_case2_prolog1):
-	add	%ecx, %edi	/* edi = address the mask in eax is relative to.  */
-L(match_case2_prolog):
-	test	%al, %al
-	jz	L(match_case2_high)	/* No match among bytes 0-7.  */
-	mov	%al, %cl
-	and	$15, %cl
-	jz	L(match_case2_8)	/* No match among bytes 0-3.  */
-	test	$0x01, %al
-	jnz	L(ExitCase2_1)
-	test	$0x02, %al
-	jnz	L(ExitCase2_2)
-	test	$0x04, %al
-	jnz	L(ExitCase2_3)
-	sub	$4, %edx
-	jb	L(return_null)	/* Byte 3 lies beyond the bytes left.  */
-	lea	3(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case2_8):	/* First match is among bytes 4-7.  */
-	test	$0x10, %al
-	jnz	L(ExitCase2_5)
-	test	$0x20, %al
-	jnz	L(ExitCase2_6)
-	test	$0x40, %al
-	jnz	L(ExitCase2_7)
-	sub	$8, %edx
-	jb	L(return_null)	/* Byte 7 lies beyond the bytes left.  */
-	lea	7(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case2_high):	/* First match is among bytes 8-15 (mask bits in ah).  */
-	mov	%ah, %ch
-	and	$15, %ch
-	jz	L(match_case2_high_8)	/* No match among bytes 8-11.  */
-	test	$0x01, %ah
-	jnz	L(ExitCase2_9)
-	test	$0x02, %ah
-	jnz	L(ExitCase2_10)
-	test	$0x04, %ah
-	jnz	L(ExitCase2_11)
-	sub	$12, %edx
-	jb	L(return_null)	/* Byte 11 lies beyond the bytes left.  */
-	lea	11(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case2_high_8):	/* First match is among bytes 12-15.  */
-	test	$0x10, %ah
-	jnz	L(ExitCase2_13)
-	test	$0x20, %ah
-	jnz	L(ExitCase2_14)
-	test	$0x40, %ah
-	jnz	L(ExitCase2_15)
-	sub	$16, %edx
-	jb	L(return_null)	/* Byte 15 lies beyond the bytes left.  */
-	lea	15(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(ExitCase2_1):	/* Mask bit 0: returned without a length check.  */
-	mov	%edi, %eax
-	RETURN
-
-	.p2align 4
-L(ExitCase2_2):	/* Mask bit 1: needs at least 2 bytes left.  */
-	sub	$2, %edx
-	jb	L(return_null)
-	lea	1(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(ExitCase2_3):	/* Mask bit 2: needs at least 3 bytes left.  */
-	sub	$3, %edx
-	jb	L(return_null)
-	lea	2(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(ExitCase2_5):	/* Mask bit 4: needs at least 5 bytes left.  */
-	sub	$5, %edx
-	jb	L(return_null)
-	lea	4(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(ExitCase2_6):	/* Mask bit 5: needs at least 6 bytes left.  */
-	sub	$6, %edx
-	jb	L(return_null)
-	lea	5(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(ExitCase2_7):	/* Mask bit 6: needs at least 7 bytes left.  */
-	sub	$7, %edx
-	jb	L(return_null)
-	lea	6(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(ExitCase2_9):	/* Mask bit 8: needs at least 9 bytes left.  */
-	sub	$9, %edx
-	jb	L(return_null)
-	lea	8(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(ExitCase2_10):	/* Mask bit 9: needs at least 10 bytes left.  */
-	sub	$10, %edx
-	jb	L(return_null)
-	lea	9(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(ExitCase2_11):	/* Mask bit 10: needs at least 11 bytes left.  */
-	sub	$11, %edx
-	jb	L(return_null)
-	lea	10(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(ExitCase2_13):	/* Mask bit 12: needs at least 13 bytes left.  */
-	sub	$13, %edx
-	jb	L(return_null)
-	lea	12(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(ExitCase2_14):	/* Mask bit 13: needs at least 14 bytes left.  */
-	sub	$14, %edx
-	jb	L(return_null)
-	lea	13(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(ExitCase2_15):	/* Mask bit 14: needs at least 15 bytes left.  */
-	sub	$15, %edx
-	jb	L(return_null)
-	lea	14(%edi), %eax
-	RETURN
-# endif
-
-	.p2align 4
-L(return_null):	/* No match: return a null pointer.  */
-	xor	%eax, %eax
-# ifndef USE_AS_RAWMEMCHR
-	RETURN
-# else
-	ret
-# endif
-
-END (MEMCHR)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memchr.S b/sysdeps/i386/i686/multiarch/memchr.S
deleted file mode 100644
index bd0dace290..0000000000
--- a/sysdeps/i386/i686/multiarch/memchr.S
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Multiple versions of memchr
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
-	.text
-ENTRY(__memchr)	/* IFUNC selector choosing one of three memchr implementations.  */
-	.type	__memchr, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	HAS_CPU_FEATURE (SSE2)	/* NOTE(review): presumably leaves ZF set when SSE2 is absent; confirm in init-arch.h.  */
-	jz	2f	/* Taken: select __memchr_ia32.  */
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jz	3f	/* Taken: select __memchr_sse2_bsf.  */
-
-	LOAD_FUNC_GOT_EAX ( __memchr_sse2)	/* Neither jump taken: the SSE2 version without bsf.  */
-	ret
-
-2:	LOAD_FUNC_GOT_EAX (__memchr_ia32)
-	ret
-
-3:	LOAD_FUNC_GOT_EAX (__memchr_sse2_bsf)
-	ret
-END(__memchr)
-
-weak_alias(__memchr, memchr)	/* memchr is a weak alias of the selector.  */
-
-# undef ENTRY	/* Redefined so the file included below defines __memchr_ia32.  */
-# define ENTRY(name) \
-	.type __memchr_ia32, @function; \
-	.globl __memchr_ia32; \
-	.p2align 4; \
-	__memchr_ia32: cfi_startproc; \
-	CALL_MCOUNT	/* The name argument is ignored; the symbol is always __memchr_ia32.  */
-# undef END	/* Likewise for the end-of-function bookkeeping.  */
-# define END(name) \
-	cfi_endproc; .size __memchr_ia32, .-__memchr_ia32
-
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with hidden functions in a shared library, since
-   they are called without setting up EBX, which the PLT used by IFUNC
-   needs.  So bind the hidden alias directly to the ia32 version.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_memchr; __GI_memchr = __memchr_ia32
-
-#endif
-#include "../../memchr.S"
diff --git a/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
deleted file mode 100644
index 2aa13048b2..0000000000
--- a/sysdeps/i386/i686/multiarch/memcmp-sse4.S
+++ /dev/null
@@ -1,1225 +0,0 @@
-/* memcmp with SSE4.2, wmemcmp with SSE4.2
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# ifndef MEMCMP
-#  define MEMCMP	__memcmp_sse4_2	/* Default symbol name unless the includer chose one.  */
-# endif
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)	/* Unwind info for a 4-byte push of REG.  */
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)	/* Unwind info for the matching pop.  */
-
-# define PUSH(REG)	pushl REG; CFI_PUSH (REG)	/* pushl plus its unwind info.  */
-# define POP(REG)	popl REG; CFI_POP (REG)	/* popl plus its unwind info.  */
-
-# define PARMS	4	/* Arguments are read at entry, before anything is pushed.  */
-# define BLK1	PARMS	/* %esp offset of argument 1 (first block).  */
-# define BLK2	BLK1 + 4	/* %esp offset of argument 2 (second block).  */
-# define LEN	BLK2 + 4	/* %esp offset of argument 3 (length).  */
-# define RETURN	POP (%ebx); ret; CFI_PUSH (%ebx)	/* The CFI_PUSH after ret re-declares %ebx as saved for the code that follows.  */
-
-
-# ifdef SHARED
-#  define JMPTBL(I, B)	I - B	/* Table entry: offset of I relative to B.  */
-
-/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
-   jump table with relative offsets.  INDEX is a register that contains
-   the index into the jump table.  SCALE is the scale of INDEX.  */
-
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
-/* We first load PC into EBX.  */	\
-	SETUP_PIC_REG(bx);	\
-/* Get the address of the jump table.  */	\
-	addl	$(TABLE - .), %ebx;	\
-/* Get the entry and convert the relative offset to the	\
-	absolute	address.  */	\
-	addl	(%ebx,INDEX,SCALE), %ebx;	\
-/* We loaded the jump table and adjusted EDX/ESI. Go.  */	\
-	jmp	*%ebx
-# else
-#  define JMPTBL(I, B)	I	/* Table entry: the address I itself.  */
-
-/* Branch through an entry in a jump table.  TABLE is a jump table of
-   addresses (JMPTBL (I, B) is just I here).  INDEX is a register that
-   contains the index into the table.  SCALE is the scale of INDEX.  */
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
-	jmp	*TABLE(,INDEX,SCALE)
-# endif
-
-
-/* Warning!
-           wmemcmp has to use SIGNED comparison for elements.
-           memcmp has to use UNSIGNED comparison for elements.
-*/
-
-	.section .text.sse4.2,"ax",@progbits
-ENTRY (MEMCMP)
-	movl	BLK1(%esp), %eax
-	movl	BLK2(%esp), %edx
-	movl	LEN(%esp), %ecx
-
-# ifdef USE_AS_WMEMCMP
-	shl	$2, %ecx
-	test	%ecx, %ecx
-	jz	L(return0)
-# else
-	cmp	$1, %ecx
-	jbe	L(less1bytes)
-# endif
-
-	pxor	%xmm0, %xmm0
-	cmp	$64, %ecx
-	ja	L(64bytesormore)
-	cmp	$8, %ecx
-
-# ifndef USE_AS_WMEMCMP
-	PUSH	(%ebx)
-	jb	L(less8bytes)
-# else
-	jb	L(less8bytes)
-	PUSH	(%ebx)
-# endif
-
-	add	%ecx, %edx
-	add	%ecx, %eax
-	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
-
-# ifndef USE_AS_WMEMCMP
-	.p2align 4
-L(less8bytes):
-	mov	(%eax), %bl
-	cmpb	(%edx), %bl
-	jne	L(nonzero)
-
-	mov	1(%eax), %bl
-	cmpb	1(%edx), %bl
-	jne	L(nonzero)
-
-	cmp	$2, %ecx
-	jz	L(0bytes)
-
-	mov	2(%eax), %bl
-	cmpb	2(%edx), %bl
-	jne	L(nonzero)
-
-	cmp	$3, %ecx
-	jz	L(0bytes)
-
-	mov	3(%eax), %bl
-	cmpb	3(%edx), %bl
-	jne	L(nonzero)
-
-	cmp	$4, %ecx
-	jz	L(0bytes)
-
-	mov	4(%eax), %bl
-	cmpb	4(%edx), %bl
-	jne	L(nonzero)
-
-	cmp	$5, %ecx
-	jz	L(0bytes)
-
-	mov	5(%eax), %bl
-	cmpb	5(%edx), %bl
-	jne	L(nonzero)
-
-	cmp	$6, %ecx
-	jz	L(0bytes)
-
-	mov	6(%eax), %bl
-	cmpb	6(%edx), %bl
-	je	L(0bytes)
-
-L(nonzero):
-	POP	(%ebx)
-	mov	$1, %eax
-	ja	L(above)
-	neg	%eax
-L(above):
-	ret
-	CFI_PUSH (%ebx)
-# endif
-
-	.p2align 4
-L(0bytes):
-	POP	(%ebx)
-	xor	%eax, %eax
-	ret
-
-# ifdef USE_AS_WMEMCMP
-
-/* for wmemcmp, case N == 1 */
-
-	.p2align 4
-L(less8bytes):
-	mov	(%eax), %ecx
-	cmp	(%edx), %ecx
-	je	L(return0)
-	mov	$1, %eax
-	jg	L(find_diff_bigger)
-	neg	%eax
-	ret
-
-	.p2align 4
-L(find_diff_bigger):
-	ret
-
-	.p2align 4
-L(return0):
-	xor	%eax, %eax
-	ret
-# endif
-
-# ifndef USE_AS_WMEMCMP
-	.p2align 4
-L(less1bytes):
-	jb	L(0bytesend)
-	movzbl	(%eax), %eax
-	movzbl	(%edx), %edx
-	sub	%edx, %eax
-	ret
-
-	.p2align 4
-L(0bytesend):
-	xor	%eax, %eax
-	ret
-# endif
-	.p2align 4
-L(64bytesormore):
-	PUSH	(%ebx)
-	mov	%ecx, %ebx
-	mov	$64, %ecx
-	sub	$64, %ebx
-L(64bytesormore_loop):
-	movdqu	(%eax), %xmm1
-	movdqu	(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(find_16diff)
-
-	movdqu	16(%eax), %xmm1
-	movdqu	16(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(find_32diff)
-
-	movdqu	32(%eax), %xmm1
-	movdqu	32(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(find_48diff)
-
-	movdqu	48(%eax), %xmm1
-	movdqu	48(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(find_64diff)
-	add	%ecx, %eax
-	add	%ecx, %edx
-	sub	%ecx, %ebx
-	jae	L(64bytesormore_loop)
-	add	%ebx, %ecx
-	add	%ecx, %edx
-	add	%ecx, %eax
-	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
-
-# ifdef USE_AS_WMEMCMP
-
-/* This label is needed only for filling table_64bytes.  */
-L(unreal_case):
-/* no code here */
-
-# endif
-	.p2align 4
-L(find_16diff):
-	sub	$16, %ecx
-L(find_32diff):
-	sub	$16, %ecx
-L(find_48diff):
-	sub	$16, %ecx
-L(find_64diff):
-	add	%ecx, %edx
-	add	%ecx, %eax
-
-# ifndef USE_AS_WMEMCMP
-	.p2align 4
-L(16bytes):
-	mov	-16(%eax), %ecx
-	mov	-16(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(12bytes):
-	mov	-12(%eax), %ecx
-	mov	-12(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(8bytes):
-	mov	-8(%eax), %ecx
-	mov	-8(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(4bytes):
-	mov	-4(%eax), %ecx
-	mov	-4(%edx), %ebx
-	cmp	%ebx, %ecx
-	mov	$0, %eax
-	jne	L(find_diff)
-	RETURN
-# else
-	.p2align 4
-L(16bytes):
-	mov	-16(%eax), %ecx
-	cmp	-16(%edx), %ecx
-	jne	L(find_diff)
-L(12bytes):
-	mov	-12(%eax), %ecx
-	cmp	-12(%edx), %ecx
-	jne	L(find_diff)
-L(8bytes):
-	mov	-8(%eax), %ecx
-	cmp	-8(%edx), %ecx
-	jne	L(find_diff)
-L(4bytes):
-	mov	-4(%eax), %ecx
-	cmp	-4(%edx), %ecx
-	mov	$0, %eax
-	jne	L(find_diff)
-	RETURN
-# endif
-
-# ifndef USE_AS_WMEMCMP
-	.p2align 4
-L(49bytes):
-	movdqu	-49(%eax), %xmm1
-	movdqu	-49(%edx), %xmm2
-	mov	$-49, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(33bytes):
-	movdqu	-33(%eax), %xmm1
-	movdqu	-33(%edx), %xmm2
-	mov	$-33, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(17bytes):
-	mov	-17(%eax), %ecx
-	mov	-17(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(13bytes):
-	mov	-13(%eax), %ecx
-	mov	-13(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(9bytes):
-	mov	-9(%eax), %ecx
-	mov	-9(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(5bytes):
-	mov	-5(%eax), %ecx
-	mov	-5(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	movzbl	-1(%eax), %ecx
-	cmp	-1(%edx), %cl
-	mov	$0, %eax
-	jne	L(end)
-	RETURN
-
-	.p2align 4
-L(50bytes):
-	mov	$-50, %ebx
-	movdqu	-50(%eax), %xmm1
-	movdqu	-50(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(34bytes):
-	mov	$-34, %ebx
-	movdqu	-34(%eax), %xmm1
-	movdqu	-34(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(18bytes):
-	mov	-18(%eax), %ecx
-	mov	-18(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(14bytes):
-	mov	-14(%eax), %ecx
-	mov	-14(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(10bytes):
-	mov	-10(%eax), %ecx
-	mov	-10(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(6bytes):
-	mov	-6(%eax), %ecx
-	mov	-6(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(2bytes):
-	movzwl	-2(%eax), %ecx
-	movzwl	-2(%edx), %ebx
-	cmp	%bl, %cl
-	jne	L(end)
-	cmp	%bh, %ch
-	mov	$0, %eax
-	jne	L(end)
-	RETURN
-
-	.p2align 4
-L(51bytes):
-	mov	$-51, %ebx
-	movdqu	-51(%eax), %xmm1
-	movdqu	-51(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(35bytes):
-	mov	$-35, %ebx
-	movdqu	-35(%eax), %xmm1
-	movdqu	-35(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(19bytes):
-	movl	-19(%eax), %ecx
-	movl	-19(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(15bytes):
-	movl	-15(%eax), %ecx
-	movl	-15(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(11bytes):
-	movl	-11(%eax), %ecx
-	movl	-11(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(7bytes):
-	movl	-7(%eax), %ecx
-	movl	-7(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(3bytes):
-	movzwl	-3(%eax), %ecx
-	movzwl	-3(%edx), %ebx
-	cmpb	%bl, %cl
-	jne	L(end)
-	cmp	%bx, %cx
-	jne	L(end)
-L(1bytes):
-	movzbl	-1(%eax), %eax
-	cmpb	-1(%edx), %al
-	mov	$0, %eax
-	jne	L(end)
-	RETURN
-# endif
-	.p2align 4
-L(52bytes):
-	movdqu	-52(%eax), %xmm1
-	movdqu	-52(%edx), %xmm2
-	mov	$-52, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(36bytes):
-	movdqu	-36(%eax), %xmm1
-	movdqu	-36(%edx), %xmm2
-	mov	$-36, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(20bytes):
-	movdqu	-20(%eax), %xmm1
-	movdqu	-20(%edx), %xmm2
-	mov	$-20, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-	mov	-4(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
-	mov	-4(%edx), %ebx
-	cmp	%ebx, %ecx
-# else
-	cmp	-4(%edx), %ecx
-# endif
-	mov	$0, %eax
-	jne	L(find_diff)
-	RETURN
-
-# ifndef USE_AS_WMEMCMP
-	.p2align 4
-L(53bytes):
-	movdqu	-53(%eax), %xmm1
-	movdqu	-53(%edx), %xmm2
-	mov	$-53, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(37bytes):
-	mov	$-37, %ebx
-	movdqu	-37(%eax), %xmm1
-	movdqu	-37(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(21bytes):
-	mov	$-21, %ebx
-	movdqu	-21(%eax), %xmm1
-	movdqu	-21(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-	mov	-5(%eax), %ecx
-	mov	-5(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	movzbl	-1(%eax), %ecx
-	cmp	-1(%edx), %cl
-	mov	$0, %eax
-	jne	L(end)
-	RETURN
-
-	.p2align 4
-L(54bytes):
-	movdqu	-54(%eax), %xmm1
-	movdqu	-54(%edx), %xmm2
-	mov	$-54, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(38bytes):
-	mov	$-38, %ebx
-	movdqu	-38(%eax), %xmm1
-	movdqu	-38(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(22bytes):
-	mov	$-22, %ebx
-	movdqu	-22(%eax), %xmm1
-	movdqu	-22(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-
-	mov	-6(%eax), %ecx
-	mov	-6(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	movzwl	-2(%eax), %ecx
-	movzwl	-2(%edx), %ebx
-	cmp	%bl, %cl
-	jne	L(end)
-	cmp	%bh, %ch
-	mov	$0, %eax
-	jne	L(end)
-	RETURN
-
-	.p2align 4
-L(55bytes):
-	movdqu	-55(%eax), %xmm1
-	movdqu	-55(%edx), %xmm2
-	mov	$-55, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(39bytes):
-	mov	$-39, %ebx
-	movdqu	-39(%eax), %xmm1
-	movdqu	-39(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(23bytes):
-	mov	$-23, %ebx
-	movdqu	-23(%eax), %xmm1
-	movdqu	-23(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-	movl	-7(%eax), %ecx
-	movl	-7(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	movzwl	-3(%eax), %ecx
-	movzwl	-3(%edx), %ebx
-	cmpb	%bl, %cl
-	jne	L(end)
-	cmp	%bx, %cx
-	jne	L(end)
-	movzbl	-1(%eax), %eax
-	cmpb	-1(%edx), %al
-	mov	$0, %eax
-	jne	L(end)
-	RETURN
-# endif
-	.p2align 4
-L(56bytes):
-	movdqu	-56(%eax), %xmm1
-	movdqu	-56(%edx), %xmm2
-	mov	$-56, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(40bytes):
-	mov	$-40, %ebx
-	movdqu	-40(%eax), %xmm1
-	movdqu	-40(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(24bytes):
-	mov	$-24, %ebx
-	movdqu	-24(%eax), %xmm1
-	movdqu	-24(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-
-	mov	-8(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
-	mov	-8(%edx), %ebx
-	cmp	%ebx, %ecx
-# else
-	cmp	-8(%edx), %ecx
-# endif
-	jne	L(find_diff)
-
-	mov	-4(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
-	mov	-4(%edx), %ebx
-	cmp	%ebx, %ecx
-# else
-	cmp	-4(%edx), %ecx
-# endif
-	mov	$0, %eax
-	jne	L(find_diff)
-	RETURN
-
-# ifndef USE_AS_WMEMCMP
-	.p2align 4
-L(57bytes):
-	movdqu	-57(%eax), %xmm1
-	movdqu	-57(%edx), %xmm2
-	mov	$-57, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(41bytes):
-	mov	$-41, %ebx
-	movdqu	-41(%eax), %xmm1
-	movdqu	-41(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(25bytes):
-	mov	$-25, %ebx
-	movdqu	-25(%eax), %xmm1
-	movdqu	-25(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-	mov	-9(%eax), %ecx
-	mov	-9(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	mov	-5(%eax), %ecx
-	mov	-5(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	movzbl	-1(%eax), %ecx
-	cmp	-1(%edx), %cl
-	mov	$0, %eax
-	jne	L(end)
-	RETURN
-
-	.p2align 4
-L(58bytes):
-	movdqu	-58(%eax), %xmm1
-	movdqu	-58(%edx), %xmm2
-	mov	$-58, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(42bytes):
-	mov	$-42, %ebx
-	movdqu	-42(%eax), %xmm1
-	movdqu	-42(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(26bytes):
-	mov	$-26, %ebx
-	movdqu	-26(%eax), %xmm1
-	movdqu	-26(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-
-	mov	-10(%eax), %ecx
-	mov	-10(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-
-	mov	-6(%eax), %ecx
-	mov	-6(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-
-	movzwl	-2(%eax), %ecx
-	movzwl	-2(%edx), %ebx
-	cmp	%bl, %cl
-	jne	L(end)
-	cmp	%bh, %ch
-	mov	$0, %eax
-	jne	L(end)
-	RETURN
-
-	.p2align 4
-L(59bytes):
-	movdqu	-59(%eax), %xmm1
-	movdqu	-59(%edx), %xmm2
-	mov	$-59, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(43bytes):
-	mov	$-43, %ebx
-	movdqu	-43(%eax), %xmm1
-	movdqu	-43(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(27bytes):
-	mov	$-27, %ebx
-	movdqu	-27(%eax), %xmm1
-	movdqu	-27(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-	movl	-11(%eax), %ecx
-	movl	-11(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	movl	-7(%eax), %ecx
-	movl	-7(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	movzwl	-3(%eax), %ecx
-	movzwl	-3(%edx), %ebx
-	cmpb	%bl, %cl
-	jne	L(end)
-	cmp	%bx, %cx
-	jne	L(end)
-	movzbl	-1(%eax), %eax
-	cmpb	-1(%edx), %al
-	mov	$0, %eax
-	jne	L(end)
-	RETURN
-# endif
-	.p2align 4
-L(60bytes):
-	movdqu	-60(%eax), %xmm1
-	movdqu	-60(%edx), %xmm2
-	mov	$-60, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(44bytes):
-	mov	$-44, %ebx
-	movdqu	-44(%eax), %xmm1
-	movdqu	-44(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(28bytes):
-	mov	$-28, %ebx
-	movdqu	-28(%eax), %xmm1
-	movdqu	-28(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-
-	mov	-12(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
-	mov	-12(%edx), %ebx
-	cmp	%ebx, %ecx
-# else
-	cmp	-12(%edx), %ecx
-# endif
-	jne	L(find_diff)
-
-	mov	-8(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
-	mov	-8(%edx), %ebx
-	cmp	%ebx, %ecx
-# else
-	cmp	-8(%edx), %ecx
-# endif
-	jne	L(find_diff)
-
-	mov	-4(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
-	mov	-4(%edx), %ebx
-	cmp	%ebx, %ecx
-# else
-	cmp	-4(%edx), %ecx
-# endif
-	mov	$0, %eax
-	jne	L(find_diff)
-	RETURN
-
-# ifndef USE_AS_WMEMCMP
-	.p2align 4
-L(61bytes):
-	movdqu	-61(%eax), %xmm1
-	movdqu	-61(%edx), %xmm2
-	mov	$-61, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(45bytes):
-	mov	$-45, %ebx
-	movdqu	-45(%eax), %xmm1
-	movdqu	-45(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(29bytes):
-	mov	$-29, %ebx
-	movdqu	-29(%eax), %xmm1
-	movdqu	-29(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-
-	mov	-13(%eax), %ecx
-	mov	-13(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-
-	mov	-9(%eax), %ecx
-	mov	-9(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-
-	mov	-5(%eax), %ecx
-	mov	-5(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	movzbl	-1(%eax), %ecx
-	cmp	-1(%edx), %cl
-	mov	$0, %eax
-	jne	L(end)
-	RETURN
-
-	.p2align 4
-L(62bytes):
-	movdqu	-62(%eax), %xmm1
-	movdqu	-62(%edx), %xmm2
-	mov	$-62, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(46bytes):
-	mov	$-46, %ebx
-	movdqu	-46(%eax), %xmm1
-	movdqu	-46(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(30bytes):
-	mov	$-30, %ebx
-	movdqu	-30(%eax), %xmm1
-	movdqu	-30(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-	mov	-14(%eax), %ecx
-	mov	-14(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	mov	-10(%eax), %ecx
-	mov	-10(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	mov	-6(%eax), %ecx
-	mov	-6(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	movzwl	-2(%eax), %ecx
-	movzwl	-2(%edx), %ebx
-	cmp	%bl, %cl
-	jne	L(end)
-	cmp	%bh, %ch
-	mov	$0, %eax
-	jne	L(end)
-	RETURN
-
-	.p2align 4
-L(63bytes):
-	movdqu	-63(%eax), %xmm1
-	movdqu	-63(%edx), %xmm2
-	mov	$-63, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(47bytes):
-	mov	$-47, %ebx
-	movdqu	-47(%eax), %xmm1
-	movdqu	-47(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(31bytes):
-	mov	$-31, %ebx
-	movdqu	-31(%eax), %xmm1
-	movdqu	-31(%edx), %xmm2
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-
-	movl	-15(%eax), %ecx
-	movl	-15(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	movl	-11(%eax), %ecx
-	movl	-11(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	movl	-7(%eax), %ecx
-	movl	-7(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	movzwl	-3(%eax), %ecx
-	movzwl	-3(%edx), %ebx
-	cmpb	%bl, %cl
-	jne	L(end)
-	cmp	%bx, %cx
-	jne	L(end)
-	movzbl	-1(%eax), %eax
-	cmpb	-1(%edx), %al
-	mov	$0, %eax
-	jne	L(end)
-	RETURN
-# endif
-
-	.p2align 4
-L(64bytes):
-	movdqu	-64(%eax), %xmm1
-	movdqu	-64(%edx), %xmm2
-	mov	$-64, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(48bytes):
-	movdqu	-48(%eax), %xmm1
-	movdqu	-48(%edx), %xmm2
-	mov	$-48, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-L(32bytes):
-	movdqu	-32(%eax), %xmm1
-	movdqu	-32(%edx), %xmm2
-	mov	$-32, %ebx
-	pxor	%xmm1, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-
-	mov	-16(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
-	mov	-16(%edx), %ebx
-	cmp	%ebx, %ecx
-# else
-	cmp	-16(%edx), %ecx
-# endif
-	jne	L(find_diff)
-
-	mov	-12(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
-	mov	-12(%edx), %ebx
-	cmp	%ebx, %ecx
-# else
-	cmp	-12(%edx), %ecx
-# endif
-	jne	L(find_diff)
-
-	mov	-8(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
-	mov	-8(%edx), %ebx
-	cmp	%ebx, %ecx
-# else
-	cmp	-8(%edx), %ecx
-# endif
-	jne	L(find_diff)
-
-	mov	-4(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
-	mov	-4(%edx), %ebx
-	cmp	%ebx, %ecx
-# else
-	cmp	-4(%edx), %ecx
-# endif
-	mov	$0, %eax
-	jne	L(find_diff)
-	RETURN
-
-# ifndef USE_AS_WMEMCMP
-	.p2align 4
-L(less16bytes):
-	add	%ebx, %eax
-	add	%ebx, %edx
-
-	mov	(%eax), %ecx
-	mov	(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-
-	mov	4(%eax), %ecx
-	mov	4(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-
-	mov	8(%eax), %ecx
-	mov	8(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-
-	mov	12(%eax), %ecx
-	mov	12(%edx), %ebx
-	cmp	%ebx, %ecx
-	mov	$0, %eax
-	jne	L(find_diff)
-	RETURN
-# else
-	.p2align 4
-L(less16bytes):
-	add	%ebx, %eax
-	add	%ebx, %edx
-
-	mov	(%eax), %ecx
-	cmp	(%edx), %ecx
-	jne	L(find_diff)
-
-	mov	4(%eax), %ecx
-	cmp	4(%edx), %ecx
-	jne	L(find_diff)
-
-	mov	8(%eax), %ecx
-	cmp	8(%edx), %ecx
-	jne	L(find_diff)
-
-	mov	12(%eax), %ecx
-	cmp	12(%edx), %ecx
-
-	mov	$0, %eax
-	jne	L(find_diff)
-	RETURN
-# endif
-
-	.p2align 4
-L(find_diff):
-# ifndef USE_AS_WMEMCMP
-	cmpb	%bl, %cl
-	jne	L(end)
-	cmp	%bx, %cx
-	jne	L(end)
-	shr	$16,%ecx
-	shr	$16,%ebx
-	cmp	%bl, %cl
-	jne	L(end)
-	cmp	%bx, %cx
-L(end):
-	POP	(%ebx)
-	mov	$1, %eax
-	ja	L(bigger)
-	neg	%eax
-L(bigger):
-	ret
-# else
-	POP	(%ebx)
-	mov	$1, %eax
-	jg	L(bigger)
-	neg	%eax
-	ret
-
-	.p2align 4
-L(bigger):
-	ret
-# endif
-END (MEMCMP)
-
-	.section .rodata.sse4.2,"a",@progbits	/* Jump table lives in its own allocatable data section.  */
-	.p2align 2	/* 4-byte alignment for the .int entries below.  */
-	.type	L(table_64bytes), @object
-# ifndef USE_AS_WMEMCMP
-L(table_64bytes):	/* memcmp variant: entry N (N = 0..64) refers to L(Nbytes).  NOTE(review): JMPTBL and the indexing code are outside this excerpt; presumably indexed by remaining byte count -- confirm.  */
-	.int	JMPTBL (L(0bytes), L(table_64bytes))
-	.int	JMPTBL (L(1bytes), L(table_64bytes))
-	.int	JMPTBL (L(2bytes), L(table_64bytes))
-	.int	JMPTBL (L(3bytes), L(table_64bytes))
-	.int	JMPTBL (L(4bytes), L(table_64bytes))
-	.int	JMPTBL (L(5bytes), L(table_64bytes))
-	.int	JMPTBL (L(6bytes), L(table_64bytes))
-	.int	JMPTBL (L(7bytes), L(table_64bytes))
-	.int	JMPTBL (L(8bytes), L(table_64bytes))
-	.int	JMPTBL (L(9bytes), L(table_64bytes))
-	.int	JMPTBL (L(10bytes), L(table_64bytes))
-	.int	JMPTBL (L(11bytes), L(table_64bytes))
-	.int	JMPTBL (L(12bytes), L(table_64bytes))
-	.int	JMPTBL (L(13bytes), L(table_64bytes))
-	.int	JMPTBL (L(14bytes), L(table_64bytes))
-	.int	JMPTBL (L(15bytes), L(table_64bytes))
-	.int	JMPTBL (L(16bytes), L(table_64bytes))
-	.int	JMPTBL (L(17bytes), L(table_64bytes))
-	.int	JMPTBL (L(18bytes), L(table_64bytes))
-	.int	JMPTBL (L(19bytes), L(table_64bytes))
-	.int	JMPTBL (L(20bytes), L(table_64bytes))
-	.int	JMPTBL (L(21bytes), L(table_64bytes))
-	.int	JMPTBL (L(22bytes), L(table_64bytes))
-	.int	JMPTBL (L(23bytes), L(table_64bytes))
-	.int	JMPTBL (L(24bytes), L(table_64bytes))
-	.int	JMPTBL (L(25bytes), L(table_64bytes))
-	.int	JMPTBL (L(26bytes), L(table_64bytes))
-	.int	JMPTBL (L(27bytes), L(table_64bytes))
-	.int	JMPTBL (L(28bytes), L(table_64bytes))
-	.int	JMPTBL (L(29bytes), L(table_64bytes))
-	.int	JMPTBL (L(30bytes), L(table_64bytes))
-	.int	JMPTBL (L(31bytes), L(table_64bytes))
-	.int	JMPTBL (L(32bytes), L(table_64bytes))
-	.int	JMPTBL (L(33bytes), L(table_64bytes))
-	.int	JMPTBL (L(34bytes), L(table_64bytes))
-	.int	JMPTBL (L(35bytes), L(table_64bytes))
-	.int	JMPTBL (L(36bytes), L(table_64bytes))
-	.int	JMPTBL (L(37bytes), L(table_64bytes))
-	.int	JMPTBL (L(38bytes), L(table_64bytes))
-	.int	JMPTBL (L(39bytes), L(table_64bytes))
-	.int	JMPTBL (L(40bytes), L(table_64bytes))
-	.int	JMPTBL (L(41bytes), L(table_64bytes))
-	.int	JMPTBL (L(42bytes), L(table_64bytes))
-	.int	JMPTBL (L(43bytes), L(table_64bytes))
-	.int	JMPTBL (L(44bytes), L(table_64bytes))
-	.int	JMPTBL (L(45bytes), L(table_64bytes))
-	.int	JMPTBL (L(46bytes), L(table_64bytes))
-	.int	JMPTBL (L(47bytes), L(table_64bytes))
-	.int	JMPTBL (L(48bytes), L(table_64bytes))
-	.int	JMPTBL (L(49bytes), L(table_64bytes))
-	.int	JMPTBL (L(50bytes), L(table_64bytes))
-	.int	JMPTBL (L(51bytes), L(table_64bytes))
-	.int	JMPTBL (L(52bytes), L(table_64bytes))
-	.int	JMPTBL (L(53bytes), L(table_64bytes))
-	.int	JMPTBL (L(54bytes), L(table_64bytes))
-	.int	JMPTBL (L(55bytes), L(table_64bytes))
-	.int	JMPTBL (L(56bytes), L(table_64bytes))
-	.int	JMPTBL (L(57bytes), L(table_64bytes))
-	.int	JMPTBL (L(58bytes), L(table_64bytes))
-	.int	JMPTBL (L(59bytes), L(table_64bytes))
-	.int	JMPTBL (L(60bytes), L(table_64bytes))
-	.int	JMPTBL (L(61bytes), L(table_64bytes))
-	.int	JMPTBL (L(62bytes), L(table_64bytes))
-	.int	JMPTBL (L(63bytes), L(table_64bytes))
-	.int	JMPTBL (L(64bytes), L(table_64bytes))
-# else
-L(table_64bytes):	/* wmemcmp variant: same 65 slots, but only multiples of 4 have a handler; every other slot refers to L(unreal_case).  */
-	.int	JMPTBL (L(0bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(4bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(8bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(12bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(16bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(20bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(24bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(28bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(32bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(36bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(40bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(44bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(48bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(52bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(56bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(60bytes), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(unreal_case), L(table_64bytes))
-	.int	JMPTBL (L(64bytes), L(table_64bytes))
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memcmp-ssse3.S b/sysdeps/i386/i686/multiarch/memcmp-ssse3.S
deleted file mode 100644
index 5ebf5a4d73..0000000000
--- a/sysdeps/i386/i686/multiarch/memcmp-ssse3.S
+++ /dev/null
@@ -1,2157 +0,0 @@
-/* memcmp with SSSE3, wmemcmp with SSSE3
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# ifndef MEMCMP
-#  define MEMCMP		__memcmp_ssse3
-# endif
-/* CFI_PUSH: describe a 4-byte push to the unwinder (CFA offset +4, REG saved at offset 0).  */
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-/* CFI_POP: inverse of CFI_PUSH (CFA offset -4, REG marked as restored).  */
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-/* PUSH/POP: pushl/popl REG together with the matching CFI annotation.  */
-# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-# define POP(REG)	popl REG; CFI_POP (REG)
-/* %esp-relative offsets of the arguments as read at function entry, before any PUSH: BLK1, BLK2, then LEN.  */
-# define PARMS		4
-# define BLK1		PARMS
-# define BLK2		BLK1+4
-# define LEN		BLK2+4
-# define RETURN_END	POP (%edi); POP (%esi); POP (%ebx); ret	/* Pop in reverse of the ebx/esi/edi push order, then return.  */
-# define RETURN		RETURN_END; cfi_restore_state; cfi_remember_state	/* As RETURN_END, then reinstate and re-save the remembered CFI state for the code that follows.  */
-
-/* Warning!
-           wmemcmp has to use SIGNED comparison for elements.
-           memcmp has to use UNSIGNED comparison for elements.
-*/
-
-	atom_text_section
-ENTRY (MEMCMP)
-	movl	LEN(%esp), %ecx
-
-# ifdef USE_AS_WMEMCMP
-	shl	$2, %ecx
-	test	%ecx, %ecx
-	jz	L(zero)
-# endif
-
-	movl	BLK1(%esp), %eax
-	cmp	$48, %ecx
-	movl	BLK2(%esp), %edx
-	jae	L(48bytesormore)
-
-# ifndef USE_AS_WMEMCMP
-	cmp	$1, %ecx
-	jbe	L(less1bytes)
-# endif
-
-	PUSH	(%ebx)
-	add	%ecx, %edx
-	add	%ecx, %eax
-	jmp	L(less48bytes)
-
-	CFI_POP	(%ebx)
-
-# ifndef USE_AS_WMEMCMP
-	.p2align 4
-L(less1bytes):
-	jb	L(zero)
-	movb	(%eax), %cl
-	cmp	(%edx), %cl
-	je	L(zero)
-	mov	$1, %eax
-	ja	L(1bytesend)
-	neg	%eax
-L(1bytesend):
-	ret
-# endif
-
-	.p2align 4
-L(zero):
-	xor	%eax, %eax
-	ret
-
-	.p2align 4
-L(48bytesormore):
-	PUSH	(%ebx)
-	PUSH	(%esi)
-	PUSH	(%edi)
-	cfi_remember_state
-	movdqu	(%eax), %xmm3
-	movdqu	(%edx), %xmm0
-	movl	%eax, %edi
-	movl	%edx, %esi
-	pcmpeqb	%xmm0, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%edi), %edi
-
-	sub	$0xffff, %edx
-	lea	16(%esi), %esi
-	jnz	L(less16bytes)
-	mov	%edi, %edx
-	and	$0xf, %edx
-	xor	%edx, %edi
-	sub	%edx, %esi
-	add	%edx, %ecx
-	mov	%esi, %edx
-	and	$0xf, %edx
-	jz	L(shr_0)
-	xor	%edx, %esi
-
-# ifndef USE_AS_WMEMCMP
-	cmp	$8, %edx
-	jae	L(next_unaligned_table)
-	cmp	$0, %edx
-	je	L(shr_0)
-	cmp	$1, %edx
-	je	L(shr_1)
-	cmp	$2, %edx
-	je	L(shr_2)
-	cmp	$3, %edx
-	je	L(shr_3)
-	cmp	$4, %edx
-	je	L(shr_4)
-	cmp	$5, %edx
-	je	L(shr_5)
-	cmp	$6, %edx
-	je	L(shr_6)
-	jmp	L(shr_7)
-
-	.p2align 2
-L(next_unaligned_table):
-	cmp	$8, %edx
-	je	L(shr_8)
-	cmp	$9, %edx
-	je	L(shr_9)
-	cmp	$10, %edx
-	je	L(shr_10)
-	cmp	$11, %edx
-	je	L(shr_11)
-	cmp	$12, %edx
-	je	L(shr_12)
-	cmp	$13, %edx
-	je	L(shr_13)
-	cmp	$14, %edx
-	je	L(shr_14)
-	jmp	L(shr_15)
-# else
-	cmp	$0, %edx
-	je	L(shr_0)
-	cmp	$4, %edx
-	je	L(shr_4)
-	cmp	$8, %edx
-	je	L(shr_8)
-	jmp	L(shr_12)
-# endif
-
-	.p2align 4
-L(shr_0):
-	cmp	$80, %ecx
-	jae	L(shr_0_gobble)
-	lea	-48(%ecx), %ecx
-	xor	%eax, %eax
-	movaps	(%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-	movaps	16(%esi), %xmm2
-	pcmpeqb	16(%edi), %xmm2
-	pand	%xmm1, %xmm2
-	pmovmskb %xmm2, %edx
-	add	$32, %edi
-	add	$32, %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_0_gobble):
-	lea	-48(%ecx), %ecx
-	movdqa	(%esi), %xmm0
-	xor	%eax, %eax
-	pcmpeqb	(%edi), %xmm0
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm2
-	pcmpeqb	16(%edi), %xmm2
-L(shr_0_gobble_loop):
-	pand	%xmm0, %xmm2
-	sub	$32, %ecx
-	pmovmskb %xmm2, %edx
-	movdqa	%xmm0, %xmm1
-	movdqa	32(%esi), %xmm0
-	movdqa	48(%esi), %xmm2
-	sbb	$0xffff, %edx
-	pcmpeqb	32(%edi), %xmm0
-	pcmpeqb	48(%edi), %xmm2
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	jz	L(shr_0_gobble_loop)
-
-	pand	%xmm0, %xmm2
-	cmp	$0, %ecx
-	jge	L(shr_0_gobble_loop_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_0_gobble_loop_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm2, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-# ifndef USE_AS_WMEMCMP
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_1):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_1_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$1,(%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$1,%xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	1(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_1_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$1,(%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$1,16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_1_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$1,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$1,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_1_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_1_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_1_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	1(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_2):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_2_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$2,(%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$2,%xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	2(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_2_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$2,(%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$2,16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_2_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$2,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$2,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_2_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_2_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_2_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	2(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_3):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_3_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$3,(%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$3,%xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	3(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_3_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$3,(%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$3,16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_3_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$3,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$3,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_3_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_3_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_3_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	3(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-# endif
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_4):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_4_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$4,(%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$4,%xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	4(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_4_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$4,(%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$4,16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_4_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$4,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$4,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_4_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_4_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_4_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	4(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-# ifndef USE_AS_WMEMCMP
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_5):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_5_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$5,(%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$5,%xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	5(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_5_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$5,(%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$5,16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_5_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$5,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$5,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_5_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_5_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_5_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	5(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_6):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_6_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$6,(%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$6,%xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	6(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_6_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$6,(%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$6,16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_6_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$6,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$6,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_6_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_6_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_6_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	6(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_7):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_7_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$7,(%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$7,%xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	7(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_7_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$7,(%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$7,16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_7_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$7,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$7,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_7_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_7_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_7_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	7(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-# endif
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_8):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_8_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$8,(%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$8,%xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	8(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_8_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$8,(%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$8,16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_8_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$8,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$8,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_8_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_8_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_8_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	8(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-# ifndef USE_AS_WMEMCMP
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_9):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_9_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$9,(%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$9,%xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	9(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_9_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$9,(%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$9,16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_9_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$9,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$9,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_9_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_9_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_9_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	9(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_10):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_10_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$10, (%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$10,%xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	10(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_10_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$10, (%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$10, 16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_10_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$10,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$10,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_10_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_10_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_10_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	10(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_11):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_11_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$11, (%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$11, %xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	11(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_11_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$11, (%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$11, 16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_11_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$11,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$11,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_11_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_11_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_11_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	11(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-# endif
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_12):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_12_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$12, (%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$12, %xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	12(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_12_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$12, (%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$12, 16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_12_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$12,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$12,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_12_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_12_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_12_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	12(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-# ifndef USE_AS_WMEMCMP
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_13):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_13_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$13, (%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$13, %xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	13(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_13_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$13, (%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$13, 16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_13_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$13,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$13,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_13_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_13_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_13_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	13(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_14):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_14_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$14, (%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$14, %xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	14(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_14_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$14, (%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$14, 16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_14_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$14,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$14,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_14_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_14_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_14_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	14(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_15):
-	cmp	$80, %ecx
-	lea	-48(%ecx), %ecx
-	mov	%edx, %eax
-	jae	L(shr_15_gobble)
-
-	movdqa	16(%esi), %xmm1
-	movdqa	%xmm1, %xmm2
-	palignr	$15, (%esi), %xmm1
-	pcmpeqb	(%edi), %xmm1
-
-	movdqa	32(%esi), %xmm3
-	palignr	$15, %xmm2, %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-	pand	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-	lea	(%ecx, %edi,1), %eax
-	lea	15(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(shr_15_gobble):
-	sub	$32, %ecx
-	movdqa	16(%esi), %xmm0
-	palignr	$15, (%esi), %xmm0
-	pcmpeqb	(%edi), %xmm0
-
-	movdqa	32(%esi), %xmm3
-	palignr	$15, 16(%esi), %xmm3
-	pcmpeqb	16(%edi), %xmm3
-
-L(shr_15_gobble_loop):
-	pand	%xmm0, %xmm3
-	sub	$32, %ecx
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-
-	movdqa	64(%esi), %xmm3
-	palignr	$15,48(%esi), %xmm3
-	sbb	$0xffff, %edx
-	movdqa	48(%esi), %xmm0
-	palignr	$15,32(%esi), %xmm0
-	pcmpeqb	32(%edi), %xmm0
-	lea	32(%esi), %esi
-	pcmpeqb	48(%edi), %xmm3
-
-	lea	32(%edi), %edi
-	jz	L(shr_15_gobble_loop)
-	pand	%xmm0, %xmm3
-
-	cmp	$0, %ecx
-	jge	L(shr_15_gobble_next)
-	inc	%edx
-	add	$32, %ecx
-L(shr_15_gobble_next):
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pmovmskb %xmm3, %edx
-	movdqa	%xmm0, %xmm1
-	lea	32(%edi), %edi
-	lea	32(%esi), %esi
-	sub	$0xffff, %edx
-	jnz	L(exit)
-
-	lea	(%ecx, %edi,1), %eax
-	lea	15(%ecx, %esi,1), %edx
-	POP	(%edi)
-	POP	(%esi)
-	jmp	L(less48bytes)
-# endif
-
-	cfi_restore_state
-	cfi_remember_state
-	.p2align 4
-L(exit):
-	pmovmskb %xmm1, %ebx
-	sub	$0xffff, %ebx
-	jz	L(first16bytes)
-	lea	-16(%esi), %esi
-	lea	-16(%edi), %edi
-	mov	%ebx, %edx
-
-L(first16bytes):
-	add	%eax, %esi
-L(less16bytes):
-
-# ifndef USE_AS_WMEMCMP
-	test	%dl, %dl
-	jz	L(next_24_bytes)
-
-	test	$0x01, %dl
-	jnz	L(Byte16)
-
-	test	$0x02, %dl
-	jnz	L(Byte17)
-
-	test	$0x04, %dl
-	jnz	L(Byte18)
-
-	test	$0x08, %dl
-	jnz	L(Byte19)
-
-	test	$0x10, %dl
-	jnz	L(Byte20)
-
-	test	$0x20, %dl
-	jnz	L(Byte21)
-
-	test	$0x40, %dl
-	jnz	L(Byte22)
-L(Byte23):
-	movzbl	-9(%edi), %eax
-	movzbl	-9(%esi), %edx
-	sub	%edx, %eax
-	RETURN
-
-	.p2align 4
-L(Byte16):
-	movzbl	-16(%edi), %eax
-	movzbl	-16(%esi), %edx
-	sub	%edx, %eax
-	RETURN
-
-	.p2align 4
-L(Byte17):
-	movzbl	-15(%edi), %eax
-	movzbl	-15(%esi), %edx
-	sub	%edx, %eax
-	RETURN
-
-	.p2align 4
-L(Byte18):
-	movzbl	-14(%edi), %eax
-	movzbl	-14(%esi), %edx
-	sub	%edx, %eax
-	RETURN
-
-	.p2align 4
-L(Byte19):
-	movzbl	-13(%edi), %eax
-	movzbl	-13(%esi), %edx
-	sub	%edx, %eax
-	RETURN
-
-	.p2align 4
-L(Byte20):
-	movzbl	-12(%edi), %eax
-	movzbl	-12(%esi), %edx
-	sub	%edx, %eax
-	RETURN
-
-	.p2align 4
-L(Byte21):
-	movzbl	-11(%edi), %eax
-	movzbl	-11(%esi), %edx
-	sub	%edx, %eax
-	RETURN
-
-	.p2align 4
-L(Byte22):
-	movzbl	-10(%edi), %eax
-	movzbl	-10(%esi), %edx
-	sub	%edx, %eax
-	RETURN
-
-	.p2align 4
-L(next_24_bytes):
-	lea	8(%edi), %edi
-	lea	8(%esi), %esi
-	test	$0x01, %dh
-	jnz	L(Byte16)
-
-	test	$0x02, %dh
-	jnz	L(Byte17)
-
-	test	$0x04, %dh
-	jnz	L(Byte18)
-
-	test	$0x08, %dh
-	jnz	L(Byte19)
-
-	test	$0x10, %dh
-	jnz	L(Byte20)
-
-	test	$0x20, %dh
-	jnz	L(Byte21)
-
-	test	$0x40, %dh
-	jnz	L(Byte22)
-
-	.p2align 4
-L(Byte31):
-	movzbl	-9(%edi), %eax
-	movzbl	-9(%esi), %edx
-	sub	%edx, %eax
-	RETURN_END
-# else
-
-/* special for wmemcmp */
-	xor	%eax, %eax
-	test	%dl, %dl
-	jz	L(next_two_double_words)
-	and	$15, %dl
-	jz	L(second_double_word)
-	mov	-16(%edi), %eax
-	cmp	-16(%esi), %eax
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(second_double_word):
-	mov	-12(%edi), %eax
-	cmp	-12(%esi), %eax
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(next_two_double_words):
-	and	$15, %dh
-	jz	L(fourth_double_word)
-	mov	-8(%edi), %eax
-	cmp	-8(%esi), %eax
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(fourth_double_word):
-	mov	-4(%edi), %eax
-	cmp	-4(%esi), %eax
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(nequal):
-	mov	$1, %eax
-	jg	L(nequal_bigger)
-	neg	%eax
-	RETURN
-
-	.p2align 4
-L(nequal_bigger):
-	RETURN_END
-# endif
-
-	CFI_PUSH (%ebx)
-
-	.p2align 4
-L(more8bytes):
-	cmp	$16, %ecx
-	jae	L(more16bytes)
-	cmp	$8, %ecx
-	je	L(8bytes)
-# ifndef USE_AS_WMEMCMP
-	cmp	$9, %ecx
-	je	L(9bytes)
-	cmp	$10, %ecx
-	je	L(10bytes)
-	cmp	$11, %ecx
-	je	L(11bytes)
-	cmp	$12, %ecx
-	je	L(12bytes)
-	cmp	$13, %ecx
-	je	L(13bytes)
-	cmp	$14, %ecx
-	je	L(14bytes)
-	jmp	L(15bytes)
-# else
-	jmp	L(12bytes)
-# endif
-
-	.p2align 4
-L(more16bytes):
-	cmp	$24, %ecx
-	jae	L(more24bytes)
-	cmp	$16, %ecx
-	je	L(16bytes)
-# ifndef USE_AS_WMEMCMP
-	cmp	$17, %ecx
-	je	L(17bytes)
-	cmp	$18, %ecx
-	je	L(18bytes)
-	cmp	$19, %ecx
-	je	L(19bytes)
-	cmp	$20, %ecx
-	je	L(20bytes)
-	cmp	$21, %ecx
-	je	L(21bytes)
-	cmp	$22, %ecx
-	je	L(22bytes)
-	jmp	L(23bytes)
-# else
-	jmp	L(20bytes)
-# endif
-
-	.p2align 4
-L(more24bytes):
-	cmp	$32, %ecx
-	jae	L(more32bytes)
-	cmp	$24, %ecx
-	je	L(24bytes)
-# ifndef USE_AS_WMEMCMP
-	cmp	$25, %ecx
-	je	L(25bytes)
-	cmp	$26, %ecx
-	je	L(26bytes)
-	cmp	$27, %ecx
-	je	L(27bytes)
-	cmp	$28, %ecx
-	je	L(28bytes)
-	cmp	$29, %ecx
-	je	L(29bytes)
-	cmp	$30, %ecx
-	je	L(30bytes)
-	jmp	L(31bytes)
-# else
-	jmp	L(28bytes)
-# endif
-
-	.p2align 4
-L(more32bytes):
-	cmp	$40, %ecx
-	jae	L(more40bytes)
-	cmp	$32, %ecx
-	je	L(32bytes)
-# ifndef USE_AS_WMEMCMP
-	cmp	$33, %ecx
-	je	L(33bytes)
-	cmp	$34, %ecx
-	je	L(34bytes)
-	cmp	$35, %ecx
-	je	L(35bytes)
-	cmp	$36, %ecx
-	je	L(36bytes)
-	cmp	$37, %ecx
-	je	L(37bytes)
-	cmp	$38, %ecx
-	je	L(38bytes)
-	jmp	L(39bytes)
-# else
-	jmp	L(36bytes)
-# endif
-
-	.p2align 4
-L(less48bytes):
-	cmp	$8, %ecx
-	jae	L(more8bytes)
-# ifndef USE_AS_WMEMCMP
-	cmp	$2, %ecx
-	je	L(2bytes)
-	cmp	$3, %ecx
-	je	L(3bytes)
-	cmp	$4, %ecx
-	je	L(4bytes)
-	cmp	$5, %ecx
-	je	L(5bytes)
-	cmp	$6, %ecx
-	je	L(6bytes)
-	jmp	L(7bytes)
-# else
-	jmp	L(4bytes)
-# endif
-
-	.p2align 4
-L(more40bytes):
-	cmp	$40, %ecx
-	je	L(40bytes)
-# ifndef USE_AS_WMEMCMP
-	cmp	$41, %ecx
-	je	L(41bytes)
-	cmp	$42, %ecx
-	je	L(42bytes)
-	cmp	$43, %ecx
-	je	L(43bytes)
-	cmp	$44, %ecx
-	je	L(44bytes)
-	cmp	$45, %ecx
-	je	L(45bytes)
-	cmp	$46, %ecx
-	je	L(46bytes)
-	jmp	L(47bytes)
-
-	.p2align 4
-L(44bytes):
-	mov	-44(%eax), %ecx
-	mov	-44(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(40bytes):
-	mov	-40(%eax), %ecx
-	mov	-40(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(36bytes):
-	mov	-36(%eax), %ecx
-	mov	-36(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(32bytes):
-	mov	-32(%eax), %ecx
-	mov	-32(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(28bytes):
-	mov	-28(%eax), %ecx
-	mov	-28(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(24bytes):
-	mov	-24(%eax), %ecx
-	mov	-24(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(20bytes):
-	mov	-20(%eax), %ecx
-	mov	-20(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(16bytes):
-	mov	-16(%eax), %ecx
-	mov	-16(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(12bytes):
-	mov	-12(%eax), %ecx
-	mov	-12(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(8bytes):
-	mov	-8(%eax), %ecx
-	mov	-8(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(4bytes):
-	mov	-4(%eax), %ecx
-	mov	-4(%edx), %ebx
-	cmp	%ebx, %ecx
-	mov	$0, %eax
-	jne	L(find_diff)
-	POP	(%ebx)
-	ret
-	CFI_PUSH (%ebx)
-# else
-	.p2align 4
-L(44bytes):
-	mov	-44(%eax), %ecx
-	cmp	-44(%edx), %ecx
-	jne	L(find_diff)
-L(40bytes):
-	mov	-40(%eax), %ecx
-	cmp	-40(%edx), %ecx
-	jne	L(find_diff)
-L(36bytes):
-	mov	-36(%eax), %ecx
-	cmp	-36(%edx), %ecx
-	jne	L(find_diff)
-L(32bytes):
-	mov	-32(%eax), %ecx
-	cmp	-32(%edx), %ecx
-	jne	L(find_diff)
-L(28bytes):
-	mov	-28(%eax), %ecx
-	cmp	-28(%edx), %ecx
-	jne	L(find_diff)
-L(24bytes):
-	mov	-24(%eax), %ecx
-	cmp	-24(%edx), %ecx
-	jne	L(find_diff)
-L(20bytes):
-	mov	-20(%eax), %ecx
-	cmp	-20(%edx), %ecx
-	jne	L(find_diff)
-L(16bytes):
-	mov	-16(%eax), %ecx
-	cmp	-16(%edx), %ecx
-	jne	L(find_diff)
-L(12bytes):
-	mov	-12(%eax), %ecx
-	cmp	-12(%edx), %ecx
-	jne	L(find_diff)
-L(8bytes):
-	mov	-8(%eax), %ecx
-	cmp	-8(%edx), %ecx
-	jne	L(find_diff)
-L(4bytes):
-	mov	-4(%eax), %ecx
-	xor	%eax, %eax
-	cmp	-4(%edx), %ecx
-	jne	L(find_diff)
-	POP	(%ebx)
-	ret
-	CFI_PUSH (%ebx)
-# endif
-
-# ifndef USE_AS_WMEMCMP
-
-	.p2align 4
-L(45bytes):
-	mov	-45(%eax), %ecx
-	mov	-45(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(41bytes):
-	mov	-41(%eax), %ecx
-	mov	-41(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(37bytes):
-	mov	-37(%eax), %ecx
-	mov	-37(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(33bytes):
-	mov	-33(%eax), %ecx
-	mov	-33(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(29bytes):
-	mov	-29(%eax), %ecx
-	mov	-29(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(25bytes):
-	mov	-25(%eax), %ecx
-	mov	-25(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(21bytes):
-	mov	-21(%eax), %ecx
-	mov	-21(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(17bytes):
-	mov	-17(%eax), %ecx
-	mov	-17(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(13bytes):
-	mov	-13(%eax), %ecx
-	mov	-13(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(9bytes):
-	mov	-9(%eax), %ecx
-	mov	-9(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(5bytes):
-	mov	-5(%eax), %ecx
-	mov	-5(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-	movzbl	-1(%eax), %ecx
-	cmp	-1(%edx), %cl
-	mov	$0, %eax
-	jne	L(end)
-	POP	(%ebx)
-	ret
-	CFI_PUSH (%ebx)
-
-	.p2align 4
-L(46bytes):
-	mov	-46(%eax), %ecx
-	mov	-46(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(42bytes):
-	mov	-42(%eax), %ecx
-	mov	-42(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(38bytes):
-	mov	-38(%eax), %ecx
-	mov	-38(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(34bytes):
-	mov	-34(%eax), %ecx
-	mov	-34(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(30bytes):
-	mov	-30(%eax), %ecx
-	mov	-30(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(26bytes):
-	mov	-26(%eax), %ecx
-	mov	-26(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(22bytes):
-	mov	-22(%eax), %ecx
-	mov	-22(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(18bytes):
-	mov	-18(%eax), %ecx
-	mov	-18(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(14bytes):
-	mov	-14(%eax), %ecx
-	mov	-14(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(10bytes):
-	mov	-10(%eax), %ecx
-	mov	-10(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(6bytes):
-	mov	-6(%eax), %ecx
-	mov	-6(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(2bytes):
-	movzwl	-2(%eax), %ecx
-	movzwl	-2(%edx), %ebx
-	cmp	%bl, %cl
-	jne	L(end)
-	cmp	%bh, %ch
-	mov	$0, %eax
-	jne	L(end)
-	POP	(%ebx)
-	ret
-	CFI_PUSH (%ebx)
-
-	.p2align 4
-L(47bytes):
-	movl	-47(%eax), %ecx
-	movl	-47(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(43bytes):
-	movl	-43(%eax), %ecx
-	movl	-43(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(39bytes):
-	movl	-39(%eax), %ecx
-	movl	-39(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(35bytes):
-	movl	-35(%eax), %ecx
-	movl	-35(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(31bytes):
-	movl	-31(%eax), %ecx
-	movl	-31(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(27bytes):
-	movl	-27(%eax), %ecx
-	movl	-27(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(23bytes):
-	movl	-23(%eax), %ecx
-	movl	-23(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(19bytes):
-	movl	-19(%eax), %ecx
-	movl	-19(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(15bytes):
-	movl	-15(%eax), %ecx
-	movl	-15(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(11bytes):
-	movl	-11(%eax), %ecx
-	movl	-11(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(7bytes):
-	movl	-7(%eax), %ecx
-	movl	-7(%edx), %ebx
-	cmp	%ebx, %ecx
-	jne	L(find_diff)
-L(3bytes):
-	movzwl	-3(%eax), %ecx
-	movzwl	-3(%edx), %ebx
-	cmpb	%bl, %cl
-	jne	L(end)
-	cmp	%bx, %cx
-	jne	L(end)
-	movzbl	-1(%eax), %eax
-	cmpb	-1(%edx), %al
-	mov	$0, %eax
-	jne	L(end)
-	POP	(%ebx)
-	ret
-	CFI_PUSH (%ebx)
-
-	.p2align 4
-L(find_diff):
-	cmpb	%bl, %cl
-	jne	L(end)
-	cmp	%bx, %cx
-	jne	L(end)
-	shr	$16,%ecx
-	shr	$16,%ebx
-	cmp	%bl, %cl
-	jne	L(end)
-	cmp	%bx, %cx
-
-	.p2align 4
-L(end):
-	POP	(%ebx)
-	mov	$1, %eax
-	ja	L(bigger)
-	neg	%eax
-L(bigger):
-	ret
-# else
-
-/* for wmemcmp */
-	.p2align 4
-L(find_diff):
-	POP	(%ebx)
-	mov	$1, %eax
-	jg	L(find_diff_bigger)
-	neg	%eax
-	ret
-
-	.p2align 4
-L(find_diff_bigger):
-	ret
-
-# endif
-END (MEMCMP)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memcmp.S b/sysdeps/i386/i686/multiarch/memcmp.S
deleted file mode 100644
index 1fc5994a17..0000000000
--- a/sysdeps/i386/i686/multiarch/memcmp.S
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Multiple versions of memcmp
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
-	.text
-ENTRY(memcmp)
-	.type	memcmp, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__memcmp_ia32)
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memcmp_ssse3)
-	HAS_CPU_FEATURE (SSE4_2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memcmp_sse4_2)
-2:	ret
-END(memcmp)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __memcmp_ia32, @function; \
-	.p2align 4; \
-	.globl __memcmp_ia32; \
-	.hidden __memcmp_ia32; \
-	__memcmp_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __memcmp_ia32, .-__memcmp_ia32
-
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-#  define libc_hidden_builtin_def(name) \
-	.globl __GI_memcmp; __GI_memcmp = __memcmp_ia32
-# endif
-#endif
-
-#include "../memcmp.S"
diff --git a/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S
deleted file mode 100644
index 2fe2072cb1..0000000000
--- a/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S
+++ /dev/null
@@ -1,681 +0,0 @@
-/* memcpy optimized with SSE2 unaligned memory access instructions.
-   Copyright (C) 2014-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc) \
-    && (defined SHARED \
-	|| defined USE_AS_MEMMOVE \
-	|| !defined USE_MULTIARCH)
-
-# include <sysdep.h>
-# include "asm-syntax.h"
-
-# ifndef MEMCPY
-#  define MEMCPY	__memcpy_sse2_unaligned
-#  define MEMCPY_CHK	__memcpy_chk_sse2_unaligned
-# endif
-
-# ifdef USE_AS_BCOPY
-#  define SRC		PARMS
-#  define DEST		SRC+4
-#  define LEN		DEST+4
-# else
-#  define DEST		PARMS
-#  define SRC		DEST+4
-#  define LEN		SRC+4
-# endif
-
-# define CFI_PUSH(REG)		\
-  cfi_adjust_cfa_offset (4);		\
-  cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)		\
-  cfi_adjust_cfa_offset (-4);		\
-  cfi_restore (REG)
-
-# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-# define POP(REG)	popl REG; CFI_POP (REG)
-
-# define PARMS		8		/* Preserve EBX.  */
-# define ENTRANCE	PUSH (%ebx);
-# define RETURN_END	POP (%ebx); ret
-# define RETURN	RETURN_END; CFI_PUSH (%ebx)
-
-	.section .text.sse2,"ax",@progbits
-# if !defined USE_AS_BCOPY
-ENTRY (MEMCPY_CHK)
-	movl	12(%esp), %eax
-	cmpl	%eax, 16(%esp)
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMCPY_CHK)
-# endif
-
-ENTRY (MEMCPY)
-	ENTRANCE
-	movl	LEN(%esp), %ecx
-	movl	SRC(%esp), %eax
-	movl	DEST(%esp), %edx
-	cmp	%edx, %eax
-
-# ifdef USE_AS_MEMMOVE
-	jg	L(check_forward)
-
-L(mm_len_0_or_more_backward):
-/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
-	separately.  */
-	cmp	$16, %ecx
-	jbe	L(mm_len_0_16_bytes_backward)
-
-	cmpl	$32, %ecx
-	jg	L(mm_len_32_or_more_backward)
-
-/* Copy [0..32] and return.  */
-	movdqu	(%eax), %xmm0
-	movdqu	-16(%eax, %ecx), %xmm1
-	movdqu	%xmm0, (%edx)
-	movdqu	%xmm1, -16(%edx, %ecx)
-	jmp	L(return)
-
-L(mm_len_32_or_more_backward):
-	cmpl	$64, %ecx
-	jg	L(mm_len_64_or_more_backward)
-
-/* Copy [0..64] and return.  */
-	movdqu	(%eax), %xmm0
-	movdqu	16(%eax), %xmm1
-	movdqu	-16(%eax, %ecx), %xmm2
-	movdqu	-32(%eax, %ecx), %xmm3
-	movdqu	%xmm0, (%edx)
-	movdqu	%xmm1, 16(%edx)
-	movdqu	%xmm2, -16(%edx, %ecx)
-	movdqu	%xmm3, -32(%edx, %ecx)
-	jmp	L(return)
-
-L(mm_len_64_or_more_backward):
-	cmpl	$128, %ecx
-	jg	L(mm_len_128_or_more_backward)
-
-/* Copy [0..128] and return.  */
-	movdqu	(%eax), %xmm0
-	movdqu	16(%eax), %xmm1
-	movdqu	32(%eax), %xmm2
-	movdqu	48(%eax), %xmm3
-	movdqu	-64(%eax, %ecx), %xmm4
-	movdqu	-48(%eax, %ecx), %xmm5
-	movdqu	-32(%eax, %ecx), %xmm6
-	movdqu	-16(%eax, %ecx), %xmm7
-	movdqu	%xmm0, (%edx)
-	movdqu	%xmm1, 16(%edx)
-	movdqu	%xmm2, 32(%edx)
-	movdqu	%xmm3, 48(%edx)
-	movdqu	%xmm4, -64(%edx, %ecx)
-	movdqu	%xmm5, -48(%edx, %ecx)
-	movdqu	%xmm6, -32(%edx, %ecx)
-	movdqu	%xmm7, -16(%edx, %ecx)
-	jmp	L(return)
-
-L(mm_len_128_or_more_backward):
-	add	%ecx, %eax
-	cmp	%edx, %eax
-	movl	SRC(%esp), %eax
-	jle	L(forward)
-	PUSH (%esi)
-	PUSH (%edi)
-	PUSH (%ebx)
-
-/* Aligning the address of destination. */
-	movdqu	(%eax), %xmm4
-	movdqu	16(%eax), %xmm5
-	movdqu	32(%eax), %xmm6
-	movdqu	48(%eax), %xmm7
-	leal	(%edx, %ecx), %esi
-	movdqu	-16(%eax, %ecx), %xmm0
-	subl	$16, %esp
-	movdqu	%xmm0, (%esp)
-	mov	%ecx, %edi
-	movl	%esi, %ecx
-	andl	$-16, %ecx
-	leal	(%ecx), %ebx
-	subl	%edx, %ebx
-	leal	(%eax, %ebx), %eax
-	shrl	$6, %ebx
-
-# ifdef SHARED_CACHE_SIZE_HALF
-	cmp	$SHARED_CACHE_SIZE_HALF, %edi
-# else
-#  ifdef SHARED
-	PUSH (%ebx)
-	SETUP_PIC_REG (bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %edi
-	POP (%ebx)
-#  else
-	cmp	__x86_shared_cache_size_half, %edi
-#  endif
-# endif
-	jae	L(mm_large_page_loop_backward)
-
-	.p2align 4
-L(mm_main_loop_backward):
-
-	prefetcht0 -128(%eax)
-
-	movdqu	-64(%eax), %xmm0
-	movdqu	-48(%eax), %xmm1
-	movdqu	-32(%eax), %xmm2
-	movdqu	-16(%eax), %xmm3
-	movaps	%xmm0, -64(%ecx)
-	subl	$64, %eax
-	movaps	%xmm1, -48(%ecx)
-	movaps	%xmm2, -32(%ecx)
-	movaps	%xmm3, -16(%ecx)
-	subl	$64, %ecx
-	sub	$1, %ebx
-	jnz	L(mm_main_loop_backward)
-	movdqu	(%esp), %xmm0
-	addl	$16, %esp
-	movdqu	%xmm0, -16(%esi)
-	movdqu	%xmm4, (%edx)
-	movdqu	%xmm5, 16(%edx)
-	movdqu	%xmm6, 32(%edx)
-	movdqu	%xmm7, 48(%edx)
-	POP (%ebx)
-	jmp	L(mm_return_pop_all)
-
-/* Copy [0..16] and return.  */
-L(mm_len_0_16_bytes_backward):
-	testb	$24, %cl
-	jnz	L(mm_len_9_16_bytes_backward)
-	testb	$4, %cl
-	.p2align 4,,5
-	jnz	L(mm_len_5_8_bytes_backward)
-	testl	%ecx, %ecx
-	.p2align 4,,2
-	je	L(return)
-	testb	$2, %cl
-	.p2align 4,,1
-	jne	L(mm_len_3_4_bytes_backward)
-	movzbl	-1(%eax,%ecx), %ebx
-	movzbl	(%eax), %eax
-	movb	%bl, -1(%edx,%ecx)
-	movb	%al, (%edx)
-	jmp	L(return)
-
-L(mm_len_3_4_bytes_backward):
-	movzwl	-2(%eax,%ecx), %ebx
-	movzwl	(%eax), %eax
-	movw	%bx, -2(%edx,%ecx)
-	movw	%ax, (%edx)
-	jmp	L(return)
-
-L(mm_len_9_16_bytes_backward):
-	PUSH (%esi)
-	movl	-4(%eax,%ecx), %ebx
-	movl	-8(%eax,%ecx), %esi
-	movl	%ebx, -4(%edx,%ecx)
-	movl	%esi, -8(%edx,%ecx)
-	subl	$8, %ecx
-	POP (%esi)
-	jmp	L(mm_len_0_16_bytes_backward)
-
-L(mm_len_5_8_bytes_backward):
-	movl	(%eax), %ebx
-	movl	-4(%eax,%ecx), %eax
-	movl	%ebx, (%edx)
-	movl	%eax, -4(%edx,%ecx)
-	jmp	L(return)
-
-/* Big length copy backward part.  */
-	.p2align 4
-L(mm_large_page_loop_backward):
-	movdqu	-64(%eax), %xmm0
-	movdqu	-48(%eax), %xmm1
-	movdqu	-32(%eax), %xmm2
-	movdqu	-16(%eax), %xmm3
-	movntdq	%xmm0, -64(%ecx)
-	subl	$64, %eax
-	movntdq	%xmm1, -48(%ecx)
-	movntdq	%xmm2, -32(%ecx)
-	movntdq	%xmm3, -16(%ecx)
-	subl	$64, %ecx
-	sub	$1, %ebx
-	jnz	L(mm_large_page_loop_backward)
-	sfence
-	movdqu	(%esp), %xmm0
-	addl	$16, %esp
-	movdqu	%xmm0, -16(%esi)
-	movdqu	%xmm4, (%edx)
-	movdqu	%xmm5, 16(%edx)
-	movdqu	%xmm6, 32(%edx)
-	movdqu	%xmm7, 48(%edx)
-	POP (%ebx)
-	jmp	L(mm_return_pop_all)
-
-L(check_forward):
-	add	%edx, %ecx
-	cmp	%eax, %ecx
-	movl	LEN(%esp), %ecx
-	jle	L(forward)
-
-/* Now do checks for lengths. We do [0..16], [0..32], [0..64], [0..128]
-	separately.  */
-	cmp	$16, %ecx
-	jbe	L(mm_len_0_16_bytes_forward)
-
-	cmpl	$32, %ecx
-	ja	L(mm_len_32_or_more_forward)
-
-/* Copy [0..32] and return.  */
-	movdqu	(%eax), %xmm0
-	movdqu	-16(%eax, %ecx), %xmm1
-	movdqu	%xmm0, (%edx)
-	movdqu	%xmm1, -16(%edx, %ecx)
-	jmp	L(return)
-
-L(mm_len_32_or_more_forward):
-	cmpl	$64, %ecx
-	ja	L(mm_len_64_or_more_forward)
-
-/* Copy [0..64] and return.  */
-	movdqu	(%eax), %xmm0
-	movdqu	16(%eax), %xmm1
-	movdqu	-16(%eax, %ecx), %xmm2
-	movdqu	-32(%eax, %ecx), %xmm3
-	movdqu	%xmm0, (%edx)
-	movdqu	%xmm1, 16(%edx)
-	movdqu	%xmm2, -16(%edx, %ecx)
-	movdqu	%xmm3, -32(%edx, %ecx)
-	jmp	L(return)
-
-L(mm_len_64_or_more_forward):
-	cmpl	$128, %ecx
-	ja	L(mm_len_128_or_more_forward)
-
-/* Copy [0..128] and return.  */
-	movdqu	(%eax), %xmm0
-	movdqu	16(%eax), %xmm1
-	movdqu	32(%eax), %xmm2
-	movdqu	48(%eax), %xmm3
-	movdqu	-64(%eax, %ecx), %xmm4
-	movdqu	-48(%eax, %ecx), %xmm5
-	movdqu	-32(%eax, %ecx), %xmm6
-	movdqu	-16(%eax, %ecx), %xmm7
-	movdqu	%xmm0, (%edx)
-	movdqu	%xmm1, 16(%edx)
-	movdqu	%xmm2, 32(%edx)
-	movdqu	%xmm3, 48(%edx)
-	movdqu	%xmm4, -64(%edx, %ecx)
-	movdqu	%xmm5, -48(%edx, %ecx)
-	movdqu	%xmm6, -32(%edx, %ecx)
-	movdqu	%xmm7, -16(%edx, %ecx)
-	jmp	L(return)
-
-L(mm_len_128_or_more_forward):
-	PUSH (%esi)
-	PUSH (%edi)
-	PUSH (%ebx)
-
-/* Aligning the address of destination. */
-	movdqu	-16(%eax, %ecx), %xmm4
-	movdqu	-32(%eax, %ecx), %xmm5
-	movdqu	-48(%eax, %ecx), %xmm6
-	movdqu	-64(%eax, %ecx), %xmm7
-	leal	(%edx, %ecx), %esi
-	movdqu	(%eax), %xmm0
-	subl	$16, %esp
-	movdqu	%xmm0, (%esp)
-	mov	%ecx, %edi
-	leal	16(%edx), %ecx
-	andl	$-16, %ecx
-	movl	%ecx, %ebx
-	subl	%edx, %ebx
-	addl	%ebx, %eax
-	movl	%esi, %ebx
-	subl	%ecx, %ebx
-	shrl	$6, %ebx
-
-# ifdef SHARED_CACHE_SIZE_HALF
-	cmp	$SHARED_CACHE_SIZE_HALF, %edi
-# else
-#  ifdef SHARED
-	PUSH (%ebx)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %edi
-	POP (%ebx)
-#  else
-	cmp	__x86_shared_cache_size_half, %edi
-#  endif
-# endif
-	jae	L(mm_large_page_loop_forward)
-
-	.p2align 4
-L(mm_main_loop_forward):
-
-	prefetcht0 128(%eax)
-
-	movdqu	(%eax), %xmm0
-	movdqu	16(%eax), %xmm1
-	movdqu	32(%eax), %xmm2
-	movdqu	48(%eax), %xmm3
-	movdqa	%xmm0, (%ecx)
-	addl	$64, %eax
-	movaps	%xmm1, 16(%ecx)
-	movaps	%xmm2, 32(%ecx)
-	movaps	%xmm3, 48(%ecx)
-	addl	$64, %ecx
-	sub	$1, %ebx
-	jnz	L(mm_main_loop_forward)
-	movdqu	(%esp), %xmm0
-	addl	$16, %esp
-	movdqu	%xmm0, (%edx)
-	movdqu	%xmm4, -16(%esi)
-	movdqu	%xmm5, -32(%esi)
-	movdqu	%xmm6, -48(%esi)
-	movdqu	%xmm7, -64(%esi)
-	POP (%ebx)
-	jmp	L(mm_return_pop_all)
-
-L(mm_len_0_16_bytes_forward):
-	testb	$24, %cl
-	jne	L(mm_len_9_16_bytes_forward)
-	testb	$4, %cl
-	.p2align 4,,5
-	jne	L(mm_len_5_8_bytes_forward)
-	testl	%ecx, %ecx
-	.p2align 4,,2
-	je	L(return)
-	testb	$2, %cl
-	.p2align 4,,1
-	jne	L(mm_len_2_4_bytes_forward)
-	movzbl	-1(%eax,%ecx), %ebx
-	movzbl	(%eax), %eax
-	movb	%bl, -1(%edx,%ecx)
-	movb	%al, (%edx)
-	jmp	L(return)
-
-L(mm_len_2_4_bytes_forward):
-	movzwl	-2(%eax,%ecx), %ebx
-	movzwl	(%eax), %eax
-	movw	%bx, -2(%edx,%ecx)
-	movw	%ax, (%edx)
-	jmp	L(return)
-
-L(mm_len_5_8_bytes_forward):
-	movl	(%eax), %ebx
-	movl	-4(%eax,%ecx), %eax
-	movl	%ebx, (%edx)
-	movl	%eax, -4(%edx,%ecx)
-	jmp	L(return)
-
-L(mm_len_9_16_bytes_forward):
-	movq	(%eax), %xmm0
-	movq	-8(%eax, %ecx), %xmm1
-	movq	%xmm0, (%edx)
-	movq	%xmm1, -8(%edx, %ecx)
-	jmp	L(return)
-
-L(mm_return_pop_all):
-	movl	%edx, %eax
-	POP (%edi)
-	POP (%esi)
-	RETURN
-
-/* Big length copy forward part.  */
-	.p2align 4
-L(mm_large_page_loop_forward):
-	movdqu	(%eax), %xmm0
-	movdqu	16(%eax), %xmm1
-	movdqu	32(%eax), %xmm2
-	movdqu	48(%eax), %xmm3
-	movntdq	%xmm0, (%ecx)
-	addl	$64, %eax
-	movntdq	%xmm1, 16(%ecx)
-	movntdq	%xmm2, 32(%ecx)
-	movntdq	%xmm3, 48(%ecx)
-	addl	$64, %ecx
-	sub	$1, %ebx
-	jnz	L(mm_large_page_loop_forward)
-	sfence
-	movdqu	(%esp), %xmm0
-	addl	$16, %esp
-	movdqu	%xmm0, (%edx)
-	movdqu	%xmm4, -16(%esi)
-	movdqu	%xmm5, -32(%esi)
-	movdqu	%xmm6, -48(%esi)
-	movdqu	%xmm7, -64(%esi)
-	POP (%ebx)
-	jmp	L(mm_return_pop_all)
-# endif
-
-L(forward):
-	cmp	$16, %ecx
-	jbe	L(len_0_16_bytes)
-
-# ifdef SHARED_CACHE_SIZE_HALF
-	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_shared_cache_size_half, %ecx
-#  endif
-# endif
-	jae     L(large_page)
-
-	movdqu	(%eax), %xmm0
-	movdqu	-16(%eax, %ecx), %xmm1
-	cmpl    $32, %ecx
-	movdqu	%xmm0, (%edx)
-	movdqu	%xmm1, -16(%edx, %ecx)
-	jbe	L(return)
-
-	movdqu	16(%eax), %xmm0
-	movdqu	-32(%eax, %ecx), %xmm1
-	cmpl    $64, %ecx
-	movdqu	%xmm0, 16(%edx)
-	movdqu	%xmm1, -32(%edx, %ecx)
-	jbe	L(return)
-
-	movdqu	32(%eax), %xmm0
-	movdqu	48(%eax), %xmm1
-	movdqu	-48(%eax, %ecx), %xmm2
-	movdqu	-64(%eax, %ecx), %xmm3
-	cmpl    $128, %ecx
-	movdqu	%xmm0, 32(%edx)
-	movdqu	%xmm1, 48(%edx)
-	movdqu	%xmm2, -48(%edx, %ecx)
-	movdqu	%xmm3, -64(%edx, %ecx)
-	jbe	L(return)
-
-/* Now the main loop: we align the address of the destination.  */
-	leal	64(%edx), %ebx
-	andl	$-64, %ebx
-
-	addl	%edx, %ecx
-	andl	$-64, %ecx
-
-	subl	%edx, %eax
-
-/* We should stop two iterations before the termination
-	(in order not to misprefetch).  */
-	subl	$64, %ecx
-	cmpl	%ebx, %ecx
-	je	L(main_loop_just_one_iteration)
-
-	subl	$64, %ecx
-	cmpl	%ebx, %ecx
-	je	L(main_loop_last_two_iterations)
-
-	.p2align 4
-L(main_loop_cache):
-
-	prefetcht0 128(%ebx, %eax)
-
-	movdqu	(%ebx, %eax), %xmm0
-	movdqu	16(%ebx, %eax), %xmm1
-	movdqu	32(%ebx, %eax), %xmm2
-	movdqu	48(%ebx, %eax), %xmm3
-	movdqa	%xmm0, (%ebx)
-	movaps	%xmm1, 16(%ebx)
-	movaps	%xmm2, 32(%ebx)
-	movaps	%xmm3, 48(%ebx)
-	lea	64(%ebx), %ebx
-	cmpl	%ebx, %ecx
-	jne	L(main_loop_cache)
-
-L(main_loop_last_two_iterations):
-	movdqu	(%ebx, %eax), %xmm0
-	movdqu	16(%ebx, %eax), %xmm1
-	movdqu	32(%ebx, %eax), %xmm2
-	movdqu	48(%ebx, %eax), %xmm3
-	movdqu	64(%ebx, %eax), %xmm4
-	movdqu	80(%ebx, %eax), %xmm5
-	movdqu	96(%ebx, %eax), %xmm6
-	movdqu	112(%ebx, %eax), %xmm7
-	movdqa	%xmm0, (%ebx)
-	movaps	%xmm1, 16(%ebx)
-	movaps	%xmm2, 32(%ebx)
-	movaps	%xmm3, 48(%ebx)
-	movaps	%xmm4, 64(%ebx)
-	movaps	%xmm5, 80(%ebx)
-	movaps	%xmm6, 96(%ebx)
-	movaps	%xmm7, 112(%ebx)
-	jmp	L(return)
-
-L(main_loop_just_one_iteration):
-	movdqu	(%ebx, %eax), %xmm0
-	movdqu	16(%ebx, %eax), %xmm1
-	movdqu	32(%ebx, %eax), %xmm2
-	movdqu	48(%ebx, %eax), %xmm3
-	movdqa	%xmm0, (%ebx)
-	movaps	%xmm1, 16(%ebx)
-	movaps	%xmm2, 32(%ebx)
-	movaps	%xmm3, 48(%ebx)
-	jmp	L(return)
-
-L(large_page):
-	movdqu	(%eax), %xmm0
-	movdqu	16(%eax), %xmm1
-	movdqu	32(%eax), %xmm2
-	movdqu	48(%eax), %xmm3
-	movdqu	-64(%eax, %ecx), %xmm4
-	movdqu	-48(%eax, %ecx), %xmm5
-	movdqu	-32(%eax, %ecx), %xmm6
-	movdqu	-16(%eax, %ecx), %xmm7
-	movdqu	%xmm0, (%edx)
-	movdqu	%xmm1, 16(%edx)
-	movdqu	%xmm2, 32(%edx)
-	movdqu	%xmm3, 48(%edx)
-	movdqu	%xmm4, -64(%edx, %ecx)
-	movdqu	%xmm5, -48(%edx, %ecx)
-	movdqu	%xmm6, -32(%edx, %ecx)
-	movdqu	%xmm7, -16(%edx, %ecx)
-
-	movdqu	64(%eax), %xmm0
-	movdqu	80(%eax), %xmm1
-	movdqu	96(%eax), %xmm2
-	movdqu	112(%eax), %xmm3
-	movdqu	-128(%eax, %ecx), %xmm4
-	movdqu	-112(%eax, %ecx), %xmm5
-	movdqu	-96(%eax, %ecx), %xmm6
-	movdqu	-80(%eax, %ecx), %xmm7
-	movdqu	%xmm0, 64(%edx)
-	movdqu	%xmm1, 80(%edx)
-	movdqu	%xmm2, 96(%edx)
-	movdqu	%xmm3, 112(%edx)
-	movdqu	%xmm4, -128(%edx, %ecx)
-	movdqu	%xmm5, -112(%edx, %ecx)
-	movdqu	%xmm6, -96(%edx, %ecx)
-	movdqu	%xmm7, -80(%edx, %ecx)
-
-/* Now the main loop with non temporal stores. We align
-	the address of the destination.  */
-	leal	128(%edx), %ebx
-	andl	$-128, %ebx
-
-	addl	%edx, %ecx
-	andl	$-128, %ecx
-
-	subl	%edx, %eax
-
-	.p2align 4
-L(main_loop_large_page):
-	movdqu	(%ebx, %eax), %xmm0
-	movdqu	16(%ebx, %eax), %xmm1
-	movdqu	32(%ebx, %eax), %xmm2
-	movdqu	48(%ebx, %eax), %xmm3
-	movdqu	64(%ebx, %eax), %xmm4
-	movdqu	80(%ebx, %eax), %xmm5
-	movdqu	96(%ebx, %eax), %xmm6
-	movdqu	112(%ebx, %eax), %xmm7
-	movntdq	%xmm0, (%ebx)
-	movntdq	%xmm1, 16(%ebx)
-	movntdq	%xmm2, 32(%ebx)
-	movntdq	%xmm3, 48(%ebx)
-	movntdq	%xmm4, 64(%ebx)
-	movntdq	%xmm5, 80(%ebx)
-	movntdq	%xmm6, 96(%ebx)
-	movntdq	%xmm7, 112(%ebx)
-	lea	128(%ebx), %ebx
-	cmpl	%ebx, %ecx
-	jne	L(main_loop_large_page)
-	sfence
-	jmp	L(return)
-
-L(len_0_16_bytes):
-	testb	$24, %cl
-	jne	L(len_9_16_bytes)
-	testb	$4, %cl
-	.p2align 4,,5
-	jne	L(len_5_8_bytes)
-	testl	%ecx, %ecx
-	.p2align 4,,2
-	je	L(return)
-	movzbl	(%eax), %ebx
-	testb	$2, %cl
-	movb	%bl, (%edx)
-	je	L(return)
-	movzwl	-2(%eax,%ecx), %ebx
-	movw	%bx, -2(%edx,%ecx)
-	jmp	L(return)
-
-L(len_9_16_bytes):
-	movq	(%eax), %xmm0
-	movq	-8(%eax, %ecx), %xmm1
-	movq	%xmm0, (%edx)
-	movq	%xmm1, -8(%edx, %ecx)
-	jmp	L(return)
-
-L(len_5_8_bytes):
-	movl	(%eax), %ebx
-	movl	%ebx, (%edx)
-	movl	-4(%eax,%ecx), %ebx
-	movl	%ebx, -4(%edx,%ecx)
-
-L(return):
-	movl	%edx, %eax
-# if !defined USE_AS_BCOPY && defined USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-# endif
-	RETURN
-
-END (MEMCPY)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
deleted file mode 100644
index 687e083147..0000000000
--- a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
+++ /dev/null
@@ -1,1809 +0,0 @@
-/* memcpy with SSSE3 and REP string.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#if IS_IN (libc) \
-    && (defined SHARED \
-	|| defined USE_AS_MEMMOVE \
-	|| !defined USE_MULTIARCH)
-
-#include "asm-syntax.h"
-
-#ifndef MEMCPY
-# define MEMCPY		__memcpy_ssse3_rep	/* Default name, used only when MEMCPY is not already defined.  */
-# define MEMCPY_CHK	__memcpy_chk_ssse3_rep	/* Matching name for the length-checking entry point.  */
-#endif
-
-#ifdef USE_AS_BCOPY
-# define SRC		PARMS	/* Under USE_AS_BCOPY the stack arguments are read as (src, dest, len).  */
-# define DEST		SRC+4
-# define LEN		DEST+4
-#else
-# define DEST		PARMS	/* Otherwise they are read as (dest, src, len).  */
-# define SRC		DEST+4
-# define LEN		SRC+4
-#endif
-
-#define CFI_PUSH(REG)						\
-  cfi_adjust_cfa_offset (4);					\
-  cfi_rel_offset (REG, 0)	/* Unwind info for a 4-byte push: CFA offset grows by 4 and REG is recorded at offset 0.  */
-
-#define CFI_POP(REG)						\
-  cfi_adjust_cfa_offset (-4);					\
-  cfi_restore (REG)	/* Unwind info for the matching pop: CFA offset shrinks by 4 and REG is marked restored.  */
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)	/* Push REG and emit the unwind annotations for it.  */
-#define POP(REG)	popl REG; CFI_POP (REG)	/* Pop REG and emit the unwind annotations for it.  */
-
-#ifdef SHARED
-# define PARMS		8		/* First argument is 8 bytes above ESP once ENTRANCE has pushed EBX.  */
-# define ENTRANCE	PUSH (%ebx);	/* Save EBX; the jump-table macros below overwrite it.  */
-# define RETURN_END	POP (%ebx); ret	/* Restore EBX and return; for the last exit, nothing follows.  */
-# define RETURN		RETURN_END; CFI_PUSH (%ebx)	/* As RETURN_END, then re-declare EBX as pushed for the code placed after the ret.  */
-# define JMPTBL(I, B)	I - B	/* Table entry: target I expressed relative to table base B.  */
-
-/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
-   jump table with relative offsets.  INDEX is a register containing the
-   index into the jump table.  SCALE is the scale of INDEX.  */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
-    /* We first load PC into EBX.  */				\
-    SETUP_PIC_REG(bx);						\
-    /* Get the address of the jump table.  */			\
-    addl	$(TABLE - .), %ebx;				\
-    /* Get the entry and convert the relative offset to the	\
-       absolute address.  */					\
-    addl	(%ebx,INDEX,SCALE), %ebx;			\
-    /* EBX now holds the absolute target address.  Go.  */	\
-    jmp		*%ebx
-
-# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE)			\
-    addl	$(TABLE - .), %ebx	/* Expects EBX == address of this instruction (true at the L(shl_N) labels, which are entered via jmp *%ebx); leaves EBX == address of TABLE.  */
-
-# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE)	\
-    addl	(%ebx,INDEX,SCALE), %ebx;			\
-    /* EBX (table base from ..._VALUE) plus the entry is the	\
-       absolute target address.  Go.  */			\
-    jmp		*%ebx
-#else
-# define PARMS		4	/* First argument is 4 bytes above ESP; nothing is pushed by ENTRANCE.  */
-# define ENTRANCE	/* Nothing to save.  */
-# define RETURN_END	ret
-# define RETURN		RETURN_END
-# define JMPTBL(I, B)	I	/* Table entry: the address of I itself.  */
-
-/* Branch to an entry in a jump table.  TABLE is a jump table with
-   absolute offsets.  INDEX is a register containing the index into the
-   jump table.  SCALE is the scale of INDEX.  */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
-    jmp		*TABLE(,INDEX,SCALE)
-
-# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE)	/* No setup needed: TABLE is addressed directly.  */
-
-# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE)		\
-    jmp		*TABLE(,INDEX,SCALE)
-#endif
-
-	.section .text.ssse3,"ax",@progbits
-#if !defined USE_AS_BCOPY
-ENTRY (MEMCPY_CHK)	/* Checked entry point; not built for the bcopy variant.  */
-	movl	12(%esp), %eax	/* EAX = third stack argument, the slot MEMCPY reads as LEN (EBX is not pushed yet).  */
-	cmpl	%eax, 16(%esp)	/* Compare the fourth argument with it -- presumably the destination buffer size; confirm against the __memcpy_chk prototype.  */
-	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* Fourth argument is below the length (unsigned): divert to __chk_fail.  */
-END (MEMCPY_CHK)	/* No ret above: when the check passes, execution continues into MEMCPY, which follows.  */
-#endif
-ENTRY (MEMCPY)
-	ENTRANCE
-	movl	LEN(%esp), %ecx
-	movl	SRC(%esp), %eax
-	movl	DEST(%esp), %edx
-
-#ifdef USE_AS_MEMMOVE
-	cmp	%eax, %edx
-	jb	L(copy_forward)
-	je	L(fwd_write_0bytes)
-	cmp	$48, %ecx
-	jb	L(bk_write_less48bytes)
-	add	%ecx, %eax
-	cmp	%eax, %edx
-	movl	SRC(%esp), %eax
-	jb	L(copy_backward)
-
-L(copy_forward):
-#endif
-	cmp	$48, %ecx
-	jae	L(48bytesormore)
-
-L(fwd_write_less32bytes):
-#ifndef USE_AS_MEMMOVE
-	cmp	%dl, %al
-	jb	L(bk_write)
-#endif
-	add	%ecx, %edx
-	add	%ecx, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-#ifndef USE_AS_MEMMOVE
-L(bk_write):
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
-#endif
-
-	ALIGN (4)
-/* ECX >= 48 here.  EDX may be unaligned; it is rounded up to the next 16-byte boundary below.  */
-L(48bytesormore):
-	movdqu	(%eax), %xmm0
-	PUSH (%edi)
-	movl	%edx, %edi
-	and	$-16, %edx
-	PUSH (%esi)
-	cfi_remember_state
-	add	$16, %edx
-	movl	%edi, %esi
-	sub	%edx, %edi
-	add	%edi, %ecx
-	sub	%edi, %eax
-
-#ifdef SHARED_CACHE_SIZE_HALF
-	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
-#else
-# ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_shared_cache_size_half, %ecx
-# endif
-#endif
-
-	mov	%eax, %edi
-	jae	L(large_page)
-	and	$0xf, %edi
-	jz	L(shl_0)
-
-	BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
-
-	ALIGN (4)
-L(shl_0):
-	movdqu	%xmm0, (%esi)
-	xor	%edi, %edi
-	cmp	$127, %ecx
-	ja	L(shl_0_gobble)
-	lea	-32(%ecx), %ecx
-L(shl_0_loop):
-	movdqa	(%eax, %edi), %xmm0
-	movdqa	16(%eax, %edi), %xmm1
-	sub	$32, %ecx
-	movdqa	%xmm0, (%edx, %edi)
-	movdqa	%xmm1, 16(%edx, %edi)
-	lea	32(%edi), %edi
-	jb	L(shl_0_end)
-
-	movdqa	(%eax, %edi), %xmm0
-	movdqa	16(%eax, %edi), %xmm1
-	sub	$32, %ecx
-	movdqa	%xmm0, (%edx, %edi)
-	movdqa	%xmm1, 16(%edx, %edi)
-	lea	32(%edi), %edi
-	jb	L(shl_0_end)
-
-	movdqa	(%eax, %edi), %xmm0
-	movdqa	16(%eax, %edi), %xmm1
-	sub	$32, %ecx
-	movdqa	%xmm0, (%edx, %edi)
-	movdqa	%xmm1, 16(%edx, %edi)
-	lea	32(%edi), %edi
-	jb	L(shl_0_end)
-
-	movdqa	(%eax, %edi), %xmm0
-	movdqa	16(%eax, %edi), %xmm1
-	sub	$32, %ecx
-	movdqa	%xmm0, (%edx, %edi)
-	movdqa	%xmm1, 16(%edx, %edi)
-	lea	32(%edi), %edi
-L(shl_0_end):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	add	%edi, %eax
-	POP (%esi)
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-L(shl_0_gobble):
-
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	mov	__x86_data_cache_size_half@GOTOFF(%ebx), %edi
-# else
-	mov	__x86_data_cache_size_half, %edi
-# endif
-#endif
-	mov	%edi, %esi
-	shr	$3, %esi
-	sub	%esi, %edi
-	cmp	%edi, %ecx
-	jae	L(shl_0_gobble_mem_start)
-	sub	$128, %ecx
-	ALIGN (4)
-L(shl_0_gobble_cache_loop):
-	movdqa	(%eax), %xmm0
-	movaps	0x10(%eax), %xmm1
-	movaps	0x20(%eax), %xmm2
-	movaps	0x30(%eax), %xmm3
-	movaps	0x40(%eax), %xmm4
-	movaps	0x50(%eax), %xmm5
-	movaps	0x60(%eax), %xmm6
-	movaps	0x70(%eax), %xmm7
-	lea	0x80(%eax), %eax
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movaps	%xmm1, 0x10(%edx)
-	movaps	%xmm2, 0x20(%edx)
-	movaps	%xmm3, 0x30(%edx)
-	movaps	%xmm4, 0x40(%edx)
-	movaps	%xmm5, 0x50(%edx)
-	movaps	%xmm6, 0x60(%edx)
-	movaps	%xmm7, 0x70(%edx)
-	lea	0x80(%edx), %edx
-
-	jae	L(shl_0_gobble_cache_loop)
-	add	$0x80, %ecx
-	cmp	$0x40, %ecx
-	jb	L(shl_0_cache_less_64bytes)
-
-	movdqa	(%eax), %xmm0
-	sub	$0x40, %ecx
-	movdqa	0x10(%eax), %xmm1
-
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-
-	movdqa	0x20(%eax), %xmm0
-	movdqa	0x30(%eax), %xmm1
-	add	$0x40, %eax
-
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm1, 0x30(%edx)
-	add	$0x40, %edx
-L(shl_0_cache_less_64bytes):
-	cmp	$0x20, %ecx
-	jb	L(shl_0_cache_less_32bytes)
-	movdqa	(%eax), %xmm0
-	sub	$0x20, %ecx
-	movdqa	0x10(%eax), %xmm1
-	add	$0x20, %eax
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-	add	$0x20, %edx
-L(shl_0_cache_less_32bytes):
-	cmp	$0x10, %ecx
-	jb	L(shl_0_cache_less_16bytes)
-	sub	$0x10, %ecx
-	movdqa	(%eax), %xmm0
-	add	$0x10, %eax
-	movdqa	%xmm0, (%edx)
-	add	$0x10, %edx
-L(shl_0_cache_less_16bytes):
-	add	%ecx, %edx
-	add	%ecx, %eax
-	POP (%esi)
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_0_gobble_mem_start):
-	cmp	%al, %dl
-	je	L(copy_page_by_rep)
-	sub	$128, %ecx
-L(shl_0_gobble_mem_loop):
-	prefetchnta 0x1c0(%eax)
-	prefetchnta 0x280(%eax)
-	prefetchnta 0x1c0(%edx)
-	prefetchnta 0x280(%edx)
-
-	movdqa	(%eax), %xmm0
-	movaps	0x10(%eax), %xmm1
-	movaps	0x20(%eax), %xmm2
-	movaps	0x30(%eax), %xmm3
-	movaps	0x40(%eax), %xmm4
-	movaps	0x50(%eax), %xmm5
-	movaps	0x60(%eax), %xmm6
-	movaps	0x70(%eax), %xmm7
-	lea	0x80(%eax), %eax
-	sub	$0x80, %ecx
-	movdqa	%xmm0, (%edx)
-	movaps	%xmm1, 0x10(%edx)
-	movaps	%xmm2, 0x20(%edx)
-	movaps	%xmm3, 0x30(%edx)
-	movaps	%xmm4, 0x40(%edx)
-	movaps	%xmm5, 0x50(%edx)
-	movaps	%xmm6, 0x60(%edx)
-	movaps	%xmm7, 0x70(%edx)
-	lea	0x80(%edx), %edx
-
-	jae	L(shl_0_gobble_mem_loop)
-	add	$0x80, %ecx
-	cmp	$0x40, %ecx
-	jb	L(shl_0_mem_less_64bytes)
-
-	movdqa	(%eax), %xmm0
-	sub	$0x40, %ecx
-	movdqa	0x10(%eax), %xmm1
-
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-
-	movdqa	0x20(%eax), %xmm0
-	movdqa	0x30(%eax), %xmm1
-	add	$0x40, %eax
-
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm1, 0x30(%edx)
-	add	$0x40, %edx
-L(shl_0_mem_less_64bytes):
-	cmp	$0x20, %ecx
-	jb	L(shl_0_mem_less_32bytes)
-	movdqa	(%eax), %xmm0
-	sub	$0x20, %ecx
-	movdqa	0x10(%eax), %xmm1
-	add	$0x20, %eax
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-	add	$0x20, %edx
-L(shl_0_mem_less_32bytes):
-	cmp	$0x10, %ecx
-	jb	L(shl_0_mem_less_16bytes)
-	sub	$0x10, %ecx
-	movdqa	(%eax), %xmm0
-	add	$0x10, %eax
-	movdqa	%xmm0, (%edx)
-	add	$0x10, %edx
-L(shl_0_mem_less_16bytes):
-	add	%ecx, %edx
-	add	%ecx, %eax
-	POP (%esi)
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_1):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$1, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_1_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$1, %xmm2, %xmm3
-	palignr	$1, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_1_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$1, %xmm2, %xmm3
-	palignr	$1, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_1_loop)
-
-L(shl_1_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	1(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_2):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$2, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_2_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$2, %xmm2, %xmm3
-	palignr	$2, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_2_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$2, %xmm2, %xmm3
-	palignr	$2, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_2_loop)
-
-L(shl_2_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	2(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_3):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$3, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_3_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$3, %xmm2, %xmm3
-	palignr	$3, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_3_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$3, %xmm2, %xmm3
-	palignr	$3, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_3_loop)
-
-L(shl_3_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	3(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_4):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$4, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_4_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$4, %xmm2, %xmm3
-	palignr	$4, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_4_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$4, %xmm2, %xmm3
-	palignr	$4, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_4_loop)
-
-L(shl_4_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	4(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_5):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$5, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_5_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$5, %xmm2, %xmm3
-	palignr	$5, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_5_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$5, %xmm2, %xmm3
-	palignr	$5, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_5_loop)
-
-L(shl_5_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	5(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_6):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$6, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_6_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$6, %xmm2, %xmm3
-	palignr	$6, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_6_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$6, %xmm2, %xmm3
-	palignr	$6, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_6_loop)
-
-L(shl_6_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	6(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_7):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$7, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_7_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$7, %xmm2, %xmm3
-	palignr	$7, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_7_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$7, %xmm2, %xmm3
-	palignr	$7, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_7_loop)
-
-L(shl_7_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	7(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_8):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$8, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_8_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$8, %xmm2, %xmm3
-	palignr	$8, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_8_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$8, %xmm2, %xmm3
-	palignr	$8, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_8_loop)
-
-L(shl_8_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	8(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_9):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$9, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_9_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$9, %xmm2, %xmm3
-	palignr	$9, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_9_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$9, %xmm2, %xmm3
-	palignr	$9, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_9_loop)
-
-L(shl_9_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	9(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_10):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$10, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_10_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$10, %xmm2, %xmm3
-	palignr	$10, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_10_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$10, %xmm2, %xmm3
-	palignr	$10, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_10_loop)
-
-L(shl_10_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	10(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_11):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$11, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_11_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$11, %xmm2, %xmm3
-	palignr	$11, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_11_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$11, %xmm2, %xmm3
-	palignr	$11, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_11_loop)
-
-L(shl_11_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	11(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_12):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$12, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_12_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$12, %xmm2, %xmm3
-	palignr	$12, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_12_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$12, %xmm2, %xmm3
-	palignr	$12, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_12_loop)
-
-L(shl_12_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	12(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_13):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$13, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_13_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$13, %xmm2, %xmm3
-	palignr	$13, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_13_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$13, %xmm2, %xmm3
-	palignr	$13, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_13_loop)
-
-L(shl_13_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	13(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_14):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$14, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_14_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$14, %xmm2, %xmm3
-	palignr	$14, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_14_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$14, %xmm2, %xmm3
-	palignr	$14, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_14_loop)
-
-L(shl_14_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	14(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(shl_15):
-	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
-	sub	$15, %eax
-	movaps	(%eax), %xmm1
-	xor	%edi, %edi
-	sub	$32, %ecx
-	movdqu	%xmm0, (%esi)
-	POP (%esi)
-L(shl_15_loop):
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$15, %xmm2, %xmm3
-	palignr	$15, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(shl_15_end)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$15, %xmm2, %xmm3
-	palignr	$15, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(shl_15_loop)
-
-L(shl_15_end):
-	add	$32, %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	15(%edi, %eax), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-
-	ALIGN (4)
-L(fwd_write_44bytes):
-	movl	-44(%eax), %ecx
-	movl	%ecx, -44(%edx)
-L(fwd_write_40bytes):
-	movl	-40(%eax), %ecx
-	movl	%ecx, -40(%edx)
-L(fwd_write_36bytes):
-	movl	-36(%eax), %ecx
-	movl	%ecx, -36(%edx)
-L(fwd_write_32bytes):
-	movl	-32(%eax), %ecx
-	movl	%ecx, -32(%edx)
-L(fwd_write_28bytes):
-	movl	-28(%eax), %ecx
-	movl	%ecx, -28(%edx)
-L(fwd_write_24bytes):
-	movl	-24(%eax), %ecx
-	movl	%ecx, -24(%edx)
-L(fwd_write_20bytes):
-	movl	-20(%eax), %ecx
-	movl	%ecx, -20(%edx)
-L(fwd_write_16bytes):
-	movl	-16(%eax), %ecx
-	movl	%ecx, -16(%edx)
-L(fwd_write_12bytes):
-	movl	-12(%eax), %ecx
-	movl	%ecx, -12(%edx)
-L(fwd_write_8bytes):
-	movl	-8(%eax), %ecx
-	movl	%ecx, -8(%edx)
-L(fwd_write_4bytes):
-	movl	-4(%eax), %ecx
-	movl	%ecx, -4(%edx)
-L(fwd_write_0bytes):
-#ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-# else
-	movl	DEST(%esp), %eax
-# endif
-#endif
-	RETURN
-
-	ALIGN (4)
-L(fwd_write_5bytes):
-	movl	-5(%eax), %ecx
-	movl	-4(%eax), %eax
-	movl	%ecx, -5(%edx)
-	movl	%eax, -4(%edx)
-#ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-# else
-	movl	DEST(%esp), %eax
-# endif
-#endif
-	RETURN
-
-	ALIGN (4)
-L(fwd_write_45bytes):
-	movl	-45(%eax), %ecx
-	movl	%ecx, -45(%edx)
-L(fwd_write_41bytes):
-	movl	-41(%eax), %ecx
-	movl	%ecx, -41(%edx)
-L(fwd_write_37bytes):
-	movl	-37(%eax), %ecx
-	movl	%ecx, -37(%edx)
-L(fwd_write_33bytes):
-	movl	-33(%eax), %ecx
-	movl	%ecx, -33(%edx)
-L(fwd_write_29bytes):
-	movl	-29(%eax), %ecx
-	movl	%ecx, -29(%edx)
-L(fwd_write_25bytes):
-	movl	-25(%eax), %ecx
-	movl	%ecx, -25(%edx)
-L(fwd_write_21bytes):
-	movl	-21(%eax), %ecx
-	movl	%ecx, -21(%edx)
-L(fwd_write_17bytes):
-	movl	-17(%eax), %ecx
-	movl	%ecx, -17(%edx)
-L(fwd_write_13bytes):
-	movl	-13(%eax), %ecx
-	movl	%ecx, -13(%edx)
-L(fwd_write_9bytes):
-	movl	-9(%eax), %ecx
-	movl	%ecx, -9(%edx)
-	movl	-5(%eax), %ecx
-	movl	%ecx, -5(%edx)
-L(fwd_write_1bytes):
-	movzbl	-1(%eax), %ecx
-	movb	%cl, -1(%edx)
-#ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-# else
-	movl	DEST(%esp), %eax
-# endif
-#endif
-	RETURN
-
-	ALIGN (4)
-L(fwd_write_46bytes):
-	movl	-46(%eax), %ecx
-	movl	%ecx, -46(%edx)
-L(fwd_write_42bytes):
-	movl	-42(%eax), %ecx
-	movl	%ecx, -42(%edx)
-L(fwd_write_38bytes):
-	movl	-38(%eax), %ecx
-	movl	%ecx, -38(%edx)
-L(fwd_write_34bytes):
-	movl	-34(%eax), %ecx
-	movl	%ecx, -34(%edx)
-L(fwd_write_30bytes):
-	movl	-30(%eax), %ecx
-	movl	%ecx, -30(%edx)
-L(fwd_write_26bytes):
-	movl	-26(%eax), %ecx
-	movl	%ecx, -26(%edx)
-L(fwd_write_22bytes):
-	movl	-22(%eax), %ecx
-	movl	%ecx, -22(%edx)
-L(fwd_write_18bytes):
-	movl	-18(%eax), %ecx
-	movl	%ecx, -18(%edx)
-L(fwd_write_14bytes):
-	movl	-14(%eax), %ecx
-	movl	%ecx, -14(%edx)
-L(fwd_write_10bytes):
-	movl	-10(%eax), %ecx
-	movl	%ecx, -10(%edx)
-L(fwd_write_6bytes):
-	movl	-6(%eax), %ecx
-	movl	%ecx, -6(%edx)
-L(fwd_write_2bytes):
-	movzwl	-2(%eax), %ecx
-	movw	%cx, -2(%edx)
-#ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-# else
-	movl	DEST(%esp), %eax
-# endif
-#endif
-	RETURN
-
-	ALIGN (4)
-L(fwd_write_47bytes):
-	movl	-47(%eax), %ecx
-	movl	%ecx, -47(%edx)
-L(fwd_write_43bytes):
-	movl	-43(%eax), %ecx
-	movl	%ecx, -43(%edx)
-L(fwd_write_39bytes):
-	movl	-39(%eax), %ecx
-	movl	%ecx, -39(%edx)
-L(fwd_write_35bytes):
-	movl	-35(%eax), %ecx
-	movl	%ecx, -35(%edx)
-L(fwd_write_31bytes):
-	movl	-31(%eax), %ecx
-	movl	%ecx, -31(%edx)
-L(fwd_write_27bytes):
-	movl	-27(%eax), %ecx
-	movl	%ecx, -27(%edx)
-L(fwd_write_23bytes):
-	movl	-23(%eax), %ecx
-	movl	%ecx, -23(%edx)
-L(fwd_write_19bytes):
-	movl	-19(%eax), %ecx
-	movl	%ecx, -19(%edx)
-L(fwd_write_15bytes):
-	movl	-15(%eax), %ecx
-	movl	%ecx, -15(%edx)
-L(fwd_write_11bytes):
-	movl	-11(%eax), %ecx
-	movl	%ecx, -11(%edx)
-L(fwd_write_7bytes):
-	movl	-7(%eax), %ecx
-	movl	%ecx, -7(%edx)
-L(fwd_write_3bytes):
-	movzwl	-3(%eax), %ecx
-	movzbl	-1(%eax), %eax
-	movw	%cx, -3(%edx)
-	movb	%al, -1(%edx)
-#ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-# else
-	movl	DEST(%esp), %eax
-# endif
-#endif
-	RETURN_END
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(large_page):
-	movdqu	(%eax), %xmm1
-	movdqu	%xmm0, (%esi)
-	movntdq	%xmm1, (%edx)
-	add	$0x10, %eax
-	add	$0x10, %edx
-	sub	$0x10, %ecx
-	cmp	%al, %dl
-	je	L(copy_page_by_rep)
-L(large_page_loop_init):
-	POP (%esi)
-	sub	$0x80, %ecx
-	POP (%edi)
-L(large_page_loop):
-	prefetchnta	0x1c0(%eax)
-	prefetchnta	0x280(%eax)
-	movdqu	(%eax), %xmm0
-	movdqu	0x10(%eax), %xmm1
-	movdqu	0x20(%eax), %xmm2
-	movdqu	0x30(%eax), %xmm3
-	movdqu	0x40(%eax), %xmm4
-	movdqu	0x50(%eax), %xmm5
-	movdqu	0x60(%eax), %xmm6
-	movdqu	0x70(%eax), %xmm7
-	lea	0x80(%eax), %eax
-	lfence
-	sub	$0x80, %ecx
-	movntdq	%xmm0, (%edx)
-	movntdq	%xmm1, 0x10(%edx)
-	movntdq	%xmm2, 0x20(%edx)
-	movntdq	%xmm3, 0x30(%edx)
-	movntdq	%xmm4, 0x40(%edx)
-	movntdq	%xmm5, 0x50(%edx)
-	movntdq	%xmm6, 0x60(%edx)
-	movntdq	%xmm7, 0x70(%edx)
-	lea	0x80(%edx), %edx
-	jae	L(large_page_loop)
-	add	$0x80, %ecx
-	cmp	$0x40, %ecx
-	jb	L(large_page_less_64bytes)
-
-	movdqu	(%eax), %xmm0
-	movdqu	0x10(%eax), %xmm1
-	movdqu	0x20(%eax), %xmm2
-	movdqu	0x30(%eax), %xmm3
-	lea	0x40(%eax), %eax
-
-	movntdq	%xmm0, (%edx)
-	movntdq	%xmm1, 0x10(%edx)
-	movntdq	%xmm2, 0x20(%edx)
-	movntdq	%xmm3, 0x30(%edx)
-	lea	0x40(%edx), %edx
-	sub	$0x40, %ecx
-L(large_page_less_64bytes):
-	cmp	$32, %ecx
-	jb	L(large_page_less_32bytes)
-	movdqu	(%eax), %xmm0
-	movdqu	0x10(%eax), %xmm1
-	lea	0x20(%eax), %eax
-	movntdq	%xmm0, (%edx)
-	movntdq	%xmm1, 0x10(%edx)
-	lea	0x20(%edx), %edx
-	sub	$0x20, %ecx
-L(large_page_less_32bytes):
-	add	%ecx, %edx
-	add	%ecx, %eax
-	sfence
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
-	cfi_restore_state
-	cfi_remember_state
-	ALIGN (4)
-L(copy_page_by_rep):
-	mov	%eax, %esi
-	mov	%edx, %edi
-	mov	%ecx, %edx
-	shr	$2, %ecx
-	and	$3, %edx
-	rep	movsl
-	jz	L(copy_page_by_rep_exit)
-	cmp	$2, %edx
-	jb	L(copy_page_by_rep_left_1)
-	movzwl	(%esi), %eax
-	movw	%ax, (%edi)
-	add	$2, %esi
-	add	$2, %edi
-	sub	$2, %edx
-	jz	L(copy_page_by_rep_exit)
-L(copy_page_by_rep_left_1):
-	movzbl	(%esi), %eax
-	movb	%al, (%edi)
-L(copy_page_by_rep_exit):
-	POP (%esi)
-	POP (%edi)
-#ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-# endif
-#endif
-	RETURN
-
-	ALIGN (4)
-L(bk_write_44bytes):
-	movl	40(%eax), %ecx
-	movl	%ecx, 40(%edx)
-L(bk_write_40bytes):
-	movl	36(%eax), %ecx
-	movl	%ecx, 36(%edx)
-L(bk_write_36bytes):
-	movl	32(%eax), %ecx
-	movl	%ecx, 32(%edx)
-L(bk_write_32bytes):
-	movl	28(%eax), %ecx
-	movl	%ecx, 28(%edx)
-L(bk_write_28bytes):
-	movl	24(%eax), %ecx
-	movl	%ecx, 24(%edx)
-L(bk_write_24bytes):
-	movl	20(%eax), %ecx
-	movl	%ecx, 20(%edx)
-L(bk_write_20bytes):
-	movl	16(%eax), %ecx
-	movl	%ecx, 16(%edx)
-L(bk_write_16bytes):
-	movl	12(%eax), %ecx
-	movl	%ecx, 12(%edx)
-L(bk_write_12bytes):
-	movl	8(%eax), %ecx
-	movl	%ecx, 8(%edx)
-L(bk_write_8bytes):
-	movl	4(%eax), %ecx
-	movl	%ecx, 4(%edx)
-L(bk_write_4bytes):
-	movl	(%eax), %ecx
-	movl	%ecx, (%edx)
-L(bk_write_0bytes):
-#ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-# endif
-#endif
-	RETURN
-
-	ALIGN (4)
-L(bk_write_45bytes):
-	movl	41(%eax), %ecx
-	movl	%ecx, 41(%edx)
-L(bk_write_41bytes):
-	movl	37(%eax), %ecx
-	movl	%ecx, 37(%edx)
-L(bk_write_37bytes):
-	movl	33(%eax), %ecx
-	movl	%ecx, 33(%edx)
-L(bk_write_33bytes):
-	movl	29(%eax), %ecx
-	movl	%ecx, 29(%edx)
-L(bk_write_29bytes):
-	movl	25(%eax), %ecx
-	movl	%ecx, 25(%edx)
-L(bk_write_25bytes):
-	movl	21(%eax), %ecx
-	movl	%ecx, 21(%edx)
-L(bk_write_21bytes):
-	movl	17(%eax), %ecx
-	movl	%ecx, 17(%edx)
-L(bk_write_17bytes):
-	movl	13(%eax), %ecx
-	movl	%ecx, 13(%edx)
-L(bk_write_13bytes):
-	movl	9(%eax), %ecx
-	movl	%ecx, 9(%edx)
-L(bk_write_9bytes):
-	movl	5(%eax), %ecx
-	movl	%ecx, 5(%edx)
-L(bk_write_5bytes):
-	movl	1(%eax), %ecx
-	movl	%ecx, 1(%edx)
-L(bk_write_1bytes):
-	movzbl	(%eax), %ecx
-	movb	%cl, (%edx)
-#ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-# endif
-#endif
-	RETURN
-
-	ALIGN (4)
-L(bk_write_46bytes):
-	movl	42(%eax), %ecx
-	movl	%ecx, 42(%edx)
-L(bk_write_42bytes):
-	movl	38(%eax), %ecx
-	movl	%ecx, 38(%edx)
-L(bk_write_38bytes):
-	movl	34(%eax), %ecx
-	movl	%ecx, 34(%edx)
-L(bk_write_34bytes):
-	movl	30(%eax), %ecx
-	movl	%ecx, 30(%edx)
-L(bk_write_30bytes):
-	movl	26(%eax), %ecx
-	movl	%ecx, 26(%edx)
-L(bk_write_26bytes):
-	movl	22(%eax), %ecx
-	movl	%ecx, 22(%edx)
-L(bk_write_22bytes):
-	movl	18(%eax), %ecx
-	movl	%ecx, 18(%edx)
-L(bk_write_18bytes):
-	movl	14(%eax), %ecx
-	movl	%ecx, 14(%edx)
-L(bk_write_14bytes):
-	movl	10(%eax), %ecx
-	movl	%ecx, 10(%edx)
-L(bk_write_10bytes):
-	movl	6(%eax), %ecx
-	movl	%ecx, 6(%edx)
-L(bk_write_6bytes):
-	movl	2(%eax), %ecx
-	movl	%ecx, 2(%edx)
-L(bk_write_2bytes):
-	movzwl	(%eax), %ecx
-	movw	%cx, (%edx)
-#ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-# endif
-#endif
-	RETURN
-
-	ALIGN (4)
-L(bk_write_47bytes):
-	movl	43(%eax), %ecx
-	movl	%ecx, 43(%edx)
-L(bk_write_43bytes):
-	movl	39(%eax), %ecx
-	movl	%ecx, 39(%edx)
-L(bk_write_39bytes):
-	movl	35(%eax), %ecx
-	movl	%ecx, 35(%edx)
-L(bk_write_35bytes):
-	movl	31(%eax), %ecx
-	movl	%ecx, 31(%edx)
-L(bk_write_31bytes):
-	movl	27(%eax), %ecx
-	movl	%ecx, 27(%edx)
-L(bk_write_27bytes):
-	movl	23(%eax), %ecx
-	movl	%ecx, 23(%edx)
-L(bk_write_23bytes):
-	movl	19(%eax), %ecx
-	movl	%ecx, 19(%edx)
-L(bk_write_19bytes):
-	movl	15(%eax), %ecx
-	movl	%ecx, 15(%edx)
-L(bk_write_15bytes):
-	movl	11(%eax), %ecx
-	movl	%ecx, 11(%edx)
-L(bk_write_11bytes):
-	movl	7(%eax), %ecx
-	movl	%ecx, 7(%edx)
-L(bk_write_7bytes):
-	movl	3(%eax), %ecx
-	movl	%ecx, 3(%edx)
-L(bk_write_3bytes):
-	movzwl	1(%eax), %ecx
-	movw	%cx, 1(%edx)
-	movzbl	(%eax), %eax
-	movb	%al, (%edx)
-#ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-# endif
-#endif
-	RETURN_END
-
-
-	.pushsection .rodata.ssse3,"a",@progbits
-	ALIGN (2)
-L(table_48bytes_fwd):
-	.int	JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
-
-	ALIGN (2)
-L(shl_table):
-	.int	JMPTBL (L(shl_0), L(shl_table))
-	.int	JMPTBL (L(shl_1), L(shl_table))
-	.int	JMPTBL (L(shl_2), L(shl_table))
-	.int	JMPTBL (L(shl_3), L(shl_table))
-	.int	JMPTBL (L(shl_4), L(shl_table))
-	.int	JMPTBL (L(shl_5), L(shl_table))
-	.int	JMPTBL (L(shl_6), L(shl_table))
-	.int	JMPTBL (L(shl_7), L(shl_table))
-	.int	JMPTBL (L(shl_8), L(shl_table))
-	.int	JMPTBL (L(shl_9), L(shl_table))
-	.int	JMPTBL (L(shl_10), L(shl_table))
-	.int	JMPTBL (L(shl_11), L(shl_table))
-	.int	JMPTBL (L(shl_12), L(shl_table))
-	.int	JMPTBL (L(shl_13), L(shl_table))
-	.int	JMPTBL (L(shl_14), L(shl_table))
-	.int	JMPTBL (L(shl_15), L(shl_table))
-
-	ALIGN (2)
-L(table_48_bytes_bwd):
-	.int	JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
-
-	.popsection
-
-#ifdef USE_AS_MEMMOVE
-	ALIGN (4)
-L(copy_backward):
-	PUSH (%esi)
-	movl	%eax, %esi
-	add	%ecx, %edx
-	add	%ecx, %esi
-	testl	$0x3, %edx
-	jnz	L(bk_align)
-
-L(bk_aligned_4):
-	cmp	$64, %ecx
-	jae	L(bk_write_more64bytes)
-
-L(bk_write_64bytesless):
-	cmp	$32, %ecx
-	jb	L(bk_write_less32bytes)
-
-L(bk_write_more32bytes):
-	/* Copy 32 bytes at a time.  */
-	sub	$32, %ecx
-	movl	-4(%esi), %eax
-	movl	%eax, -4(%edx)
-	movl	-8(%esi), %eax
-	movl	%eax, -8(%edx)
-	movl	-12(%esi), %eax
-	movl	%eax, -12(%edx)
-	movl	-16(%esi), %eax
-	movl	%eax, -16(%edx)
-	movl	-20(%esi), %eax
-	movl	%eax, -20(%edx)
-	movl	-24(%esi), %eax
-	movl	%eax, -24(%edx)
-	movl	-28(%esi), %eax
-	movl	%eax, -28(%edx)
-	movl	-32(%esi), %eax
-	movl	%eax, -32(%edx)
-	sub	$32, %edx
-	sub	$32, %esi
-
-L(bk_write_less32bytes):
-	movl	%esi, %eax
-	sub	%ecx, %edx
-	sub	%ecx, %eax
-	POP (%esi)
-L(bk_write_less48bytes):
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
-
-	CFI_PUSH (%esi)
-	ALIGN (4)
-L(bk_align):
-	cmp	$8, %ecx
-	jbe	L(bk_write_less32bytes)
-	testl	$1, %edx
-	/* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
-	   then (EDX & 2) must be != 0.  */
-	jz	L(bk_got2)
-	sub	$1, %esi
-	sub	$1, %ecx
-	sub	$1, %edx
-	movzbl	(%esi), %eax
-	movb	%al, (%edx)
-
-	testl	$2, %edx
-	jz	L(bk_aligned_4)
-
-L(bk_got2):
-	sub	$2, %esi
-	sub	$2, %ecx
-	sub	$2, %edx
-	movzwl	(%esi), %eax
-	movw	%ax, (%edx)
-	jmp	L(bk_aligned_4)
-
-	ALIGN (4)
-L(bk_write_more64bytes):
-	/* Check alignment of last byte.  */
-	testl	$15, %edx
-	jz	L(bk_ssse3_cpy_pre)
-
-/* EDX is aligned 4 bytes, but not 16 bytes.  */
-L(bk_ssse3_align):
-	sub	$4, %esi
-	sub	$4, %ecx
-	sub	$4, %edx
-	movl	(%esi), %eax
-	movl	%eax, (%edx)
-
-	testl	$15, %edx
-	jz	L(bk_ssse3_cpy_pre)
-
-	sub	$4, %esi
-	sub	$4, %ecx
-	sub	$4, %edx
-	movl	(%esi), %eax
-	movl	%eax, (%edx)
-
-	testl	$15, %edx
-	jz	L(bk_ssse3_cpy_pre)
-
-	sub	$4, %esi
-	sub	$4, %ecx
-	sub	$4, %edx
-	movl	(%esi), %eax
-	movl	%eax, (%edx)
-
-L(bk_ssse3_cpy_pre):
-	cmp	$64, %ecx
-	jb	L(bk_write_more32bytes)
-
-L(bk_ssse3_cpy):
-	sub	$64, %esi
-	sub	$64, %ecx
-	sub	$64, %edx
-	movdqu	0x30(%esi), %xmm3
-	movdqa	%xmm3, 0x30(%edx)
-	movdqu	0x20(%esi), %xmm2
-	movdqa	%xmm2, 0x20(%edx)
-	movdqu	0x10(%esi), %xmm1
-	movdqa	%xmm1, 0x10(%edx)
-	movdqu	(%esi), %xmm0
-	movdqa	%xmm0, (%edx)
-	cmp	$64, %ecx
-	jae	L(bk_ssse3_cpy)
-	jmp	L(bk_write_64bytesless)
-
-#endif
-
-END (MEMCPY)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
deleted file mode 100644
index 53e8a6ca1d..0000000000
--- a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
+++ /dev/null
@@ -1,3162 +0,0 @@
-/* memcpy with SSSE3
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc) \
-    && (defined SHARED \
-	|| defined USE_AS_MEMMOVE \
-	|| !defined USE_MULTIARCH)
-
-# include <sysdep.h>
-# include "asm-syntax.h"
-
-# ifndef MEMCPY
-#  define MEMCPY		__memcpy_ssse3
-#  define MEMCPY_CHK	__memcpy_chk_ssse3
-# endif
-
-# ifdef USE_AS_BCOPY
-#  define SRC		PARMS
-#  define DEST		SRC+4
-#  define LEN		DEST+4
-# else
-#  define DEST		PARMS
-#  define SRC		DEST+4
-#  define LEN		SRC+4
-# endif
-
-# define CFI_PUSH(REG)		\
-  cfi_adjust_cfa_offset (4);		\
-  cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)		\
-  cfi_adjust_cfa_offset (-4);		\
-  cfi_restore (REG)
-
-# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-# define POP(REG)	popl REG; CFI_POP (REG)
-
-# ifdef SHARED
-#  define PARMS		8		/* Preserve EBX.  */
-#  define ENTRANCE	PUSH (%ebx);
-#  define RETURN_END	POP (%ebx); ret
-#  define RETURN		RETURN_END; CFI_PUSH (%ebx)
-#  define JMPTBL(I, B)	I - B
-
-/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
-	jump table with relative offsets.  INDEX is a register contains the
-	index into the jump table.   SCALE is the scale of INDEX. */
-
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
-    /* We first load PC into EBX.  */		\
-	SETUP_PIC_REG(bx);		\
-    /* Get the address of the jump table.  */		\
-	addl	$(TABLE - .), %ebx;		\
-    /* Get the entry and convert the relative offset to the		\
-	absolute	address.  */		\
-	addl	(%ebx, INDEX, SCALE), %ebx;		\
-    /* We loaded the jump table.  Go.  */		\
-	jmp	*%ebx
-# else
-
-#  define PARMS		4
-#  define ENTRANCE
-#  define RETURN_END	ret
-#  define RETURN		RETURN_END
-#  define JMPTBL(I, B)	I
-
-/* Branch to an entry in a jump table.  TABLE is a jump table with
-	absolute offsets.  INDEX is a register contains the index into the
-	jump table.  SCALE is the scale of INDEX. */
-
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
-	jmp	*TABLE(, INDEX, SCALE)
-# endif
-
-	.section .text.ssse3,"ax",@progbits
-# if !defined USE_AS_BCOPY
-ENTRY (MEMCPY_CHK)
-	movl	12(%esp), %eax
-	cmpl	%eax, 16(%esp)
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMCPY_CHK)
-# endif
-ENTRY (MEMCPY)
-	ENTRANCE
-	movl	LEN(%esp), %ecx
-	movl	SRC(%esp), %eax
-	movl	DEST(%esp), %edx
-
-# ifdef USE_AS_MEMMOVE
-	cmp	%eax, %edx
-	jb	L(copy_forward)
-	je	L(fwd_write_0bytes)
-	cmp	$32, %ecx
-	jae	L(memmove_bwd)
-	jmp	L(bk_write_less32bytes_2)
-
-	.p2align 4
-L(memmove_bwd):
-	add	%ecx, %eax
-	cmp	%eax, %edx
-	movl	SRC(%esp), %eax
-	jb	L(copy_backward)
-
-L(copy_forward):
-# endif
-	cmp	$48, %ecx
-	jae	L(48bytesormore)
-
-L(fwd_write_less32bytes):
-# ifndef USE_AS_MEMMOVE
-	cmp	%dl, %al
-	jb	L(bk_write)
-# endif
-	add	%ecx, %edx
-	add	%ecx, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-# ifndef USE_AS_MEMMOVE
-	.p2align 4
-L(bk_write):
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
-# endif
-
-	.p2align 4
-L(48bytesormore):
-# ifndef USE_AS_MEMMOVE
-	movlpd	(%eax), %xmm0
-	movlpd	8(%eax), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-# else
-	movdqu	(%eax), %xmm0
-# endif
-	PUSH (%edi)
-	movl	%edx, %edi
-	and	$-16, %edx
-	add	$16, %edx
-	sub	%edx, %edi
-	add	%edi, %ecx
-	sub	%edi, %eax
-
-# ifdef SHARED_CACHE_SIZE_HALF
-	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_shared_cache_size_half, %ecx
-#  endif
-# endif
-
-	mov	%eax, %edi
-	jae	L(large_page)
-	and	$0xf, %edi
-	jz	L(shl_0)
-	BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
-
-	.p2align 4
-L(shl_0):
-# ifdef USE_AS_MEMMOVE
-	movl	DEST+4(%esp), %edi
-	movdqu	%xmm0, (%edi)
-# endif
-	xor	%edi, %edi
-	cmp	$127, %ecx
-	ja	L(shl_0_gobble)
-	lea	-32(%ecx), %ecx
-
-	.p2align 4
-L(shl_0_loop):
-	movdqa	(%eax, %edi), %xmm0
-	movdqa	16(%eax, %edi), %xmm1
-	sub	$32, %ecx
-	movdqa	%xmm0, (%edx, %edi)
-	movdqa	%xmm1, 16(%edx, %edi)
-	lea	32(%edi), %edi
-	jb	L(shl_0_end)
-
-	movdqa	(%eax, %edi), %xmm0
-	movdqa	16(%eax, %edi), %xmm1
-	sub	$32, %ecx
-	movdqa	%xmm0, (%edx, %edi)
-	movdqa	%xmm1, 16(%edx, %edi)
-	lea	32(%edi), %edi
-	jb	L(shl_0_end)
-
-	movdqa	(%eax, %edi), %xmm0
-	movdqa	16(%eax, %edi), %xmm1
-	sub	$32, %ecx
-	movdqa	%xmm0, (%edx, %edi)
-	movdqa	%xmm1, 16(%edx, %edi)
-	lea	32(%edi), %edi
-	jb	L(shl_0_end)
-
-	movdqa	(%eax, %edi), %xmm0
-	movdqa	16(%eax, %edi), %xmm1
-	sub	$32, %ecx
-	movdqa	%xmm0, (%edx, %edi)
-	movdqa	%xmm1, 16(%edx, %edi)
-	lea	32(%edi), %edi
-
-L(shl_0_end):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	add	%edi, %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_0_gobble):
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	POP	(%edi)
-	lea	-128(%ecx), %ecx
-	jae	L(shl_0_gobble_mem_loop)
-
-	.p2align 4
-L(shl_0_gobble_cache_loop):
-	movdqa	(%eax), %xmm0
-	movdqa	0x10(%eax), %xmm1
-	movdqa	0x20(%eax), %xmm2
-	movdqa	0x30(%eax), %xmm3
-	movdqa	0x40(%eax), %xmm4
-	movdqa	0x50(%eax), %xmm5
-	movdqa	0x60(%eax), %xmm6
-	movdqa	0x70(%eax), %xmm7
-	lea	0x80(%eax), %eax
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-	movdqa	%xmm2, 0x20(%edx)
-	movdqa	%xmm3, 0x30(%edx)
-	movdqa	%xmm4, 0x40(%edx)
-	movdqa	%xmm5, 0x50(%edx)
-	movdqa	%xmm6, 0x60(%edx)
-	movdqa	%xmm7, 0x70(%edx)
-	lea	0x80(%edx), %edx
-
-	jae	L(shl_0_gobble_cache_loop)
-	cmp	$-0x40, %ecx
-	lea	0x80(%ecx), %ecx
-	jl	L(shl_0_cache_less_64bytes)
-
-	movdqa	(%eax), %xmm0
-	sub	$0x40, %ecx
-	movdqa	0x10(%eax), %xmm1
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-	movdqa	0x20(%eax), %xmm0
-	movdqa	0x30(%eax), %xmm1
-	add	$0x40, %eax
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm1, 0x30(%edx)
-	add	$0x40, %edx
-
-L(shl_0_cache_less_64bytes):
-	cmp	$0x20, %ecx
-	jb	L(shl_0_cache_less_32bytes)
-	movdqa	(%eax), %xmm0
-	sub	$0x20, %ecx
-	movdqa	0x10(%eax), %xmm1
-	add	$0x20, %eax
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-	add	$0x20, %edx
-
-L(shl_0_cache_less_32bytes):
-	cmp	$0x10, %ecx
-	jb	L(shl_0_cache_less_16bytes)
-	sub	$0x10, %ecx
-	movdqa	(%eax), %xmm0
-	add	$0x10, %eax
-	movdqa	%xmm0, (%edx)
-	add	$0x10, %edx
-
-L(shl_0_cache_less_16bytes):
-	add	%ecx, %edx
-	add	%ecx, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
-	.p2align 4
-L(shl_0_gobble_mem_loop):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x280(%eax)
-	prefetcht0 0x1c0(%edx)
-
-	movdqa	(%eax), %xmm0
-	movdqa	0x10(%eax), %xmm1
-	movdqa	0x20(%eax), %xmm2
-	movdqa	0x30(%eax), %xmm3
-	movdqa	0x40(%eax), %xmm4
-	movdqa	0x50(%eax), %xmm5
-	movdqa	0x60(%eax), %xmm6
-	movdqa	0x70(%eax), %xmm7
-	lea	0x80(%eax), %eax
-	sub	$0x80, %ecx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-	movdqa	%xmm2, 0x20(%edx)
-	movdqa	%xmm3, 0x30(%edx)
-	movdqa	%xmm4, 0x40(%edx)
-	movdqa	%xmm5, 0x50(%edx)
-	movdqa	%xmm6, 0x60(%edx)
-	movdqa	%xmm7, 0x70(%edx)
-	lea	0x80(%edx), %edx
-
-	jae	L(shl_0_gobble_mem_loop)
-	cmp	$-0x40, %ecx
-	lea	0x80(%ecx), %ecx
-	jl	L(shl_0_mem_less_64bytes)
-
-	movdqa	(%eax), %xmm0
-	sub	$0x40, %ecx
-	movdqa	0x10(%eax), %xmm1
-
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-
-	movdqa	0x20(%eax), %xmm0
-	movdqa	0x30(%eax), %xmm1
-	add	$0x40, %eax
-
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm1, 0x30(%edx)
-	add	$0x40, %edx
-
-L(shl_0_mem_less_64bytes):
-	cmp	$0x20, %ecx
-	jb	L(shl_0_mem_less_32bytes)
-	movdqa	(%eax), %xmm0
-	sub	$0x20, %ecx
-	movdqa	0x10(%eax), %xmm1
-	add	$0x20, %eax
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-	add	$0x20, %edx
-
-L(shl_0_mem_less_32bytes):
-	cmp	$0x10, %ecx
-	jb	L(shl_0_mem_less_16bytes)
-	sub	$0x10, %ecx
-	movdqa	(%eax), %xmm0
-	add	$0x10, %eax
-	movdqa	%xmm0, (%edx)
-	add	$0x10, %edx
-
-L(shl_0_mem_less_16bytes):
-	add	%ecx, %edx
-	add	%ecx, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
-
-	.p2align 4
-L(shl_1):
-# ifndef USE_AS_MEMMOVE
-	movaps	-1(%eax), %xmm1
-# else
-	movl	DEST+4(%esp), %edi
-	movaps	-1(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_1_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl1LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	15(%eax), %xmm2
-	movaps	31(%eax), %xmm3
-	movaps	47(%eax), %xmm4
-	movaps	63(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$1, %xmm4, %xmm5
-	palignr	$1, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$1, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl1LoopStart)
-
-L(Shl1LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	15(%eax), %xmm2
-	movaps	31(%eax), %xmm3
-	palignr	$1, %xmm2, %xmm3
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_1_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-1(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_1_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$1, %xmm2, %xmm3
-	palignr	$1, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_1_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$1, %xmm2, %xmm3
-	palignr	$1, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_1_no_prefetch_loop)
-
-L(sh_1_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	1(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_2):
-# ifndef USE_AS_MEMMOVE
-	movaps	-2(%eax), %xmm1
-# else
-	movl	DEST+4(%esp), %edi
-	movaps	-2(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_2_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl2LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	14(%eax), %xmm2
-	movaps	30(%eax), %xmm3
-	movaps	46(%eax), %xmm4
-	movaps	62(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$2, %xmm4, %xmm5
-	palignr	$2, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$2, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl2LoopStart)
-
-L(Shl2LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	14(%eax), %xmm2
-	movaps	30(%eax), %xmm3
-	palignr	$2, %xmm2, %xmm3
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_2_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-2(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_2_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$2, %xmm2, %xmm3
-	palignr	$2, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_2_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$2, %xmm2, %xmm3
-	palignr	$2, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_2_no_prefetch_loop)
-
-L(sh_2_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	2(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_3):
-# ifndef USE_AS_MEMMOVE
-	movaps	-3(%eax), %xmm1
-# else
-	movl	DEST+4(%esp), %edi
-	movaps	-3(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_3_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl3LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	13(%eax), %xmm2
-	movaps	29(%eax), %xmm3
-	movaps	45(%eax), %xmm4
-	movaps	61(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$3, %xmm4, %xmm5
-	palignr	$3, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$3, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl3LoopStart)
-
-L(Shl3LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	13(%eax), %xmm2
-	movaps	29(%eax), %xmm3
-	palignr	$3, %xmm2, %xmm3
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_3_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-3(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_3_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$3, %xmm2, %xmm3
-	palignr	$3, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(sh_3_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$3, %xmm2, %xmm3
-	palignr	$3, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(sh_3_no_prefetch_loop)
-
-L(sh_3_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	3(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_4):
-# ifndef USE_AS_MEMMOVE
-	movaps	-4(%eax), %xmm1
-# else
-	movl	DEST+4(%esp), %edi
-	movaps	-4(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_4_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl4LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	12(%eax), %xmm2
-	movaps	28(%eax), %xmm3
-	movaps	44(%eax), %xmm4
-	movaps	60(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$4, %xmm4, %xmm5
-	palignr	$4, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$4, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl4LoopStart)
-
-L(Shl4LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	12(%eax), %xmm2
-	movaps	28(%eax), %xmm3
-	palignr	$4, %xmm2, %xmm3
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_4_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-4(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_4_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$4, %xmm2, %xmm3
-	palignr	$4, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(sh_4_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$4, %xmm2, %xmm3
-	palignr	$4, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(sh_4_no_prefetch_loop)
-
-L(sh_4_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	4(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_5):
-# ifndef USE_AS_MEMMOVE
-	movaps	-5(%eax), %xmm1
-# else
-	movl	DEST+4(%esp), %edi
-	movaps	-5(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_5_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl5LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	11(%eax), %xmm2
-	movaps	27(%eax), %xmm3
-	movaps	43(%eax), %xmm4
-	movaps	59(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$5, %xmm4, %xmm5
-	palignr	$5, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$5, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl5LoopStart)
-
-L(Shl5LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	11(%eax), %xmm2
-	movaps	27(%eax), %xmm3
-	palignr	$5, %xmm2, %xmm3
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_5_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-5(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_5_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$5, %xmm2, %xmm3
-	palignr	$5, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(sh_5_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$5, %xmm2, %xmm3
-	palignr	$5, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(sh_5_no_prefetch_loop)
-
-L(sh_5_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	5(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_6):	/* Forward copy when the source is 6 bytes past a 16-byte boundary: aligned loads are re-joined with palignr $6.  */
-# ifndef USE_AS_MEMMOVE
-	movaps	-6(%eax), %xmm1	/* Aligned chunk that contains the byte at %eax; seeds the palignr chain.  */
-# else
-	movl	DEST+4(%esp), %edi	/* NOTE(review): the +4 presumably accounts for the pushed %edi -- confirm against the prologue.  */
-	movaps	-6(%eax), %xmm1
-	movdqu	%xmm0, (%edi)	/* Unaligned store of %xmm0, which is loaded outside this section.  */
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* Byte count in %ecx versus half the data cache size.  */
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_6_no_prefetch)	/* Below the threshold: take the loop that does not prefetch.  */
-
-	lea	-64(%ecx), %ecx	/* Bias the count by one 64-byte iteration.  */
-
-	.p2align 4
-L(Shl6LoopStart):	/* 64 bytes per iteration, prefetching 0x1c0 bytes ahead of both pointers.  */
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	10(%eax), %xmm2
-	movaps	26(%eax), %xmm3
-	movaps	42(%eax), %xmm4
-	movaps	58(%eax), %xmm5
-	movaps	%xmm5, %xmm7	/* Save the unshifted last chunk; it is the carry-in for the next iteration.  */
-	palignr	$6, %xmm4, %xmm5
-	palignr	$6, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$6, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl6LoopStart)	/* Repeat unless the subtraction borrowed or reached zero.  */
-
-L(Shl6LoopLeave):
-	add	$32, %ecx	/* %ecx is now (bytes left - 32).  */
-	jle	L(shl_end_0)	/* At most 32 bytes left: skip the extra 32-byte step.  */
-
-	movaps	10(%eax), %xmm2
-	movaps	26(%eax), %xmm3
-	palignr	$6, %xmm2, %xmm3
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx	/* Advance both pointers to the end of the data; %ecx keeps the leftover count.  */
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* Dispatch on the leftover count; macro and table are defined outside this section.  */
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_6_no_prefetch):	/* Same shifted copy, 32 bytes per step, without prefetch.  */
-	lea	-32(%ecx), %ecx
-	lea	-6(%eax), %eax	/* Step the source back 6 bytes so the movdqa loads below are aligned.  */
-	xor	%edi, %edi	/* %edi = running byte offset for both pointers.  */
-
-	.p2align 4
-L(sh_6_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx	/* Sets CF for the jb below; the SSE moves and lea in between do not touch flags.  */
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4	/* Carry-in for the second unrolled half.  */
-	palignr	$6, %xmm2, %xmm3
-	palignr	$6, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(sh_6_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1	/* Carry-in for the next pass through the first half.  */
-	palignr	$6, %xmm2, %xmm3
-	palignr	$6, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(sh_6_no_prefetch_loop)
-
-L(sh_6_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx	/* Remove the bias: %ecx = leftover byte count.  */
-	add	%ecx, %edi
-	add	%edi, %edx	/* Destination end pointer.  */
-	lea	6(%edi, %eax), %eax	/* Source end pointer, with the 6-byte offset restored.  */
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_7):	/* Forward copy when the source is 7 bytes past a 16-byte boundary: aligned loads are re-joined with palignr $7.  */
-# ifndef USE_AS_MEMMOVE
-	movaps	-7(%eax), %xmm1	/* Aligned chunk that contains the byte at %eax; seeds the palignr chain.  */
-# else
-	movl	DEST+4(%esp), %edi	/* NOTE(review): the +4 presumably accounts for the pushed %edi -- confirm against the prologue.  */
-	movaps	-7(%eax), %xmm1
-	movdqu	%xmm0, (%edi)	/* Unaligned store of %xmm0, which is loaded outside this section.  */
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* Byte count in %ecx versus half the data cache size.  */
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_7_no_prefetch)	/* Below the threshold: take the loop that does not prefetch.  */
-
-	lea	-64(%ecx), %ecx	/* Bias the count by one 64-byte iteration.  */
-
-	.p2align 4
-L(Shl7LoopStart):	/* 64 bytes per iteration, prefetching 0x1c0 bytes ahead of both pointers.  */
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	9(%eax), %xmm2
-	movaps	25(%eax), %xmm3
-	movaps	41(%eax), %xmm4
-	movaps	57(%eax), %xmm5
-	movaps	%xmm5, %xmm7	/* Save the unshifted last chunk; it is the carry-in for the next iteration.  */
-	palignr	$7, %xmm4, %xmm5
-	palignr	$7, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$7, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl7LoopStart)	/* Repeat unless the subtraction borrowed or reached zero.  */
-
-L(Shl7LoopLeave):
-	add	$32, %ecx	/* %ecx is now (bytes left - 32).  */
-	jle	L(shl_end_0)	/* At most 32 bytes left: skip the extra 32-byte step.  */
-
-	movaps	9(%eax), %xmm2
-	movaps	25(%eax), %xmm3
-	palignr	$7, %xmm2, %xmm3
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx	/* Advance both pointers to the end of the data; %ecx keeps the leftover count.  */
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* Dispatch on the leftover count; macro and table are defined outside this section.  */
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_7_no_prefetch):	/* Same shifted copy, 32 bytes per step, without prefetch.  */
-	lea	-32(%ecx), %ecx
-	lea	-7(%eax), %eax	/* Step the source back 7 bytes so the movdqa loads below are aligned.  */
-	xor	%edi, %edi	/* %edi = running byte offset for both pointers.  */
-
-	.p2align 4
-L(sh_7_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx	/* Sets CF for the jb below; the SSE moves and lea in between do not touch flags.  */
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4	/* Carry-in for the second unrolled half.  */
-	palignr	$7, %xmm2, %xmm3
-	palignr	$7, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_7_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1	/* Carry-in for the next pass through the first half.  */
-	palignr	$7, %xmm2, %xmm3
-	palignr	$7, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_7_no_prefetch_loop)
-
-L(sh_7_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx	/* Remove the bias: %ecx = leftover byte count.  */
-	add	%ecx, %edi
-	add	%edi, %edx	/* Destination end pointer.  */
-	lea	7(%edi, %eax), %eax	/* Source end pointer, with the 7-byte offset restored.  */
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_8):	/* Forward copy when the source is 8 bytes past a 16-byte boundary: aligned loads are re-joined with palignr $8.  */
-# ifndef USE_AS_MEMMOVE
-	movaps	-8(%eax), %xmm1	/* Aligned chunk that contains the byte at %eax; seeds the palignr chain.  */
-# else
-	movl	DEST+4(%esp), %edi	/* NOTE(review): the +4 presumably accounts for the pushed %edi -- confirm against the prologue.  */
-	movaps	-8(%eax), %xmm1
-	movdqu	%xmm0, (%edi)	/* Unaligned store of %xmm0, which is loaded outside this section.  */
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* Byte count in %ecx versus half the data cache size.  */
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_8_no_prefetch)	/* Below the threshold: take the loop that does not prefetch.  */
-
-	lea	-64(%ecx), %ecx	/* Bias the count by one 64-byte iteration.  */
-
-	.p2align 4
-L(Shl8LoopStart):	/* 64 bytes per iteration, prefetching 0x1c0 bytes ahead of both pointers.  */
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	8(%eax), %xmm2
-	movaps	24(%eax), %xmm3
-	movaps	40(%eax), %xmm4
-	movaps	56(%eax), %xmm5
-	movaps	%xmm5, %xmm7	/* Save the unshifted last chunk; it is the carry-in for the next iteration.  */
-	palignr	$8, %xmm4, %xmm5
-	palignr	$8, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$8, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl8LoopStart)	/* Repeat unless the subtraction borrowed or reached zero.  */
-
-L(LoopLeave8):	/* Named differently from the Shl<N>LoopLeave labels of the sibling variants.  */
-	add	$32, %ecx	/* %ecx is now (bytes left - 32).  */
-	jle	L(shl_end_0)	/* At most 32 bytes left: skip the extra 32-byte step.  */
-
-	movaps	8(%eax), %xmm2
-	movaps	24(%eax), %xmm3
-	palignr	$8, %xmm2, %xmm3
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx	/* Advance both pointers to the end of the data; %ecx keeps the leftover count.  */
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* Dispatch on the leftover count; macro and table are defined outside this section.  */
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_8_no_prefetch):	/* Same shifted copy, 32 bytes per step, without prefetch.  */
-	lea	-32(%ecx), %ecx
-	lea	-8(%eax), %eax	/* Step the source back 8 bytes so the movdqa loads below are aligned.  */
-	xor	%edi, %edi	/* %edi = running byte offset for both pointers.  */
-
-	.p2align 4
-L(sh_8_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx	/* Sets CF for the jb below; the SSE moves and lea in between do not touch flags.  */
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4	/* Carry-in for the second unrolled half.  */
-	palignr	$8, %xmm2, %xmm3
-	palignr	$8, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_8_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1	/* Carry-in for the next pass through the first half.  */
-	palignr	$8, %xmm2, %xmm3
-	palignr	$8, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_8_no_prefetch_loop)
-
-L(sh_8_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx	/* Remove the bias: %ecx = leftover byte count.  */
-	add	%ecx, %edi
-	add	%edi, %edx	/* Destination end pointer.  */
-	lea	8(%edi, %eax), %eax	/* Source end pointer, with the 8-byte offset restored.  */
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_9):	/* Forward copy when the source is 9 bytes past a 16-byte boundary: aligned loads are re-joined with palignr $9.  */
-# ifndef USE_AS_MEMMOVE
-	movaps	-9(%eax), %xmm1	/* Aligned chunk that contains the byte at %eax; seeds the palignr chain.  */
-# else
-	movl	DEST+4(%esp), %edi	/* NOTE(review): the +4 presumably accounts for the pushed %edi -- confirm against the prologue.  */
-	movaps	-9(%eax), %xmm1
-	movdqu	%xmm0, (%edi)	/* Unaligned store of %xmm0, which is loaded outside this section.  */
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* Byte count in %ecx versus half the data cache size.  */
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_9_no_prefetch)	/* Below the threshold: take the loop that does not prefetch.  */
-
-	lea	-64(%ecx), %ecx	/* Bias the count by one 64-byte iteration.  */
-
-	.p2align 4
-L(Shl9LoopStart):	/* 64 bytes per iteration, prefetching 0x1c0 bytes ahead of both pointers.  */
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	7(%eax), %xmm2
-	movaps	23(%eax), %xmm3
-	movaps	39(%eax), %xmm4
-	movaps	55(%eax), %xmm5
-	movaps	%xmm5, %xmm7	/* Save the unshifted last chunk; it is the carry-in for the next iteration.  */
-	palignr	$9, %xmm4, %xmm5
-	palignr	$9, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$9, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl9LoopStart)	/* Repeat unless the subtraction borrowed or reached zero.  */
-
-L(Shl9LoopLeave):
-	add	$32, %ecx	/* %ecx is now (bytes left - 32).  */
-	jle	L(shl_end_0)	/* At most 32 bytes left: skip the extra 32-byte step.  */
-
-	movaps	7(%eax), %xmm2
-	movaps	23(%eax), %xmm3
-	palignr	$9, %xmm2, %xmm3
-	palignr	$9, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx	/* Advance both pointers to the end of the data; %ecx keeps the leftover count.  */
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* Dispatch on the leftover count; macro and table are defined outside this section.  */
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_9_no_prefetch):	/* Same shifted copy, 32 bytes per step, without prefetch.  */
-	lea	-32(%ecx), %ecx
-	lea	-9(%eax), %eax	/* Step the source back 9 bytes so the movdqa loads below are aligned.  */
-	xor	%edi, %edi	/* %edi = running byte offset for both pointers.  */
-
-	.p2align 4
-L(sh_9_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx	/* Sets CF for the jb below; the SSE moves and lea in between do not touch flags.  */
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4	/* Carry-in for the second unrolled half.  */
-	palignr	$9, %xmm2, %xmm3
-	palignr	$9, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_9_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1	/* Carry-in for the next pass through the first half.  */
-	palignr	$9, %xmm2, %xmm3
-	palignr	$9, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_9_no_prefetch_loop)
-
-L(sh_9_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx	/* Remove the bias: %ecx = leftover byte count.  */
-	add	%ecx, %edi
-	add	%edi, %edx	/* Destination end pointer.  */
-	lea	9(%edi, %eax), %eax	/* Source end pointer, with the 9-byte offset restored.  */
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_10):	/* Forward copy when the source is 10 bytes past a 16-byte boundary: aligned loads are re-joined with palignr $10.  */
-# ifndef USE_AS_MEMMOVE
-	movaps	-10(%eax), %xmm1	/* Aligned chunk that contains the byte at %eax; seeds the palignr chain.  */
-# else
-	movl	DEST+4(%esp), %edi	/* NOTE(review): the +4 presumably accounts for the pushed %edi -- confirm against the prologue.  */
-	movaps	-10(%eax), %xmm1
-	movdqu	%xmm0, (%edi)	/* Unaligned store of %xmm0, which is loaded outside this section.  */
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* Byte count in %ecx versus half the data cache size.  */
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_10_no_prefetch)	/* Below the threshold: take the loop that does not prefetch.  */
-
-	lea	-64(%ecx), %ecx	/* Bias the count by one 64-byte iteration.  */
-
-	.p2align 4
-L(Shl10LoopStart):	/* 64 bytes per iteration, prefetching 0x1c0 bytes ahead of both pointers.  */
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	6(%eax), %xmm2
-	movaps	22(%eax), %xmm3
-	movaps	38(%eax), %xmm4
-	movaps	54(%eax), %xmm5
-	movaps	%xmm5, %xmm7	/* Save the unshifted last chunk; it is the carry-in for the next iteration.  */
-	palignr	$10, %xmm4, %xmm5
-	palignr	$10, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$10, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl10LoopStart)	/* Repeat unless the subtraction borrowed or reached zero.  */
-
-L(Shl10LoopLeave):
-	add	$32, %ecx	/* %ecx is now (bytes left - 32).  */
-	jle	L(shl_end_0)	/* At most 32 bytes left: skip the extra 32-byte step.  */
-
-	movaps	6(%eax), %xmm2
-	movaps	22(%eax), %xmm3
-	palignr	$10, %xmm2, %xmm3
-	palignr	$10, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx	/* Advance both pointers to the end of the data; %ecx keeps the leftover count.  */
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* Dispatch on the leftover count; macro and table are defined outside this section.  */
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_10_no_prefetch):	/* Same shifted copy, 32 bytes per step, without prefetch.  */
-	lea	-32(%ecx), %ecx
-	lea	-10(%eax), %eax	/* Step the source back 10 bytes so the movdqa loads below are aligned.  */
-	xor	%edi, %edi	/* %edi = running byte offset for both pointers.  */
-
-	.p2align 4
-L(sh_10_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx	/* Sets CF for the jb below; the SSE moves and lea in between do not touch flags.  */
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4	/* Carry-in for the second unrolled half.  */
-	palignr	$10, %xmm2, %xmm3
-	palignr	$10, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_10_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1	/* Carry-in for the next pass through the first half.  */
-	palignr	$10, %xmm2, %xmm3
-	palignr	$10, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_10_no_prefetch_loop)
-
-L(sh_10_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx	/* Remove the bias: %ecx = leftover byte count.  */
-	add	%ecx, %edi
-	add	%edi, %edx	/* Destination end pointer.  */
-	lea	10(%edi, %eax), %eax	/* Source end pointer, with the 10-byte offset restored.  */
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_11):	/* Forward copy when the source is 11 bytes past a 16-byte boundary: aligned loads are re-joined with palignr $11.  */
-# ifndef USE_AS_MEMMOVE
-	movaps	-11(%eax), %xmm1	/* Aligned chunk that contains the byte at %eax; seeds the palignr chain.  */
-# else
-	movl	DEST+4(%esp), %edi	/* NOTE(review): the +4 presumably accounts for the pushed %edi -- confirm against the prologue.  */
-	movaps	-11(%eax), %xmm1
-	movdqu	%xmm0, (%edi)	/* Unaligned store of %xmm0, which is loaded outside this section.  */
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* Byte count in %ecx versus half the data cache size.  */
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_11_no_prefetch)	/* Below the threshold: take the loop that does not prefetch.  */
-
-	lea	-64(%ecx), %ecx	/* Bias the count by one 64-byte iteration.  */
-
-	.p2align 4
-L(Shl11LoopStart):	/* 64 bytes per iteration, prefetching 0x1c0 bytes ahead of both pointers.  */
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	5(%eax), %xmm2
-	movaps	21(%eax), %xmm3
-	movaps	37(%eax), %xmm4
-	movaps	53(%eax), %xmm5
-	movaps	%xmm5, %xmm7	/* Save the unshifted last chunk; it is the carry-in for the next iteration.  */
-	palignr	$11, %xmm4, %xmm5
-	palignr	$11, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$11, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl11LoopStart)	/* Repeat unless the subtraction borrowed or reached zero.  */
-
-L(Shl11LoopLeave):
-	add	$32, %ecx	/* %ecx is now (bytes left - 32).  */
-	jle	L(shl_end_0)	/* At most 32 bytes left: skip the extra 32-byte step.  */
-
-	movaps	5(%eax), %xmm2
-	movaps	21(%eax), %xmm3
-	palignr	$11, %xmm2, %xmm3
-	palignr	$11, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx	/* Advance both pointers to the end of the data; %ecx keeps the leftover count.  */
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* Dispatch on the leftover count; macro and table are defined outside this section.  */
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_11_no_prefetch):	/* Same shifted copy, 32 bytes per step, without prefetch.  */
-	lea	-32(%ecx), %ecx
-	lea	-11(%eax), %eax	/* Step the source back 11 bytes so the movdqa loads below are aligned.  */
-	xor	%edi, %edi	/* %edi = running byte offset for both pointers.  */
-
-	.p2align 4
-L(sh_11_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx	/* Sets CF for the jb below; the SSE moves and lea in between do not touch flags.  */
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4	/* Carry-in for the second unrolled half.  */
-	palignr	$11, %xmm2, %xmm3
-	palignr	$11, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_11_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1	/* Carry-in for the next pass through the first half.  */
-	palignr	$11, %xmm2, %xmm3
-	palignr	$11, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_11_no_prefetch_loop)
-
-L(sh_11_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx	/* Remove the bias: %ecx = leftover byte count.  */
-	add	%ecx, %edi
-	add	%edi, %edx	/* Destination end pointer.  */
-	lea	11(%edi, %eax), %eax	/* Source end pointer, with the 11-byte offset restored.  */
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_12):	/* Forward copy when the source is 12 bytes past a 16-byte boundary: aligned loads are re-joined with palignr $12.  */
-# ifndef USE_AS_MEMMOVE
-	movaps	-12(%eax), %xmm1	/* Aligned chunk that contains the byte at %eax; seeds the palignr chain.  */
-# else
-	movl	DEST+4(%esp), %edi	/* NOTE(review): the +4 presumably accounts for the pushed %edi -- confirm against the prologue.  */
-	movaps	-12(%eax), %xmm1
-	movdqu	%xmm0, (%edi)	/* Unaligned store of %xmm0, which is loaded outside this section.  */
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* Byte count in %ecx versus half the data cache size.  */
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_12_no_prefetch)	/* Below the threshold: take the loop that does not prefetch.  */
-
-	lea	-64(%ecx), %ecx	/* Bias the count by one 64-byte iteration.  */
-
-	.p2align 4
-L(Shl12LoopStart):	/* 64 bytes per iteration, prefetching 0x1c0 bytes ahead of both pointers.  */
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	4(%eax), %xmm2
-	movaps	20(%eax), %xmm3
-	movaps	36(%eax), %xmm4
-	movaps	52(%eax), %xmm5
-	movaps	%xmm5, %xmm7	/* Save the unshifted last chunk; it is the carry-in for the next iteration.  */
-	palignr	$12, %xmm4, %xmm5
-	palignr	$12, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$12, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl12LoopStart)	/* Repeat unless the subtraction borrowed or reached zero.  */
-
-L(Shl12LoopLeave):
-	add	$32, %ecx	/* %ecx is now (bytes left - 32).  */
-	jle	L(shl_end_0)	/* At most 32 bytes left: skip the extra 32-byte step.  */
-
-	movaps	4(%eax), %xmm2
-	movaps	20(%eax), %xmm3
-	palignr	$12, %xmm2, %xmm3
-	palignr	$12, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx	/* Advance both pointers to the end of the data; %ecx keeps the leftover count.  */
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* Dispatch on the leftover count; macro and table are defined outside this section.  */
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_12_no_prefetch):	/* Same shifted copy, 32 bytes per step, without prefetch.  */
-	lea	-32(%ecx), %ecx
-	lea	-12(%eax), %eax	/* Step the source back 12 bytes so the movdqa loads below are aligned.  */
-	xor	%edi, %edi	/* %edi = running byte offset for both pointers.  */
-
-	.p2align 4
-L(sh_12_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx	/* Sets CF for the jb below; the SSE moves and lea in between do not touch flags.  */
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4	/* Carry-in for the second unrolled half.  */
-	palignr	$12, %xmm2, %xmm3
-	palignr	$12, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_12_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1	/* Carry-in for the next pass through the first half.  */
-	palignr	$12, %xmm2, %xmm3
-	palignr	$12, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_12_no_prefetch_loop)
-
-L(sh_12_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx	/* Remove the bias: %ecx = leftover byte count.  */
-	add	%ecx, %edi
-	add	%edi, %edx	/* Destination end pointer.  */
-	lea	12(%edi, %eax), %eax	/* Source end pointer, with the 12-byte offset restored.  */
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_13):	/* Forward copy when the source is 13 bytes past a 16-byte boundary: aligned loads are re-joined with palignr $13.  */
-# ifndef USE_AS_MEMMOVE
-	movaps	-13(%eax), %xmm1	/* Aligned chunk that contains the byte at %eax; seeds the palignr chain.  */
-# else
-	movl	DEST+4(%esp), %edi	/* NOTE(review): the +4 presumably accounts for the pushed %edi -- confirm against the prologue.  */
-	movaps	-13(%eax), %xmm1
-	movdqu	%xmm0, (%edi)	/* Unaligned store of %xmm0, which is loaded outside this section.  */
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* Byte count in %ecx versus half the data cache size.  */
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_13_no_prefetch)	/* Below the threshold: take the loop that does not prefetch.  */
-
-	lea	-64(%ecx), %ecx	/* Bias the count by one 64-byte iteration.  */
-
-	.p2align 4
-L(Shl13LoopStart):	/* 64 bytes per iteration, prefetching 0x1c0 bytes ahead of both pointers.  */
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	3(%eax), %xmm2
-	movaps	19(%eax), %xmm3
-	movaps	35(%eax), %xmm4
-	movaps	51(%eax), %xmm5
-	movaps	%xmm5, %xmm7	/* Save the unshifted last chunk; it is the carry-in for the next iteration.  */
-	palignr	$13, %xmm4, %xmm5
-	palignr	$13, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$13, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl13LoopStart)	/* Repeat unless the subtraction borrowed or reached zero.  */
-
-L(Shl13LoopLeave):
-	add	$32, %ecx	/* %ecx is now (bytes left - 32).  */
-	jle	L(shl_end_0)	/* At most 32 bytes left: skip the extra 32-byte step.  */
-
-	movaps	3(%eax), %xmm2
-	movaps	19(%eax), %xmm3
-	palignr	$13, %xmm2, %xmm3
-	palignr	$13, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx	/* Advance both pointers to the end of the data; %ecx keeps the leftover count.  */
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* Dispatch on the leftover count; macro and table are defined outside this section.  */
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_13_no_prefetch):	/* Same shifted copy, 32 bytes per step, without prefetch.  */
-	lea	-32(%ecx), %ecx
-	lea	-13(%eax), %eax	/* Step the source back 13 bytes so the movdqa loads below are aligned.  */
-	xor	%edi, %edi	/* %edi = running byte offset for both pointers.  */
-
-	.p2align 4
-L(sh_13_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx	/* Sets CF for the jb below; the SSE moves and lea in between do not touch flags.  */
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4	/* Carry-in for the second unrolled half.  */
-	palignr	$13, %xmm2, %xmm3
-	palignr	$13, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_13_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1	/* Carry-in for the next pass through the first half.  */
-	palignr	$13, %xmm2, %xmm3
-	palignr	$13, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_13_no_prefetch_loop)
-
-L(sh_13_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx	/* Remove the bias: %ecx = leftover byte count.  */
-	add	%ecx, %edi
-	add	%edi, %edx	/* Destination end pointer.  */
-	lea	13(%edi, %eax), %eax	/* Source end pointer, with the 13-byte offset restored.  */
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_14):	/* Forward copy when the source is 14 bytes past a 16-byte boundary: aligned loads are re-joined with palignr $14.  */
-# ifndef USE_AS_MEMMOVE
-	movaps	-14(%eax), %xmm1	/* Aligned chunk that contains the byte at %eax; seeds the palignr chain.  */
-# else
-	movl	DEST+4(%esp), %edi	/* NOTE(review): the +4 presumably accounts for the pushed %edi -- confirm against the prologue.  */
-	movaps	-14(%eax), %xmm1
-	movdqu	%xmm0, (%edi)	/* Unaligned store of %xmm0, which is loaded outside this section.  */
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* Byte count in %ecx versus half the data cache size.  */
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_14_no_prefetch)	/* Below the threshold: take the loop that does not prefetch.  */
-
-	lea	-64(%ecx), %ecx	/* Bias the count by one 64-byte iteration.  */
-
-	.p2align 4
-L(Shl14LoopStart):	/* 64 bytes per iteration, prefetching 0x1c0 bytes ahead of both pointers.  */
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	2(%eax), %xmm2
-	movaps	18(%eax), %xmm3
-	movaps	34(%eax), %xmm4
-	movaps	50(%eax), %xmm5
-	movaps	%xmm5, %xmm7	/* Save the unshifted last chunk; it is the carry-in for the next iteration.  */
-	palignr	$14, %xmm4, %xmm5
-	palignr	$14, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$14, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl14LoopStart)	/* Repeat unless the subtraction borrowed or reached zero.  */
-
-L(Shl14LoopLeave):
-	add	$32, %ecx	/* %ecx is now (bytes left - 32).  */
-	jle	L(shl_end_0)	/* At most 32 bytes left: skip the extra 32-byte step.  */
-
-	movaps	2(%eax), %xmm2
-	movaps	18(%eax), %xmm3
-	palignr	$14, %xmm2, %xmm3
-	palignr	$14, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx	/* Advance both pointers to the end of the data; %ecx keeps the leftover count.  */
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* Dispatch on the leftover count; macro and table are defined outside this section.  */
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_14_no_prefetch):	/* Same shifted copy, 32 bytes per step, without prefetch.  */
-	lea	-32(%ecx), %ecx
-	lea	-14(%eax), %eax	/* Step the source back 14 bytes so the movdqa loads below are aligned.  */
-	xor	%edi, %edi	/* %edi = running byte offset for both pointers.  */
-
-	.p2align 4
-L(sh_14_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx	/* Sets CF for the jb below; the SSE moves and lea in between do not touch flags.  */
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4	/* Carry-in for the second unrolled half.  */
-	palignr	$14, %xmm2, %xmm3
-	palignr	$14, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_14_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1	/* Carry-in for the next pass through the first half.  */
-	palignr	$14, %xmm2, %xmm3
-	palignr	$14, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_14_no_prefetch_loop)
-
-L(sh_14_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx	/* Remove the bias: %ecx = leftover byte count.  */
-	add	%ecx, %edi
-	add	%edi, %edx	/* Destination end pointer.  */
-	lea	14(%edi, %eax), %eax	/* Source end pointer, with the 14-byte offset restored.  */
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_15):	/* Forward copy when the source is 15 bytes past a 16-byte boundary: aligned loads are re-joined with palignr $15.  */
-# ifndef USE_AS_MEMMOVE
-	movaps	-15(%eax), %xmm1	/* Aligned chunk that contains the byte at %eax; seeds the palignr chain.  */
-# else
-	movl	DEST+4(%esp), %edi	/* NOTE(review): the +4 presumably accounts for the pushed %edi -- confirm against the prologue.  */
-	movaps	-15(%eax), %xmm1
-	movdqu	%xmm0, (%edi)	/* Unaligned store of %xmm0, which is loaded outside this section.  */
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx	/* Byte count in %ecx versus half the data cache size.  */
-# else
-#  ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-#  else
-	cmp	__x86_data_cache_size_half, %ecx
-#  endif
-# endif
-	jb L(sh_15_no_prefetch)	/* Below the threshold: take the loop that does not prefetch.  */
-
-	lea	-64(%ecx), %ecx	/* Bias the count by one 64-byte iteration.  */
-
-	.p2align 4
-L(Shl15LoopStart):	/* 64 bytes per iteration, prefetching 0x1c0 bytes ahead of both pointers.  */
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	1(%eax), %xmm2
-	movaps	17(%eax), %xmm3
-	movaps	33(%eax), %xmm4
-	movaps	49(%eax), %xmm5
-	movaps	%xmm5, %xmm7	/* Save the unshifted last chunk; it is the carry-in for the next iteration.  */
-	palignr	$15, %xmm4, %xmm5
-	palignr	$15, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$15, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl15LoopStart)	/* Repeat unless the subtraction borrowed or reached zero.  */
-
-L(Shl15LoopLeave):
-	add	$32, %ecx	/* %ecx is now (bytes left - 32).  */
-	jle	L(shl_end_0)	/* At most 32 bytes left: skip the extra 32-byte step.  */
-
-	movaps	1(%eax), %xmm2
-	movaps	17(%eax), %xmm3
-	palignr	$15, %xmm2, %xmm3
-	palignr	$15, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx	/* Advance both pointers to the end of the data; %ecx keeps the leftover count.  */
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* Dispatch on the leftover count; macro and table are defined outside this section.  */
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_15_no_prefetch):	/* Same shifted copy, 32 bytes per step, without prefetch.  */
-	lea	-32(%ecx), %ecx
-	lea	-15(%eax), %eax	/* Step the source back 15 bytes so the movdqa loads below are aligned.  */
-	xor	%edi, %edi	/* %edi = running byte offset for both pointers.  */
-
-	.p2align 4
-L(sh_15_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx	/* Sets CF for the jb below; the SSE moves and lea in between do not touch flags.  */
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4	/* Carry-in for the second unrolled half.  */
-	palignr	$15, %xmm2, %xmm3
-	palignr	$15, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_15_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1	/* Carry-in for the next pass through the first half.  */
-	palignr	$15, %xmm2, %xmm3
-	palignr	$15, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_15_no_prefetch_loop)
-
-L(sh_15_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx	/* Remove the bias: %ecx = leftover byte count.  */
-	add	%ecx, %edi
-	add	%edi, %edx	/* Destination end pointer.  */
-	lea	15(%edi, %eax), %eax	/* Source end pointer, with the 15-byte offset restored.  */
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_end_0):	/* Shared exit for the Shl<N> loops when their `add $32, %ecx' result is <= 0 (jle).  */
-	lea	32(%ecx), %ecx	/* Add the remaining 32 back so %ecx is the leftover byte count.  */
-	lea	(%edx, %ecx), %edx	/* Advance destination and source by that count ...  */
-	lea	(%eax, %ecx), %eax	/* ... so both point at the end of the data.  */
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)	/* Dispatch on %ecx; macro and table are defined outside this section.  */
-
-	.p2align 4
-L(fwd_write_44bytes):	/* Tail copy with negative offsets from %eax (source) and %edx (destination); each label moves 8 bytes and falls through.  */
-	movq	-44(%eax), %xmm0
-	movq	%xmm0, -44(%edx)
-L(fwd_write_36bytes):
-	movq	-36(%eax), %xmm0
-	movq	%xmm0, -36(%edx)
-L(fwd_write_28bytes):
-	movq	-28(%eax), %xmm0
-	movq	%xmm0, -28(%edx)
-L(fwd_write_20bytes):
-	movq	-20(%eax), %xmm0
-	movq	%xmm0, -20(%edx)
-L(fwd_write_12bytes):
-	movq	-12(%eax), %xmm0
-	movq	%xmm0, -12(%edx)
-L(fwd_write_4bytes):
-	movl	-4(%eax), %ecx	/* Final 4 bytes.  */
-	movl	%ecx, -4(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_40bytes):	/* Tail copy for multiples of 8: each label moves 8 bytes at a negative offset from %eax/%edx and falls through.  */
-	movq	-40(%eax), %xmm0
-	movq	%xmm0, -40(%edx)
-L(fwd_write_32bytes):
-	movq	-32(%eax), %xmm0
-	movq	%xmm0, -32(%edx)
-L(fwd_write_24bytes):
-	movq	-24(%eax), %xmm0
-	movq	%xmm0, -24(%edx)
-L(fwd_write_16bytes):
-	movq	-16(%eax), %xmm0
-	movq	%xmm0, -16(%edx)
-L(fwd_write_8bytes):
-	movq	-8(%eax), %xmm0
-	movq	%xmm0, -8(%edx)
-L(fwd_write_0bytes):	/* Nothing left to copy; only the result register is set.  */
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_5bytes):	/* Copies the last 5 bytes with two overlapping 4-byte moves.  */
-	movl	-5(%eax), %ecx
-	movl	-4(%eax), %eax	/* Overwrites the source pointer; it is not read again below.  */
-	movl	%ecx, -5(%edx)
-	movl	%eax, -4(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_45bytes):	/* Tail copy for counts of 8k+5 (k >= 1): 8-byte moves fall through, then 4 + 1 bytes finish.  */
-	movq	-45(%eax), %xmm0
-	movq	%xmm0, -45(%edx)
-L(fwd_write_37bytes):
-	movq	-37(%eax), %xmm0
-	movq	%xmm0, -37(%edx)
-L(fwd_write_29bytes):
-	movq	-29(%eax), %xmm0
-	movq	%xmm0, -29(%edx)
-L(fwd_write_21bytes):
-	movq	-21(%eax), %xmm0
-	movq	%xmm0, -21(%edx)
-L(fwd_write_13bytes):
-	movq	-13(%eax), %xmm0
-	movq	%xmm0, -13(%edx)
-	movl	-5(%eax), %ecx	/* Bytes -5..-2.  */
-	movl	%ecx, -5(%edx)
-	movzbl	-1(%eax), %ecx	/* Last byte.  */
-	movb	%cl, -1(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_41bytes):	/* Tail copy for counts of 8k+1: 8-byte moves fall through, then one byte finishes.  */
-	movq	-41(%eax), %xmm0
-	movq	%xmm0, -41(%edx)
-L(fwd_write_33bytes):
-	movq	-33(%eax), %xmm0
-	movq	%xmm0, -33(%edx)
-L(fwd_write_25bytes):
-	movq	-25(%eax), %xmm0
-	movq	%xmm0, -25(%edx)
-L(fwd_write_17bytes):
-	movq	-17(%eax), %xmm0
-	movq	%xmm0, -17(%edx)
-L(fwd_write_9bytes):
-	movq	-9(%eax), %xmm0
-	movq	%xmm0, -9(%edx)
-L(fwd_write_1bytes):
-	movzbl	-1(%eax), %ecx	/* Last byte.  */
-	movb	%cl, -1(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_46bytes):	/* Tail copy for counts of 8k+6: 8-byte moves fall through, then 4 + 2 bytes finish.  */
-	movq	-46(%eax), %xmm0
-	movq	%xmm0, -46(%edx)
-L(fwd_write_38bytes):
-	movq	-38(%eax), %xmm0
-	movq	%xmm0, -38(%edx)
-L(fwd_write_30bytes):
-	movq	-30(%eax), %xmm0
-	movq	%xmm0, -30(%edx)
-L(fwd_write_22bytes):
-	movq	-22(%eax), %xmm0
-	movq	%xmm0, -22(%edx)
-L(fwd_write_14bytes):
-	movq	-14(%eax), %xmm0
-	movq	%xmm0, -14(%edx)
-L(fwd_write_6bytes):
-	movl	-6(%eax), %ecx	/* Bytes -6..-3.  */
-	movl	%ecx, -6(%edx)
-	movzwl	-2(%eax), %ecx	/* Last two bytes.  */
-	movw	%cx, -2(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_42bytes):	/* Tail copy for counts of 8k+2: 8-byte moves fall through, then two bytes finish.  */
-	movq	-42(%eax), %xmm0
-	movq	%xmm0, -42(%edx)
-L(fwd_write_34bytes):
-	movq	-34(%eax), %xmm0
-	movq	%xmm0, -34(%edx)
-L(fwd_write_26bytes):
-	movq	-26(%eax), %xmm0
-	movq	%xmm0, -26(%edx)
-L(fwd_write_18bytes):
-	movq	-18(%eax), %xmm0
-	movq	%xmm0, -18(%edx)
-L(fwd_write_10bytes):
-	movq	-10(%eax), %xmm0
-	movq	%xmm0, -10(%edx)
-L(fwd_write_2bytes):
-	movzwl	-2(%eax), %ecx	/* Last two bytes.  */
-	movw	%cx, -2(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_47bytes):	/* Tail copy for counts of 8k+7: 8-byte moves fall through, then 4 + 2 + 1 bytes finish.  */
-	movq	-47(%eax), %xmm0
-	movq	%xmm0, -47(%edx)
-L(fwd_write_39bytes):
-	movq	-39(%eax), %xmm0
-	movq	%xmm0, -39(%edx)
-L(fwd_write_31bytes):
-	movq	-31(%eax), %xmm0
-	movq	%xmm0, -31(%edx)
-L(fwd_write_23bytes):
-	movq	-23(%eax), %xmm0
-	movq	%xmm0, -23(%edx)
-L(fwd_write_15bytes):
-	movq	-15(%eax), %xmm0
-	movq	%xmm0, -15(%edx)
-L(fwd_write_7bytes):
-	movl	-7(%eax), %ecx	/* Bytes -7..-4.  */
-	movl	%ecx, -7(%edx)
-	movzwl	-3(%eax), %ecx	/* Bytes -3..-2.  */
-	movzbl	-1(%eax), %eax	/* Last byte; overwrites the source pointer, which is not read again.  */
-	movw	%cx, -3(%edx)
-	movb	%al, -1(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_43bytes):	/* Tail copy for counts of 8k+3: 8-byte moves fall through, then 2 + 1 bytes finish.  */
-	movq	-43(%eax), %xmm0
-	movq	%xmm0, -43(%edx)
-L(fwd_write_35bytes):
-	movq	-35(%eax), %xmm0
-	movq	%xmm0, -35(%edx)
-L(fwd_write_27bytes):
-	movq	-27(%eax), %xmm0
-	movq	%xmm0, -27(%edx)
-L(fwd_write_19bytes):
-	movq	-19(%eax), %xmm0
-	movq	%xmm0, -19(%edx)
-L(fwd_write_11bytes):
-	movq	-11(%eax), %xmm0
-	movq	%xmm0, -11(%edx)
-L(fwd_write_3bytes):
-	movzwl	-3(%eax), %ecx	/* Bytes -3..-2.  */
-	movzbl	-1(%eax), %eax	/* Last byte; overwrites the source pointer, which is not read again.  */
-	movw	%cx, -3(%edx)
-	movb	%al, -1(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_40bytes_align):	/* Aligned tail copy: movdqa moves 16 bytes per label, so both addresses must be 16-byte aligned at these offsets.  */
-	movdqa	-40(%eax), %xmm0
-	movdqa	%xmm0, -40(%edx)
-L(fwd_write_24bytes_align):
-	movdqa	-24(%eax), %xmm0
-	movdqa	%xmm0, -24(%edx)
-L(fwd_write_8bytes_align):
-	movq	-8(%eax), %xmm0	/* Final 8 bytes.  */
-	movq	%xmm0, -8(%edx)
-L(fwd_write_0bytes_align):	/* Nothing left to copy; only the result register is set.  */
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_32bytes_align):	/* Aligned tail copy of 32 or 16 bytes using 16-byte movdqa moves.  */
-	movdqa	-32(%eax), %xmm0
-	movdqa	%xmm0, -32(%edx)
-L(fwd_write_16bytes_align):
-	movdqa	-16(%eax), %xmm0
-	movdqa	%xmm0, -16(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_5bytes_align):	/* Copies the last 5 bytes with two overlapping 4-byte moves.  */
-	movl	-5(%eax), %ecx
-	movl	-4(%eax), %eax	/* Overwrites the source pointer; it is not read again below.  */
-	movl	%ecx, -5(%edx)
-	movl	%eax, -4(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_45bytes_align):	/* Aligned tail copy for 45/29/13 bytes: 16-byte movdqa moves fall through, then 8 + 4 + 1 bytes finish.  */
-	movdqa	-45(%eax), %xmm0
-	movdqa	%xmm0, -45(%edx)
-L(fwd_write_29bytes_align):
-	movdqa	-29(%eax), %xmm0
-	movdqa	%xmm0, -29(%edx)
-L(fwd_write_13bytes_align):
-	movq	-13(%eax), %xmm0
-	movq	%xmm0, -13(%edx)
-	movl	-5(%eax), %ecx	/* Bytes -5..-2.  */
-	movl	%ecx, -5(%edx)
-	movzbl	-1(%eax), %ecx	/* Last byte.  */
-	movb	%cl, -1(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_37bytes_align):	/* Aligned tail copy for 37/21 bytes: 16-byte movdqa moves, then 4 + 1 bytes finish.  */
-	movdqa	-37(%eax), %xmm0
-	movdqa	%xmm0, -37(%edx)
-L(fwd_write_21bytes_align):
-	movdqa	-21(%eax), %xmm0
-	movdqa	%xmm0, -21(%edx)
-	movl	-5(%eax), %ecx	/* Bytes -5..-2.  */
-	movl	%ecx, -5(%edx)
-	movzbl	-1(%eax), %ecx	/* Last byte.  */
-	movb	%cl, -1(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_41bytes_align):	/* Aligned tail copy for 41/25/9/1 bytes: 16-byte movdqa moves fall through, then 8 + 1 bytes finish.  */
-	movdqa	-41(%eax), %xmm0
-	movdqa	%xmm0, -41(%edx)
-L(fwd_write_25bytes_align):
-	movdqa	-25(%eax), %xmm0
-	movdqa	%xmm0, -25(%edx)
-L(fwd_write_9bytes_align):
-	movq	-9(%eax), %xmm0
-	movq	%xmm0, -9(%edx)
-L(fwd_write_1bytes_align):
-	movzbl	-1(%eax), %ecx	/* Last byte.  */
-	movb	%cl, -1(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_33bytes_align):	/* Aligned tail copy for 33/17 bytes: 16-byte movdqa moves, then one byte finishes.  */
-	movdqa	-33(%eax), %xmm0
-	movdqa	%xmm0, -33(%edx)
-L(fwd_write_17bytes_align):
-	movdqa	-17(%eax), %xmm0
-	movdqa	%xmm0, -17(%edx)
-	movzbl	-1(%eax), %ecx	/* Last byte.  */
-	movb	%cl, -1(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_46bytes_align):	/* Aligned tail copy for 46/30/14/6 bytes: 16-byte movdqa moves fall through, then 8 + 4 + 2 bytes finish.  */
-	movdqa	-46(%eax), %xmm0
-	movdqa	%xmm0, -46(%edx)
-L(fwd_write_30bytes_align):
-	movdqa	-30(%eax), %xmm0
-	movdqa	%xmm0, -30(%edx)
-L(fwd_write_14bytes_align):
-	movq	-14(%eax), %xmm0
-	movq	%xmm0, -14(%edx)
-L(fwd_write_6bytes_align):
-	movl	-6(%eax), %ecx	/* Bytes -6..-3.  */
-	movl	%ecx, -6(%edx)
-	movzwl	-2(%eax), %ecx	/* Last two bytes.  */
-	movw	%cx, -2(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_38bytes_align):	/* Aligned tail copy for 38/22 bytes: 16-byte movdqa moves, then 4 + 2 bytes finish.  */
-	movdqa	-38(%eax), %xmm0
-	movdqa	%xmm0, -38(%edx)
-L(fwd_write_22bytes_align):
-	movdqa	-22(%eax), %xmm0
-	movdqa	%xmm0, -22(%edx)
-	movl	-6(%eax), %ecx	/* Bytes -6..-3.  */
-	movl	%ecx, -6(%edx)
-	movzwl	-2(%eax), %ecx	/* Last two bytes.  */
-	movw	%cx, -2(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_42bytes_align):	/* Aligned tail copy for 42/26/10/2 bytes: 16-byte movdqa moves fall through, then 8 + 2 bytes finish.  */
-	movdqa	-42(%eax), %xmm0
-	movdqa	%xmm0, -42(%edx)
-L(fwd_write_26bytes_align):
-	movdqa	-26(%eax), %xmm0
-	movdqa	%xmm0, -26(%edx)
-L(fwd_write_10bytes_align):
-	movq	-10(%eax), %xmm0
-	movq	%xmm0, -10(%edx)
-L(fwd_write_2bytes_align):
-	movzwl	-2(%eax), %ecx	/* Last two bytes.  */
-	movw	%cx, -2(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_34bytes_align):	/* Aligned tail copy for 34/18 bytes: 16-byte movdqa moves, then two bytes finish.  */
-	movdqa	-34(%eax), %xmm0
-	movdqa	%xmm0, -34(%edx)
-L(fwd_write_18bytes_align):
-	movdqa	-18(%eax), %xmm0
-	movdqa	%xmm0, -18(%edx)
-	movzwl	-2(%eax), %ecx	/* Last two bytes.  */
-	movw	%cx, -2(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_47bytes_align):	/* Aligned tail copy for 47/31/15/7 bytes: 16-byte movdqa moves fall through, then 8 + 4 + 2 + 1 bytes finish.  */
-	movdqa	-47(%eax), %xmm0
-	movdqa	%xmm0, -47(%edx)
-L(fwd_write_31bytes_align):
-	movdqa	-31(%eax), %xmm0
-	movdqa	%xmm0, -31(%edx)
-L(fwd_write_15bytes_align):
-	movq	-15(%eax), %xmm0
-	movq	%xmm0, -15(%edx)
-L(fwd_write_7bytes_align):
-	movl	-7(%eax), %ecx	/* Bytes -7..-4.  */
-	movl	%ecx, -7(%edx)
-	movzwl	-3(%eax), %ecx	/* Bytes -3..-2.  */
-	movzbl	-1(%eax), %eax	/* Last byte; overwrites the source pointer, which is not read again.  */
-	movw	%cx, -3(%edx)
-	movb	%al, -1(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_39bytes_align):	/* Aligned tail copy for 39/23 bytes: 16-byte movdqa moves, then 4 + 2 + 1 bytes finish.  */
-	movdqa	-39(%eax), %xmm0
-	movdqa	%xmm0, -39(%edx)
-L(fwd_write_23bytes_align):
-	movdqa	-23(%eax), %xmm0
-	movdqa	%xmm0, -23(%edx)
-	movl	-7(%eax), %ecx	/* Bytes -7..-4.  */
-	movl	%ecx, -7(%edx)
-	movzwl	-3(%eax), %ecx	/* Bytes -3..-2.  */
-	movzbl	-1(%eax), %eax	/* Last byte; overwrites the source pointer, which is not read again.  */
-	movw	%cx, -3(%edx)
-	movb	%al, -1(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_43bytes_align):	/* Aligned tail copy for 43/27/11/3 bytes: 16-byte movdqa moves fall through, then 8 + 2 + 1 bytes finish.  */
-	movdqa	-43(%eax), %xmm0
-	movdqa	%xmm0, -43(%edx)
-L(fwd_write_27bytes_align):
-	movdqa	-27(%eax), %xmm0
-	movdqa	%xmm0, -27(%edx)
-L(fwd_write_11bytes_align):
-	movq	-11(%eax), %xmm0
-	movq	%xmm0, -11(%edx)
-L(fwd_write_3bytes_align):
-	movzwl	-3(%eax), %ecx	/* Bytes -3..-2.  */
-	movzbl	-1(%eax), %eax	/* Last byte; overwrites the source pointer, which is not read again.  */
-	movw	%cx, -3(%edx)
-	movb	%al, -1(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_35bytes_align):	/* Aligned tail copy for 35/19 bytes: 16-byte movdqa moves, then 2 + 1 bytes finish.  */
-	movdqa	-35(%eax), %xmm0
-	movdqa	%xmm0, -35(%edx)
-L(fwd_write_19bytes_align):
-	movdqa	-19(%eax), %xmm0
-	movdqa	%xmm0, -19(%edx)
-	movzwl	-3(%eax), %ecx	/* Bytes -3..-2.  */
-	movzbl	-1(%eax), %eax	/* Last byte; overwrites the source pointer, which is not read again.  */
-	movw	%cx, -3(%edx)
-	movb	%al, -1(%edx)
-# ifndef USE_AS_BCOPY	/* Set %eax: %edx in the mempcpy build, otherwise the value in the DEST stack slot.  */
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_44bytes_align):
-	movdqa	-44(%eax), %xmm0
-	movdqa	%xmm0, -44(%edx)
-L(fwd_write_28bytes_align):
-	movdqa	-28(%eax), %xmm0
-	movdqa	%xmm0, -28(%edx)
-L(fwd_write_12bytes_align):
-	movq	-12(%eax), %xmm0
-	movq	%xmm0, -12(%edx)
-L(fwd_write_4bytes_align):
-	movl	-4(%eax), %ecx
-	movl	%ecx, -4(%edx)
-# ifndef USE_AS_BCOPY
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_36bytes_align):
-	movdqa	-36(%eax), %xmm0
-	movdqa	%xmm0, -36(%edx)
-L(fwd_write_20bytes_align):
-	movdqa	-20(%eax), %xmm0
-	movdqa	%xmm0, -20(%edx)
-	movl	-4(%eax), %ecx
-	movl	%ecx, -4(%edx)
-# ifndef USE_AS_BCOPY
-#  ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#  else
-	movl	DEST(%esp), %eax
-#  endif
-# endif
-	RETURN_END
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(large_page):
-	movdqu	(%eax), %xmm1
-# ifdef USE_AS_MEMMOVE
-	movl	DEST+4(%esp), %edi
-	movdqu	%xmm0, (%edi)
-# endif
-	lea	16(%eax), %eax
-	movntdq	%xmm1, (%edx)
-	lea	16(%edx), %edx
-	lea	-0x90(%ecx), %ecx
-	POP (%edi)
-
-	.p2align 4
-L(large_page_loop):
-	movdqu	(%eax), %xmm0
-	movdqu	0x10(%eax), %xmm1
-	movdqu	0x20(%eax), %xmm2
-	movdqu	0x30(%eax), %xmm3
-	movdqu	0x40(%eax), %xmm4
-	movdqu	0x50(%eax), %xmm5
-	movdqu	0x60(%eax), %xmm6
-	movdqu	0x70(%eax), %xmm7
-	lea	0x80(%eax), %eax
-
-	sub	$0x80, %ecx
-	movntdq	%xmm0, (%edx)
-	movntdq	%xmm1, 0x10(%edx)
-	movntdq	%xmm2, 0x20(%edx)
-	movntdq	%xmm3, 0x30(%edx)
-	movntdq	%xmm4, 0x40(%edx)
-	movntdq	%xmm5, 0x50(%edx)
-	movntdq	%xmm6, 0x60(%edx)
-	movntdq	%xmm7, 0x70(%edx)
-	lea	0x80(%edx), %edx
-	jae	L(large_page_loop)
-	cmp	$-0x40, %ecx
-	lea	0x80(%ecx), %ecx
-	jl	L(large_page_less_64bytes)
-
-	movdqu	(%eax), %xmm0
-	movdqu	0x10(%eax), %xmm1
-	movdqu	0x20(%eax), %xmm2
-	movdqu	0x30(%eax), %xmm3
-	lea	0x40(%eax), %eax
-
-	movntdq	%xmm0, (%edx)
-	movntdq	%xmm1, 0x10(%edx)
-	movntdq	%xmm2, 0x20(%edx)
-	movntdq	%xmm3, 0x30(%edx)
-	lea	0x40(%edx), %edx
-	sub	$0x40, %ecx
-L(large_page_less_64bytes):
-	cmp	$32, %ecx
-	jb	L(large_page_less_32bytes)
-	movdqu	(%eax), %xmm0
-	movdqu	0x10(%eax), %xmm1
-	lea	0x20(%eax), %eax
-	movntdq	%xmm0, (%edx)
-	movntdq	%xmm1, 0x10(%edx)
-	lea	0x20(%edx), %edx
-	sub	$0x20, %ecx
-L(large_page_less_32bytes):
-	add	%ecx, %edx
-	add	%ecx, %eax
-	sfence
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
-	.p2align 4
-L(bk_write_44bytes):
-	movq	36(%eax), %xmm0
-	movq	%xmm0, 36(%edx)
-L(bk_write_36bytes):
-	movq	28(%eax), %xmm0
-	movq	%xmm0, 28(%edx)
-L(bk_write_28bytes):
-	movq	20(%eax), %xmm0
-	movq	%xmm0, 20(%edx)
-L(bk_write_20bytes):
-	movq	12(%eax), %xmm0
-	movq	%xmm0, 12(%edx)
-L(bk_write_12bytes):
-	movq	4(%eax), %xmm0
-	movq	%xmm0, 4(%edx)
-L(bk_write_4bytes):
-	movl	(%eax), %ecx
-	movl	%ecx, (%edx)
-L(bk_write_0bytes):
-# ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax
-#  ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(bk_write_40bytes):
-	movq	32(%eax), %xmm0
-	movq	%xmm0, 32(%edx)
-L(bk_write_32bytes):
-	movq	24(%eax), %xmm0
-	movq	%xmm0, 24(%edx)
-L(bk_write_24bytes):
-	movq	16(%eax), %xmm0
-	movq	%xmm0, 16(%edx)
-L(bk_write_16bytes):
-	movq	8(%eax), %xmm0
-	movq	%xmm0, 8(%edx)
-L(bk_write_8bytes):
-	movq	(%eax), %xmm0
-	movq	%xmm0, (%edx)
-# ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax
-#  ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(bk_write_45bytes):
-	movq	37(%eax), %xmm0
-	movq	%xmm0, 37(%edx)
-L(bk_write_37bytes):
-	movq	29(%eax), %xmm0
-	movq	%xmm0, 29(%edx)
-L(bk_write_29bytes):
-	movq	21(%eax), %xmm0
-	movq	%xmm0, 21(%edx)
-L(bk_write_21bytes):
-	movq	13(%eax), %xmm0
-	movq	%xmm0, 13(%edx)
-L(bk_write_13bytes):
-	movq	5(%eax), %xmm0
-	movq	%xmm0, 5(%edx)
-L(bk_write_5bytes):
-	movl	1(%eax), %ecx
-	movl	%ecx, 1(%edx)
-L(bk_write_1bytes):
-	movzbl	(%eax), %ecx
-	movb	%cl, (%edx)
-# ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax
-#  ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(bk_write_41bytes):
-	movq	33(%eax), %xmm0
-	movq	%xmm0, 33(%edx)
-L(bk_write_33bytes):
-	movq	25(%eax), %xmm0
-	movq	%xmm0, 25(%edx)
-L(bk_write_25bytes):
-	movq	17(%eax), %xmm0
-	movq	%xmm0, 17(%edx)
-L(bk_write_17bytes):
-	movq	9(%eax), %xmm0
-	movq	%xmm0, 9(%edx)
-L(bk_write_9bytes):
-	movq	1(%eax), %xmm0
-	movq	%xmm0, 1(%edx)
-	movzbl	(%eax), %ecx
-	movb	%cl, (%edx)
-# ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax
-#  ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(bk_write_46bytes):
-	movq	38(%eax), %xmm0
-	movq	%xmm0, 38(%edx)
-L(bk_write_38bytes):
-	movq	30(%eax), %xmm0
-	movq	%xmm0, 30(%edx)
-L(bk_write_30bytes):
-	movq	22(%eax), %xmm0
-	movq	%xmm0, 22(%edx)
-L(bk_write_22bytes):
-	movq	14(%eax), %xmm0
-	movq	%xmm0, 14(%edx)
-L(bk_write_14bytes):
-	movq	6(%eax), %xmm0
-	movq	%xmm0, 6(%edx)
-L(bk_write_6bytes):
-	movl	2(%eax), %ecx
-	movl	%ecx, 2(%edx)
-	movzwl	(%eax), %ecx
-	movw	%cx, (%edx)
-# ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax
-#  ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(bk_write_42bytes):
-	movq	34(%eax), %xmm0
-	movq	%xmm0, 34(%edx)
-L(bk_write_34bytes):
-	movq	26(%eax), %xmm0
-	movq	%xmm0, 26(%edx)
-L(bk_write_26bytes):
-	movq	18(%eax), %xmm0
-	movq	%xmm0, 18(%edx)
-L(bk_write_18bytes):
-	movq	10(%eax), %xmm0
-	movq	%xmm0, 10(%edx)
-L(bk_write_10bytes):
-	movq	2(%eax), %xmm0
-	movq	%xmm0, 2(%edx)
-L(bk_write_2bytes):
-	movzwl	(%eax), %ecx
-	movw	%cx, (%edx)
-# ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax
-#  ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(bk_write_47bytes):
-	movq	39(%eax), %xmm0
-	movq	%xmm0, 39(%edx)
-L(bk_write_39bytes):
-	movq	31(%eax), %xmm0
-	movq	%xmm0, 31(%edx)
-L(bk_write_31bytes):
-	movq	23(%eax), %xmm0
-	movq	%xmm0, 23(%edx)
-L(bk_write_23bytes):
-	movq	15(%eax), %xmm0
-	movq	%xmm0, 15(%edx)
-L(bk_write_15bytes):
-	movq	7(%eax), %xmm0
-	movq	%xmm0, 7(%edx)
-L(bk_write_7bytes):
-	movl	3(%eax), %ecx
-	movl	%ecx, 3(%edx)
-	movzwl	1(%eax), %ecx
-	movw	%cx, 1(%edx)
-	movzbl	(%eax), %eax
-	movb	%al, (%edx)
-# ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax
-#  ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#  endif
-# endif
-	RETURN
-
-	.p2align 4
-L(bk_write_43bytes):
-	movq	35(%eax), %xmm0
-	movq	%xmm0, 35(%edx)
-L(bk_write_35bytes):
-	movq	27(%eax), %xmm0
-	movq	%xmm0, 27(%edx)
-L(bk_write_27bytes):
-	movq	19(%eax), %xmm0
-	movq	%xmm0, 19(%edx)
-L(bk_write_19bytes):
-	movq	11(%eax), %xmm0
-	movq	%xmm0, 11(%edx)
-L(bk_write_11bytes):
-	movq	3(%eax), %xmm0
-	movq	%xmm0, 3(%edx)
-L(bk_write_3bytes):
-	movzwl	1(%eax), %ecx
-	movw	%cx, 1(%edx)
-	movzbl	(%eax), %eax
-	movb	%al, (%edx)
-# ifndef USE_AS_BCOPY
-	movl	DEST(%esp), %eax
-#  ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#  endif
-# endif
-	RETURN_END
-
-
-	.pushsection .rodata.ssse3,"a",@progbits
-	.p2align 2
-L(table_48bytes_fwd):
-	.int	JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
-
-	.p2align 2
-L(table_48bytes_fwd_align):
-	.int	JMPTBL (L(fwd_write_0bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_1bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_2bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_3bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_4bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_5bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_6bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_7bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_8bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_9bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_10bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_11bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_12bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_13bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_14bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_15bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_16bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_17bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_18bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_19bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_20bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_21bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_22bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_23bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_24bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_25bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_26bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_27bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_28bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_29bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_30bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_31bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_32bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_33bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_34bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_35bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_36bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_37bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_38bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_39bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_40bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_41bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_42bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_43bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_44bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_45bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_46bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_47bytes_align), L(table_48bytes_fwd_align))
-
-	.p2align 2
-L(shl_table):
-	.int	JMPTBL (L(shl_0), L(shl_table))
-	.int	JMPTBL (L(shl_1), L(shl_table))
-	.int	JMPTBL (L(shl_2), L(shl_table))
-	.int	JMPTBL (L(shl_3), L(shl_table))
-	.int	JMPTBL (L(shl_4), L(shl_table))
-	.int	JMPTBL (L(shl_5), L(shl_table))
-	.int	JMPTBL (L(shl_6), L(shl_table))
-	.int	JMPTBL (L(shl_7), L(shl_table))
-	.int	JMPTBL (L(shl_8), L(shl_table))
-	.int	JMPTBL (L(shl_9), L(shl_table))
-	.int	JMPTBL (L(shl_10), L(shl_table))
-	.int	JMPTBL (L(shl_11), L(shl_table))
-	.int	JMPTBL (L(shl_12), L(shl_table))
-	.int	JMPTBL (L(shl_13), L(shl_table))
-	.int	JMPTBL (L(shl_14), L(shl_table))
-	.int	JMPTBL (L(shl_15), L(shl_table))
-
-	.p2align 2
-L(table_48_bytes_bwd):
-	.int	JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
-
-	.popsection
-
-# ifdef USE_AS_MEMMOVE
-	.p2align 4
-L(copy_backward):
-	PUSH (%edi)
-	movl	%eax, %edi
-	lea	(%ecx,%edx,1),%edx
-	lea	(%ecx,%edi,1),%edi
-	testl	$0x3, %edx
-	jnz	L(bk_align)
-
-L(bk_aligned_4):
-	cmp	$64, %ecx
-	jae	L(bk_write_more64bytes)
-
-L(bk_write_64bytesless):
-	cmp	$32, %ecx
-	jb	L(bk_write_less32bytes)
-
-L(bk_write_more32bytes):
-	/* Copy 32 bytes at a time.  */
-	sub	$32, %ecx
-	movq	-8(%edi), %xmm0
-	movq	%xmm0, -8(%edx)
-	movq	-16(%edi), %xmm0
-	movq	%xmm0, -16(%edx)
-	movq	-24(%edi), %xmm0
-	movq	%xmm0, -24(%edx)
-	movq	-32(%edi), %xmm0
-	movq	%xmm0, -32(%edx)
-	sub	$32, %edx
-	sub	$32, %edi
-
-L(bk_write_less32bytes):
-	movl	%edi, %eax
-	sub	%ecx, %edx
-	sub	%ecx, %eax
-	POP (%edi)
-L(bk_write_less32bytes_2):
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(bk_align):
-	cmp	$8, %ecx
-	jbe	L(bk_write_less32bytes)
-	testl	$1, %edx
-	/* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
-	then	(EDX & 2) must be != 0.  */
-	jz	L(bk_got2)
-	sub	$1, %edi
-	sub	$1, %ecx
-	sub	$1, %edx
-	movzbl	(%edi), %eax
-	movb	%al, (%edx)
-
-	testl	$2, %edx
-	jz	L(bk_aligned_4)
-
-L(bk_got2):
-	sub	$2, %edi
-	sub	$2, %ecx
-	sub	$2, %edx
-	movzwl	(%edi), %eax
-	movw	%ax, (%edx)
-	jmp	L(bk_aligned_4)
-
-	.p2align 4
-L(bk_write_more64bytes):
-	/* Check alignment of last byte.  */
-	testl	$15, %edx
-	jz	L(bk_ssse3_cpy_pre)
-
-/* EDX is aligned 4 bytes, but not 16 bytes.  */
-L(bk_ssse3_align):
-	sub	$4, %edi
-	sub	$4, %ecx
-	sub	$4, %edx
-	movl	(%edi), %eax
-	movl	%eax, (%edx)
-
-	testl	$15, %edx
-	jz	L(bk_ssse3_cpy_pre)
-
-	sub	$4, %edi
-	sub	$4, %ecx
-	sub	$4, %edx
-	movl	(%edi), %eax
-	movl	%eax, (%edx)
-
-	testl	$15, %edx
-	jz	L(bk_ssse3_cpy_pre)
-
-	sub	$4, %edi
-	sub	$4, %ecx
-	sub	$4, %edx
-	movl	(%edi), %eax
-	movl	%eax, (%edx)
-
-L(bk_ssse3_cpy_pre):
-	cmp	$64, %ecx
-	jb	L(bk_write_more32bytes)
-
-	.p2align 4
-L(bk_ssse3_cpy):
-	sub	$64, %edi
-	sub	$64, %ecx
-	sub	$64, %edx
-	movdqu	0x30(%edi), %xmm3
-	movdqa	%xmm3, 0x30(%edx)
-	movdqu	0x20(%edi), %xmm2
-	movdqa	%xmm2, 0x20(%edx)
-	movdqu	0x10(%edi), %xmm1
-	movdqa	%xmm1, 0x10(%edx)
-	movdqu	(%edi), %xmm0
-	movdqa	%xmm0, (%edx)
-	cmp	$64, %ecx
-	jae	L(bk_ssse3_cpy)
-	jmp	L(bk_write_64bytesless)
-
-# endif
-
-END (MEMCPY)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memcpy.S b/sysdeps/i386/i686/multiarch/memcpy.S
deleted file mode 100644
index f725944620..0000000000
--- a/sysdeps/i386/i686/multiarch/memcpy.S
+++ /dev/null
@@ -1,78 +0,0 @@
-/* Multiple versions of memcpy
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
-   DSO.  In static binaries we need memcpy before the initialization
-   happened.  */
-#if defined SHARED && IS_IN (libc)
-	.text
-ENTRY(memcpy)
-	.type	memcpy, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__memcpy_ia32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memcpy_sse2_unaligned)
-	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz	2f
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memcpy_ssse3)
-	HAS_ARCH_FEATURE (Fast_Rep_String)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memcpy_ssse3_rep)
-2:	ret
-END(memcpy)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __memcpy_ia32, @function; \
-	.p2align 4; \
-	.globl __memcpy_ia32; \
-	.hidden __memcpy_ia32; \
-	__memcpy_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __memcpy_ia32, .-__memcpy_ia32
-
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
-	.type __memcpy_chk_ia32, @function; \
-	.globl __memcpy_chk_ia32; \
-	.p2align 4; \
-	__memcpy_chk_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
-	cfi_endproc; .size __memcpy_chk_ia32, .-__memcpy_chk_ia32
-
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_memcpy; __GI_memcpy = __memcpy_ia32
-#endif
-
-#include "../memcpy.S"
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/memcpy_chk.S
deleted file mode 100644
index 1b4fbe2e6f..0000000000
--- a/sysdeps/i386/i686/multiarch/memcpy_chk.S
+++ /dev/null
@@ -1,50 +0,0 @@
-/* Multiple versions of __memcpy_chk
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
-   DSO.  There are no multiarch memcpy functions for static binaries.
- */
-#if IS_IN (libc)
-# ifdef SHARED
-	.text
-ENTRY(__memcpy_chk)
-	.type	__memcpy_chk, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__memcpy_chk_ia32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memcpy_chk_sse2_unaligned)
-	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz	2f
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3)
-	HAS_ARCH_FEATURE (Fast_Rep_String)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3_rep)
-2:	ret
-END(__memcpy_chk)
-# else
-#  include "../memcpy_chk.S"
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S
deleted file mode 100644
index 3873594cb2..0000000000
--- a/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMMOVE
-#define MEMCPY		__memmove_sse2_unaligned
-#define MEMCPY_CHK	__memmove_chk_sse2_unaligned
-#include "memcpy-sse2-unaligned.S"
diff --git a/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S b/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S
deleted file mode 100644
index d202fc4a13..0000000000
--- a/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMMOVE
-#define MEMCPY		__memmove_ssse3_rep
-#define MEMCPY_CHK	__memmove_chk_ssse3_rep
-#include "memcpy-ssse3-rep.S"
diff --git a/sysdeps/i386/i686/multiarch/memmove-ssse3.S b/sysdeps/i386/i686/multiarch/memmove-ssse3.S
deleted file mode 100644
index 295430b1ef..0000000000
--- a/sysdeps/i386/i686/multiarch/memmove-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMMOVE
-#define MEMCPY		__memmove_ssse3
-#define MEMCPY_CHK	__memmove_chk_ssse3
-#include "memcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/memmove.S
deleted file mode 100644
index 6eb418ca7f..0000000000
--- a/sysdeps/i386/i686/multiarch/memmove.S
+++ /dev/null
@@ -1,89 +0,0 @@
-/* Multiple versions of memmove
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib.  */
-#if IS_IN (libc)
-	.text
-ENTRY(memmove)
-	.type	memmove, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__memmove_ia32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memmove_sse2_unaligned)
-	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz	2f
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memmove_ssse3)
-	HAS_ARCH_FEATURE (Fast_Rep_String)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memmove_ssse3_rep)
-2:	ret
-END(memmove)
-
-# ifdef SHARED
-#  undef ENTRY
-#  define ENTRY(name) \
-	.type __memmove_ia32, @function; \
-	.p2align 4; \
-	.globl __memmove_ia32; \
-	.hidden __memmove_ia32; \
-	__memmove_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# else
-#  undef ENTRY
-#  define ENTRY(name) \
-	.type __memmove_ia32, @function; \
-	.globl __memmove_ia32; \
-	.p2align 4; \
-	__memmove_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# endif
-
-# undef END
-# define END(name) \
-	cfi_endproc; .size __memmove_ia32, .-__memmove_ia32
-
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
-	.type __memmove_chk_ia32, @function; \
-	.globl __memmove_chk_ia32; \
-	.p2align 4; \
-	__memmove_chk_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
-	cfi_endproc; .size __memmove_chk_ia32, .-__memmove_chk_ia32
-
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-#  define libc_hidden_builtin_def(name) \
-	.globl __GI_memmove; __GI_memmove = __memmove_ia32
-# endif
-#endif
-
-#include "../memmove.S"
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/memmove_chk.S
deleted file mode 100644
index 314834c4c6..0000000000
--- a/sysdeps/i386/i686/multiarch/memmove_chk.S
+++ /dev/null
@@ -1,94 +0,0 @@
-/* Multiple versions of __memmove_chk
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib.  */
-#if IS_IN (libc)
-	.text
-ENTRY(__memmove_chk)
-	.type	__memmove_chk, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__memmove_chk_ia32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memmove_chk_sse2_unaligned)
-	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz	2f
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3)
-	HAS_ARCH_FEATURE (Fast_Rep_String)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3_rep)
-2:	ret
-END(__memmove_chk)
-
-# ifndef SHARED
-	.type __memmove_chk_sse2_unaligned, @function
-	.p2align 4;
-__memmove_chk_sse2_unaligned:
-	cfi_startproc
-	CALL_MCOUNT
-	movl	12(%esp), %eax
-	cmpl	%eax, 16(%esp)
-	jb	__chk_fail
-	jmp	__memmove_sse2_unaligned
-	cfi_endproc
-	.size __memmove_chk_sse2_unaligned, .-__memmove_chk_sse2_unaligned
-
-	.type __memmove_chk_ssse3, @function
-	.p2align 4;
-__memmove_chk_ssse3:
-	cfi_startproc
-	CALL_MCOUNT
-	movl	12(%esp), %eax
-	cmpl	%eax, 16(%esp)
-	jb	__chk_fail
-	jmp	__memmove_ssse3
-	cfi_endproc
-	.size __memmove_chk_ssse3, .-__memmove_chk_ssse3
-
-	.type __memmove_chk_ssse3_rep, @function
-	.p2align 4;
-__memmove_chk_ssse3_rep:
-	cfi_startproc
-	CALL_MCOUNT
-	movl	12(%esp), %eax
-	cmpl	%eax, 16(%esp)
-	jb	__chk_fail
-	jmp	__memmove_ssse3_rep
-	cfi_endproc
-	.size __memmove_chk_ssse3_rep, .-__memmove_chk_ssse3_rep
-
-	.type __memmove_chk_ia32, @function
-	.p2align 4;
-__memmove_chk_ia32:
-	cfi_startproc
-	CALL_MCOUNT
-	movl	12(%esp), %eax
-	cmpl	%eax, 16(%esp)
-	jb	__chk_fail
-	jmp	__memmove_ia32
-	cfi_endproc
-	.size __memmove_chk_ia32, .-__memmove_chk_ia32
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S
deleted file mode 100644
index a1cea50771..0000000000
--- a/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMPCPY
-#define MEMCPY		__mempcpy_sse2_unaligned
-#define MEMCPY_CHK	__mempcpy_chk_sse2_unaligned
-#include "memcpy-sse2-unaligned.S"
diff --git a/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S
deleted file mode 100644
index 5357b33e18..0000000000
--- a/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMPCPY
-#define MEMCPY		__mempcpy_ssse3_rep
-#define MEMCPY_CHK	__mempcpy_chk_ssse3_rep
-#include "memcpy-ssse3-rep.S"
diff --git a/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S b/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S
deleted file mode 100644
index 822d98e954..0000000000
--- a/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMPCPY
-#define MEMCPY		__mempcpy_ssse3
-#define MEMCPY_CHK	__mempcpy_chk_ssse3
-#include "memcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/mempcpy.S
deleted file mode 100644
index 06e377fbc9..0000000000
--- a/sysdeps/i386/i686/multiarch/mempcpy.S
+++ /dev/null
@@ -1,81 +0,0 @@
-/* Multiple versions of mempcpy
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
-   DSO.  In static binaries we need mempcpy before the initialization
-   happened.  */
-#if defined SHARED && IS_IN (libc)
-	.text
-ENTRY(__mempcpy)
-	.type	__mempcpy, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__mempcpy_ia32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__mempcpy_sse2_unaligned)
-	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz	2f
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__mempcpy_ssse3)
-	HAS_ARCH_FEATURE (Fast_Rep_String)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__mempcpy_ssse3_rep)
-2:	ret
-END(__mempcpy)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __mempcpy_ia32, @function; \
-	.p2align 4; \
-	.globl __mempcpy_ia32; \
-	.hidden __mempcpy_ia32; \
-	__mempcpy_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __mempcpy_ia32, .-__mempcpy_ia32
-
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
-	.type __mempcpy_chk_ia32, @function; \
-	.globl __mempcpy_chk_ia32; \
-	.p2align 4; \
-	__mempcpy_chk_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
-	cfi_endproc; .size __mempcpy_chk_ia32, .-__mempcpy_chk_ia32
-
-# undef libc_hidden_def
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-# define libc_hidden_def(name) \
-	.globl __GI_mempcpy; __GI_mempcpy = __mempcpy_ia32
-# define libc_hidden_builtin_def(name) \
-	.globl __GI___mempcpy; __GI___mempcpy = __mempcpy_ia32
-#endif
-
-#include "../mempcpy.S"
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/mempcpy_chk.S
deleted file mode 100644
index e13e5248a5..0000000000
--- a/sysdeps/i386/i686/multiarch/mempcpy_chk.S
+++ /dev/null
@@ -1,50 +0,0 @@
-/* Multiple versions of __mempcpy_chk
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
-   DSO.  There are no multiarch mempcpy functions for static binaries.
- */
-#if IS_IN (libc)
-# ifdef SHARED
-	.text
-ENTRY(__mempcpy_chk)
-	.type	__mempcpy_chk, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__mempcpy_chk_ia32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__mempcpy_chk_sse2_unaligned)
-	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz	2f
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3)
-	HAS_ARCH_FEATURE (Fast_Rep_String)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3_rep)
-2:	ret
-END(__mempcpy_chk)
-# else
-#  include "../mempcpy_chk.S"
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memrchr-c.c b/sysdeps/i386/i686/multiarch/memrchr-c.c
deleted file mode 100644
index ef7bbbe792..0000000000
--- a/sysdeps/i386/i686/multiarch/memrchr-c.c
+++ /dev/null
@@ -1,7 +0,0 @@
-#if IS_IN (libc)
-# define MEMRCHR  __memrchr_ia32
-# include <string.h>
-extern void *__memrchr_ia32 (const void *, int, size_t);
-#endif
-
-#include "string/memrchr.c"
diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
deleted file mode 100644
index dbbe94fd08..0000000000
--- a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
+++ /dev/null
@@ -1,417 +0,0 @@
-/* Optimized memrchr with sse2
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS  4
-# define STR1  PARMS
-# define STR2  STR1+4
-# define LEN   STR2+4
-
-# define MEMCHR __memrchr_sse2_bsf
-
-	.text
-ENTRY (MEMCHR)
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-	mov	LEN(%esp), %edx
-
-	sub	$16, %edx
-	jbe	L(length_less16)
-
-	punpcklbw %xmm1, %xmm1
-	add	%edx, %ecx
-	punpcklbw %xmm1, %xmm1
-
-	movdqu	(%ecx), %xmm0
-	pshufd	$0, %xmm1, %xmm1
-	pcmpeqb	%xmm1, %xmm0
-
-/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches0)
-
-	sub	$64, %ecx
-	mov	%ecx, %eax
-	and	$15, %eax
-	jz	L(loop_prolog)
-
-	add	$16, %ecx
-	add	$16, %edx
-	sub	%eax, %ecx
-	sub	%eax, %edx
-
-	.p2align 4
-/* Loop start on aligned string.  */
-L(loop_prolog):
-	sub	$64, %edx
-	jbe	L(exit_loop)
-
-	movdqa	48(%ecx), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches48)
-
-	movdqa	32(%ecx), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	16(%ecx), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-	movdqa	(%ecx), %xmm4
-	pcmpeqb	%xmm1, %xmm4
-	pmovmskb %xmm4, %eax
-	test	%eax, %eax
-	jnz	L(matches0)
-
-	sub	$64, %ecx
-	sub	$64, %edx
-	jbe	L(exit_loop)
-
-	movdqa	48(%ecx), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches48)
-
-	movdqa	32(%ecx), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	16(%ecx), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-	movdqa	(%ecx), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches0)
-
-	mov	%ecx, %eax
-	and	$63, %eax
-	test	%eax, %eax
-	jz	L(align64_loop)
-
-	add	$64, %ecx
-	add	$64, %edx
-	sub	%eax, %ecx
-	sub	%eax, %edx
-
-	.p2align 4
-L(align64_loop):
-	sub	$64, %ecx
-	sub	$64, %edx
-	jbe	L(exit_loop)
-
-	movdqa	(%ecx), %xmm0
-	movdqa	16(%ecx), %xmm2
-	movdqa	32(%ecx), %xmm3
-	movdqa	48(%ecx), %xmm4
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm1, %xmm2
-	pcmpeqb	%xmm1, %xmm3
-	pcmpeqb	%xmm1, %xmm4
-
-	pmaxub	%xmm3, %xmm0
-	pmaxub	%xmm4, %xmm2
-	pmaxub	%xmm0, %xmm2
-	pmovmskb %xmm2, %eax
-
-	test	%eax, %eax
-	jz	L(align64_loop)
-
-	pmovmskb %xmm4, %eax
-	test	%eax, %eax
-	jnz	L(matches48)
-
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	16(%ecx), %xmm2
-
-	pcmpeqb	%xmm1, %xmm2
-	pcmpeqb	(%ecx), %xmm1
-
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-	pmovmskb %xmm1, %eax
-	bsr	%eax, %eax
-
-	add	%ecx, %eax
-	ret
-
-	.p2align 4
-L(exit_loop):
-	add	$64, %edx
-	cmp	$32, %edx
-	jbe	L(exit_loop_32)
-
-	movdqa	48(%ecx), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches48)
-
-	movdqa	32(%ecx), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	16(%ecx), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches16_1)
-	cmp	$48, %edx
-	jbe	L(return_null)
-
-	pcmpeqb	(%ecx), %xmm1
-	pmovmskb %xmm1, %eax
-	test	%eax, %eax
-	jnz	L(matches0_1)
-	xor	%eax, %eax
-	ret
-
-	.p2align 4
-L(exit_loop_32):
-	movdqa	48(%ecx), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches48_1)
-	cmp	$16, %edx
-	jbe	L(return_null)
-
-	pcmpeqb	32(%ecx), %xmm1
-	pmovmskb %xmm1, %eax
-	test	%eax, %eax
-	jnz	L(matches32_1)
-	xor	%eax, %eax
-	ret
-
-	.p2align 4
-L(matches0):
-	bsr	%eax, %eax
-	add	%ecx, %eax
-	ret
-
-	.p2align 4
-L(matches16):
-	bsr	%eax, %eax
-	lea	16(%eax, %ecx), %eax
-	ret
-
-	.p2align 4
-L(matches32):
-	bsr	%eax, %eax
-	lea	32(%eax, %ecx), %eax
-	ret
-
-	.p2align 4
-L(matches48):
-	bsr	%eax, %eax
-	lea	48(%eax, %ecx), %eax
-	ret
-
-	.p2align 4
-L(matches0_1):
-	bsr	%eax, %eax
-	sub	$64, %edx
-	add	%eax, %edx
-	jl	L(return_null)
-	add	%ecx, %eax
-	ret
-
-	.p2align 4
-L(matches16_1):
-	bsr	%eax, %eax
-	sub	$48, %edx
-	add	%eax, %edx
-	jl	L(return_null)
-	lea	16(%ecx, %eax), %eax
-	ret
-
-	.p2align 4
-L(matches32_1):
-	bsr	%eax, %eax
-	sub	$32, %edx
-	add	%eax, %edx
-	jl	L(return_null)
-	lea	32(%ecx, %eax), %eax
-	ret
-
-	.p2align 4
-L(matches48_1):
-	bsr	%eax, %eax
-	sub	$16, %edx
-	add	%eax, %edx
-	jl	L(return_null)
-	lea	48(%ecx, %eax), %eax
-	ret
-
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	ret
-
-	.p2align 4
-L(length_less16_offset0):
-	mov	%dl, %cl
-	pcmpeqb	(%eax), %xmm1
-
-	mov	$1, %edx
-	sal	%cl, %edx
-	sub	$1, %edx
-	mov	%edx, %ecx
-
-	pmovmskb %xmm1, %edx
-
-	and	%ecx, %edx
-	test	%edx, %edx
-	jz	L(return_null)
-
-	bsr	%edx, %ecx
-	add	%ecx, %eax
-	ret
-
-	.p2align 4
-L(length_less16):
-	punpcklbw %xmm1, %xmm1
-	mov	%ecx, %eax
-	punpcklbw %xmm1, %xmm1
-	add	$16, %edx
-	jz	L(return_null)
-
-	pshufd	$0, %xmm1, %xmm1
-	and	$15, %ecx
-	jz	L(length_less16_offset0)
-
-	PUSH	(%edi)
-	mov	%cl, %dh
-	add	%dl, %dh
-	and	$-16, %eax
-
-	sub	$16, %dh
-	ja	L(length_less16_part2)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edi
-
-	sar	%cl, %edi
-	add	%ecx, %eax
-	mov	%dl, %cl
-
-	mov	$1, %edx
-	sal	%cl, %edx
-	sub	$1, %edx
-
-	and	%edx, %edi
-	test	%edi, %edi
-	jz	L(ret_null)
-
-	bsr	%edi, %edi
-	add	%edi, %eax
-	POP	(%edi)
-	ret
-
-	CFI_PUSH     (%edi)
-
-	.p2align 4
-L(length_less16_part2):
-	movdqa	16(%eax), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %edi
-
-	mov	%cl, %ch
-
-	mov	%dh, %cl
-	mov	$1, %edx
-	sal	%cl, %edx
-	sub	$1, %edx
-
-	and	%edx, %edi
-
-	test	%edi, %edi
-	jnz	L(length_less16_part2_return)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edi
-
-	mov	%ch, %cl
-	sar	%cl, %edi
-	test	%edi, %edi
-	jz	L(ret_null)
-
-	bsr	%edi, %edi
-	add	%edi, %eax
-	xor	%ch, %ch
-	add	%ecx, %eax
-	POP	(%edi)
-	ret
-
-	CFI_PUSH     (%edi)
-
-	.p2align 4
-L(length_less16_part2_return):
-	bsr	%edi, %edi
-	lea	16(%eax, %edi), %eax
-	POP	(%edi)
-	ret
-
-	CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ret_null):
-	xor	%eax, %eax
-	POP	(%edi)
-	ret
-
-END (MEMCHR)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2.S b/sysdeps/i386/i686/multiarch/memrchr-sse2.S
deleted file mode 100644
index 5f7853f683..0000000000
--- a/sysdeps/i386/i686/multiarch/memrchr-sse2.S
+++ /dev/null
@@ -1,724 +0,0 @@
-/* Optimized memrchr with sse2 without bsf
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS  4
-# define STR1  PARMS
-# define STR2  STR1+4
-# define LEN   STR2+4
-
-	atom_text_section
-ENTRY (__memrchr_sse2)
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-	mov	LEN(%esp), %edx
-
-	sub	$16, %edx
-	jbe	L(length_less16)
-
-	punpcklbw %xmm1, %xmm1
-	add	%edx, %ecx
-	punpcklbw %xmm1, %xmm1
-
-	movdqu	(%ecx), %xmm0
-	pshufd	$0, %xmm1, %xmm1
-	pcmpeqb	%xmm1, %xmm0
-
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(exit_dispatch)
-
-	sub	$64, %ecx
-	mov	%ecx, %eax
-	and	$15, %eax
-	jz	L(loop_prolog)
-
-	lea	16(%ecx), %ecx
-	lea	16(%edx), %edx
-	sub	%eax, %edx
-	and	$-16, %ecx
-
-	.p2align 4
-/* Loop start on aligned string.  */
-L(loop_prolog):
-	sub	$64, %edx
-	jbe	L(exit_loop)
-
-	movdqa	48(%ecx), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches48)
-
-	movdqa	32(%ecx), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	16(%ecx), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-	movdqa	(%ecx), %xmm4
-	pcmpeqb	%xmm1, %xmm4
-	pmovmskb %xmm4, %eax
-	test	%eax, %eax
-	jnz	L(exit_dispatch)
-
-	sub	$64, %ecx
-	sub	$64, %edx
-	jbe	L(exit_loop)
-
-	movdqa	48(%ecx), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches48)
-
-	movdqa	32(%ecx), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	16(%ecx), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-	movdqa	(%ecx), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(exit_dispatch)
-
-	mov	%ecx, %eax
-	and	$63, %eax
-	test	%eax, %eax
-	jz	L(align64_loop)
-
-	lea	64(%ecx), %ecx
-	lea	64(%edx), %edx
-	and	$-64, %ecx
-	sub	%eax, %edx
-
-	.p2align 4
-L(align64_loop):
-	sub	$64, %ecx
-	sub	$64, %edx
-	jbe	L(exit_loop)
-
-	movdqa	(%ecx), %xmm0
-	movdqa	16(%ecx), %xmm2
-	movdqa	32(%ecx), %xmm3
-	movdqa	48(%ecx), %xmm4
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm1, %xmm2
-	pcmpeqb	%xmm1, %xmm3
-	pcmpeqb	%xmm1, %xmm4
-
-	pmaxub	%xmm3, %xmm0
-	pmaxub	%xmm4, %xmm2
-	pmaxub	%xmm0, %xmm2
-	pmovmskb %xmm2, %eax
-
-	test	%eax, %eax
-	jz	L(align64_loop)
-
-	pmovmskb %xmm4, %eax
-	test	%eax, %eax
-	jnz	L(matches48)
-
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	16(%ecx), %xmm2
-
-	pcmpeqb	%xmm1, %xmm2
-	pcmpeqb	(%ecx), %xmm1
-
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-	pmovmskb %xmm1, %eax
-	test	%ah, %ah
-	jnz	L(exit_dispatch_high)
-	mov	%al, %dl
-	and	$15 << 4, %dl
-	jnz	L(exit_dispatch_8)
-	test	$0x08, %al
-	jnz	L(exit_4)
-	test	$0x04, %al
-	jnz	L(exit_3)
-	test	$0x02, %al
-	jnz	L(exit_2)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(exit_loop):
-	add	$64, %edx
-	cmp	$32, %edx
-	jbe	L(exit_loop_32)
-
-	movdqa	48(%ecx), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches48)
-
-	movdqa	32(%ecx), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	16(%ecx), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches16_1)
-	cmp	$48, %edx
-	jbe	L(return_null)
-
-	pcmpeqb	(%ecx), %xmm1
-	pmovmskb %xmm1, %eax
-	test	%eax, %eax
-	jnz	L(matches0_1)
-	xor	%eax, %eax
-	ret
-
-	.p2align 4
-L(exit_loop_32):
-	movdqa	48(%ecx), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches48_1)
-	cmp	$16, %edx
-	jbe	L(return_null)
-
-	pcmpeqb	32(%ecx), %xmm1
-	pmovmskb %xmm1, %eax
-	test	%eax, %eax
-	jnz	L(matches32_1)
-	xor	%eax, %eax
-	ret
-
-	.p2align 4
-L(matches16):
-	lea	16(%ecx), %ecx
-	test	%ah, %ah
-	jnz	L(exit_dispatch_high)
-	mov	%al, %dl
-	and	$15 << 4, %dl
-	jnz	L(exit_dispatch_8)
-	test	$0x08, %al
-	jnz	L(exit_4)
-	test	$0x04, %al
-	jnz	L(exit_3)
-	test	$0x02, %al
-	jnz	L(exit_2)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(matches32):
-	lea	32(%ecx), %ecx
-	test	%ah, %ah
-	jnz	L(exit_dispatch_high)
-	mov	%al, %dl
-	and	$15 << 4, %dl
-	jnz	L(exit_dispatch_8)
-	test	$0x08, %al
-	jnz	L(exit_4)
-	test	$0x04, %al
-	jnz	L(exit_3)
-	test	$0x02, %al
-	jnz	L(exit_2)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(matches48):
-	lea	48(%ecx), %ecx
-
-	.p2align 4
-L(exit_dispatch):
-	test	%ah, %ah
-	jnz	L(exit_dispatch_high)
-	mov	%al, %dl
-	and	$15 << 4, %dl
-	jnz	L(exit_dispatch_8)
-	test	$0x08, %al
-	jnz	L(exit_4)
-	test	$0x04, %al
-	jnz	L(exit_3)
-	test	$0x02, %al
-	jnz	L(exit_2)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(exit_dispatch_8):
-	test	$0x80, %al
-	jnz	L(exit_8)
-	test	$0x40, %al
-	jnz	L(exit_7)
-	test	$0x20, %al
-	jnz	L(exit_6)
-	lea	4(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_dispatch_high):
-	mov	%ah, %dh
-	and	$15 << 4, %dh
-	jnz	L(exit_dispatch_high_8)
-	test	$0x08, %ah
-	jnz	L(exit_12)
-	test	$0x04, %ah
-	jnz	L(exit_11)
-	test	$0x02, %ah
-	jnz	L(exit_10)
-	lea	8(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_dispatch_high_8):
-	test	$0x80, %ah
-	jnz	L(exit_16)
-	test	$0x40, %ah
-	jnz	L(exit_15)
-	test	$0x20, %ah
-	jnz	L(exit_14)
-	lea	12(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_2):
-	lea	1(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_3):
-	lea	2(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_4):
-	lea	3(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_6):
-	lea	5(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_7):
-	lea	6(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_8):
-	lea	7(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_10):
-	lea	9(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_11):
-	lea	10(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_12):
-	lea	11(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_14):
-	lea	13(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_15):
-	lea	14(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_16):
-	lea	15(%ecx), %eax
-	ret
-
-	.p2align 4
-L(matches0_1):
-	lea	-64(%edx), %edx
-
-	test	%ah, %ah
-	jnz	L(exit_dispatch_1_high)
-	mov	%al, %ah
-	and	$15 << 4, %ah
-	jnz	L(exit_dispatch_1_8)
-	test	$0x08, %al
-	jnz	L(exit_1_4)
-	test	$0x04, %al
-	jnz	L(exit_1_3)
-	test	$0x02, %al
-	jnz	L(exit_1_2)
-	add	$0, %edx
-	jl	L(return_null)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(matches16_1):
-	lea	-48(%edx), %edx
-	lea	16(%ecx), %ecx
-
-	test	%ah, %ah
-	jnz	L(exit_dispatch_1_high)
-	mov	%al, %ah
-	and	$15 << 4, %ah
-	jnz	L(exit_dispatch_1_8)
-	test	$0x08, %al
-	jnz	L(exit_1_4)
-	test	$0x04, %al
-	jnz	L(exit_1_3)
-	test	$0x02, %al
-	jnz	L(exit_1_2)
-	add	$0, %edx
-	jl	L(return_null)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(matches32_1):
-	lea	-32(%edx), %edx
-	lea	32(%ecx), %ecx
-
-	test	%ah, %ah
-	jnz	L(exit_dispatch_1_high)
-	mov	%al, %ah
-	and	$15 << 4, %ah
-	jnz	L(exit_dispatch_1_8)
-	test	$0x08, %al
-	jnz	L(exit_1_4)
-	test	$0x04, %al
-	jnz	L(exit_1_3)
-	test	$0x02, %al
-	jnz	L(exit_1_2)
-	add	$0, %edx
-	jl	L(return_null)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(matches48_1):
-	lea	-16(%edx), %edx
-	lea	48(%ecx), %ecx
-
-	.p2align 4
-L(exit_dispatch_1):
-	test	%ah, %ah
-	jnz	L(exit_dispatch_1_high)
-	mov	%al, %ah
-	and	$15 << 4, %ah
-	jnz	L(exit_dispatch_1_8)
-	test	$0x08, %al
-	jnz	L(exit_1_4)
-	test	$0x04, %al
-	jnz	L(exit_1_3)
-	test	$0x02, %al
-	jnz	L(exit_1_2)
-	add	$0, %edx
-	jl	L(return_null)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(exit_dispatch_1_8):
-	test	$0x80, %al
-	jnz	L(exit_1_8)
-	test	$0x40, %al
-	jnz	L(exit_1_7)
-	test	$0x20, %al
-	jnz	L(exit_1_6)
-	add	$4, %edx
-	jl	L(return_null)
-	lea	4(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_dispatch_1_high):
-	mov	%ah, %al
-	and	$15 << 4, %al
-	jnz	L(exit_dispatch_1_high_8)
-	test	$0x08, %ah
-	jnz	L(exit_1_12)
-	test	$0x04, %ah
-	jnz	L(exit_1_11)
-	test	$0x02, %ah
-	jnz	L(exit_1_10)
-	add	$8, %edx
-	jl	L(return_null)
-	lea	8(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_dispatch_1_high_8):
-	test	$0x80, %ah
-	jnz	L(exit_1_16)
-	test	$0x40, %ah
-	jnz	L(exit_1_15)
-	test	$0x20, %ah
-	jnz	L(exit_1_14)
-	add	$12, %edx
-	jl	L(return_null)
-	lea	12(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_2):
-	add	$1, %edx
-	jl	L(return_null)
-	lea	1(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_3):
-	add	$2, %edx
-	jl	L(return_null)
-	lea	2(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_4):
-	add	$3, %edx
-	jl	L(return_null)
-	lea	3(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_6):
-	add	$5, %edx
-	jl	L(return_null)
-	lea	5(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_7):
-	add	$6, %edx
-	jl	L(return_null)
-	lea	6(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_8):
-	add	$7, %edx
-	jl	L(return_null)
-	lea	7(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_10):
-	add	$9, %edx
-	jl	L(return_null)
-	lea	9(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_11):
-	add	$10, %edx
-	jl	L(return_null)
-	lea	10(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_12):
-	add	$11, %edx
-	jl	L(return_null)
-	lea	11(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_14):
-	add	$13, %edx
-	jl	L(return_null)
-	lea	13(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_15):
-	add	$14, %edx
-	jl	L(return_null)
-	lea	14(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_16):
-	add	$15, %edx
-	jl	L(return_null)
-	lea	15(%ecx), %eax
-	ret
-
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	ret
-
-	.p2align 4
-L(length_less16_offset0):
-	mov	%dl, %cl
-	pcmpeqb	(%eax), %xmm1
-
-	mov	$1, %edx
-	sal	%cl, %edx
-	sub	$1, %edx
-
-	mov	%eax, %ecx
-	pmovmskb %xmm1, %eax
-
-	and	%edx, %eax
-	test	%eax, %eax
-	jnz	L(exit_dispatch)
-
-	xor	%eax, %eax
-	ret
-
-	.p2align 4
-L(length_less16):
-	punpcklbw %xmm1, %xmm1
-	add	$16, %edx
-	je	L(return_null)
-	punpcklbw %xmm1, %xmm1
-
-	mov	%ecx, %eax
-	pshufd	$0, %xmm1, %xmm1
-
-	and	$15, %ecx
-	jz	L(length_less16_offset0)
-
-	PUSH	(%edi)
-
-	mov	%cl, %dh
-	add	%dl, %dh
-	and	$-16, %eax
-
-	sub	$16, %dh
-	ja	L(length_less16_part2)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edi
-
-	sar	%cl, %edi
-	add	%ecx, %eax
-	mov	%dl, %cl
-
-	mov	$1, %edx
-	sal	%cl, %edx
-	sub	$1, %edx
-
-	and	%edx, %edi
-	test	%edi, %edi
-	jz	L(ret_null)
-
-	bsr	%edi, %edi
-	add	%edi, %eax
-	POP	(%edi)
-	ret
-
-	CFI_PUSH     (%edi)
-
-	.p2align 4
-L(length_less16_part2):
-	movdqa	16(%eax), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %edi
-
-	mov	%cl, %ch
-
-	mov	%dh, %cl
-	mov	$1, %edx
-	sal	%cl, %edx
-	sub	$1, %edx
-
-	and	%edx, %edi
-
-	test	%edi, %edi
-	jnz	L(length_less16_part2_return)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edi
-
-	mov	%ch, %cl
-	sar	%cl, %edi
-	test	%edi, %edi
-	jz	L(ret_null)
-
-	bsr	%edi, %edi
-	add	%edi, %eax
-	xor	%ch, %ch
-	add	%ecx, %eax
-	POP	(%edi)
-	ret
-
-	CFI_PUSH     (%edi)
-
-	.p2align 4
-L(length_less16_part2_return):
-	bsr	%edi, %edi
-	lea	16(%eax, %edi), %eax
-	POP	(%edi)
-	ret
-
-	CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ret_null):
-	xor	%eax, %eax
-	POP	(%edi)
-	ret
-
-END (__memrchr_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memrchr.S b/sysdeps/i386/i686/multiarch/memrchr.S
deleted file mode 100644
index d4253a553b..0000000000
--- a/sysdeps/i386/i686/multiarch/memrchr.S
+++ /dev/null
@@ -1,45 +0,0 @@
-/* Multiple versions of memrchr
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
-	.text
-ENTRY(__memrchr)
-	.type	__memrchr, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jz	3f
-
-	LOAD_FUNC_GOT_EAX (__memrchr_sse2)
-	ret
-
-2:	LOAD_FUNC_GOT_EAX (__memrchr_ia32)
-	ret
-
-3:	LOAD_FUNC_GOT_EAX (__memrchr_sse2_bsf)
-	ret
-END(__memrchr)
-
-weak_alias(__memrchr, memrchr)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
deleted file mode 100644
index 3221077e49..0000000000
--- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
+++ /dev/null
@@ -1,811 +0,0 @@
-/* memset with SSE2 and REP string.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define CFI_PUSH(REG)						\
-  cfi_adjust_cfa_offset (4);					\
-  cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)						\
-  cfi_adjust_cfa_offset (-4);					\
-  cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-
-#ifdef USE_AS_BZERO
-# define DEST		PARMS
-# define LEN		DEST+4
-# define SETRTNVAL
-#else
-# define DEST		PARMS
-# define CHR		DEST+4
-# define LEN		CHR+4
-# define SETRTNVAL	movl DEST(%esp), %eax
-#endif
-
-#ifdef SHARED
-# define ENTRANCE	PUSH (%ebx);	/* EBX is clobbered by the PIC jump-table dispatch below.  */
-# define RETURN_END	POP (%ebx); ret	/* Final return: no code follows, so no CFI fix-up.  */
-# define RETURN		RETURN_END; CFI_PUSH (%ebx)	/* CFI_PUSH re-asserts "EBX is pushed" for the code after the ret.  */
-# define PARMS		8		/* Skip the return address and the saved EBX.  */
-# define JMPTBL(I, B)	I - B	/* Table entry is the offset of I from the table base B.  */
-
-/* Add ECX to EDX, then branch through entry ECX of TABLE, a jump table
-   of offsets relative to TABLE itself.  EBX is clobbered.  */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
-    /* We first load PC into EBX.  */				\
-    SETUP_PIC_REG(bx);						\
-    /* Get the address of the jump table.  */			\
-    add		$(TABLE - .), %ebx;				\
-    /* Get the entry and convert the relative offset to the	\
-       absolute address.  */					\
-    add		(%ebx,%ecx,4), %ebx;				\
-    add		%ecx, %edx;					\
-    /* We loaded the jump table and adjusted EDX. Go.  */	\
-    jmp		*%ebx
-#else
-# define ENTRANCE
-# define RETURN_END	ret
-# define RETURN		RETURN_END
-# define PARMS		4	/* Skip the return address only.  */
-# define JMPTBL(I, B)	I	/* Table entry is the absolute address of I; B is unused.  */
-
-/* Add ECX to EDX, then branch through entry ECX of TABLE, a jump table
-   of absolute addresses.  */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
-    add		%ecx, %edx;					\
-    jmp		*TABLE(,%ecx,4)
-#endif
-
-	.section .text.sse2,"ax",@progbits
-#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
-ENTRY (__memset_chk_sse2_rep)
-	movl	12(%esp), %eax	/* EAX = third stack argument (presumably the fill length).  */
-	cmpl	%eax, 16(%esp)	/* Flags from (fourth stack argument) - EAX.  */
-	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* Fourth argument < third (unsigned): fail.  */
-END (__memset_chk_sse2_rep)	/* No ret above: a passing check runs on into the entry that follows.  */
-#endif
-ENTRY (__memset_sse2_rep)
-	ENTRANCE
-
-	movl	LEN(%esp), %ecx		/* ECX = byte count.  */
-#ifdef USE_AS_BZERO
-	xor	%eax, %eax		/* bzero: the fill pattern is zero.  */
-#else
-	movzbl	CHR(%esp), %eax		/* EAX = fill byte, zero-extended.  */
-	movb	%al, %ah		/* Low word of EAX now holds the byte twice.  */
-	/* Replicate that 16-bit pattern into the upper half of EAX.  */
-	movl	%eax, %edx
-	shl	$16, %eax
-	or	%edx, %eax
-#endif
-	movl	DEST(%esp), %edx	/* EDX = destination start.  */
-	cmp	$32, %ecx
-	jae	L(32bytesormore)	/* Unsigned ECX >= 32: use the SSE2 paths.  */
-
-L(write_less32bytes):			/* ECX is 0..31 here.  */
-	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))	/* EDX += ECX, then jump to the handler for ECX bytes.  */
-
-
-	.pushsection .rodata.sse2,"a",@progbits
-	ALIGN (2)
-L(table_less_32bytes):
-	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
-	.popsection
-
-	ALIGN (4)
-L(write_28bytes):		/* Counts 4k: EDX is the end pointer; each label falls into the next.  */
-	movl	%eax, -28(%edx)
-L(write_24bytes):
-	movl	%eax, -24(%edx)
-L(write_20bytes):
-	movl	%eax, -20(%edx)
-L(write_16bytes):
-	movl	%eax, -16(%edx)
-L(write_12bytes):
-	movl	%eax, -12(%edx)
-L(write_8bytes):
-	movl	%eax, -8(%edx)
-L(write_4bytes):
-	movl	%eax, -4(%edx)
-L(write_0bytes):
-	SETRTNVAL		/* memset: EAX = DEST; expands to nothing for bzero.  */
-	RETURN
-
-	ALIGN (4)
-L(write_29bytes):		/* Counts 4k+1: dword stores, then the final byte.  */
-	movl	%eax, -29(%edx)
-L(write_25bytes):
-	movl	%eax, -25(%edx)
-L(write_21bytes):
-	movl	%eax, -21(%edx)
-L(write_17bytes):
-	movl	%eax, -17(%edx)
-L(write_13bytes):
-	movl	%eax, -13(%edx)
-L(write_9bytes):
-	movl	%eax, -9(%edx)
-L(write_5bytes):
-	movl	%eax, -5(%edx)
-L(write_1bytes):
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(write_30bytes):		/* Counts 4k+2: dword stores, then the final word.  */
-	movl	%eax, -30(%edx)
-L(write_26bytes):
-	movl	%eax, -26(%edx)
-L(write_22bytes):
-	movl	%eax, -22(%edx)
-L(write_18bytes):
-	movl	%eax, -18(%edx)
-L(write_14bytes):
-	movl	%eax, -14(%edx)
-L(write_10bytes):
-	movl	%eax, -10(%edx)
-L(write_6bytes):
-	movl	%eax, -6(%edx)
-L(write_2bytes):
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(write_31bytes):		/* Counts 4k+3: dword stores, then a word and the final byte.  */
-	movl	%eax, -31(%edx)
-L(write_27bytes):
-	movl	%eax, -27(%edx)
-L(write_23bytes):
-	movl	%eax, -23(%edx)
-L(write_19bytes):
-	movl	%eax, -19(%edx)
-L(write_15bytes):
-	movl	%eax, -15(%edx)
-L(write_11bytes):
-	movl	%eax, -11(%edx)
-L(write_7bytes):
-	movl	%eax, -7(%edx)
-L(write_3bytes):
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-/* ECX >= 32; the alignment of EDX is not known yet (tested below).  */
-L(32bytesormore):
-	/* Fill xmm0 with the pattern.  */
-#ifdef USE_AS_BZERO
-	pxor	%xmm0, %xmm0
-#else
-	movd	%eax, %xmm0
-	pshufd	$0, %xmm0, %xmm0	/* Broadcast the low dword to all four lanes.  */
-#endif
-	testl	$0xf, %edx		/* Low four address bits zero?  */
-	jz	L(aligned_16)
-/* ECX >= 32 and EDX is not 16 byte aligned.  */
-L(not_aligned_16):
-	movdqu	%xmm0, (%edx)		/* Unaligned store covers the first 16 bytes.  */
-	movl	%edx, %eax		/* EAX = original destination.  */
-	and	$-16, %edx
-	add	$16, %edx		/* EDX = next 16-byte boundary.  */
-	sub	%edx, %eax		/* EAX = old - new = -(bytes skipped).  */
-	add	%eax, %ecx		/* ECX = bytes left from the boundary on.  */
-	movd	%xmm0, %eax		/* Restore the fill pattern in EAX.  */
-
-	ALIGN (4)
-L(aligned_16):
-	cmp	$128, %ecx
-	jae	L(128bytesormore)	/* Unsigned ECX >= 128: use the bulk loops.  */
-
-L(aligned_16_less128bytes):
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))	/* EDX += ECX, then jump to the handler for ECX bytes.  */
-
-	ALIGN (4)
-L(128bytesormore):
-	PUSH (%edi)		/* EDI holds the threshold here and the store pointer on the REP STOS path.  */
-#ifdef DATA_CACHE_SIZE
-	PUSH (%ebx)
-	mov	$DATA_CACHE_SIZE, %ebx
-#else
-# ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	mov	__x86_data_cache_size@GOTOFF(%ebx), %ebx
-# else
-	PUSH (%ebx)
-	mov	__x86_data_cache_size, %ebx
-# endif
-#endif
-	mov	%ebx, %edi
-	shr	$4, %ebx
-	sub	%ebx, %edi	/* EDI = size - size / 16.  */
-#if defined DATA_CACHE_SIZE || !defined SHARED
-	POP (%ebx)
-#endif
-/*
- * When the data size approaches the end of the L1 cache (EDI = size -
- * size / 16), fast string will prefetch and combine data efficiently.
- */
-	cmp	%edi, %ecx
-	jae	L(128bytesormore_endof_L1)	/* Unsigned ECX >= EDI: use REP STOS.  */
-	subl	$128, %ecx	/* Bias ECX by -128 so a borrow below means < 128 bytes will remain.  */
-L(128bytesormore_normal):
-	sub	$128, %ecx	/* Sets CF if fewer than 128 bytes remain after this chunk.  */
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	lea	128(%edx), %edx	/* LEA leaves the flags from SUB intact.  */
-	jb	L(128bytesless_normal)
-
-
-	sub	$128, %ecx	/* Second unrolled copy of the chunk above.  */
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	lea	128(%edx), %edx
-	jae	L(128bytesormore_normal)	/* No borrow: at least 128 bytes still to store.  */
-
-L(128bytesless_normal):
-	POP (%edi)
-	add	$128, %ecx	/* Undo the bias: ECX = remaining bytes (0..127).  */
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-	CFI_PUSH (%edi)		/* Unwind info only: EDI is still pushed on this path.  */
-	ALIGN (4)
-L(128bytesormore_endof_L1):
-	mov	%edx, %edi	/* EDI = store pointer for STOS.  */
-	mov	%ecx, %edx	/* EDX = byte count.  */
-	shr	$2, %ecx	/* ECX = number of whole dwords.  */
-	and	$3, %edx	/* EDX = tail bytes (0..3); ZF set when there is no tail.  */
-	rep stosl		/* Stores EAX ECX times; does not modify EFLAGS.  */
-	jz	L(copy_page_by_rep_exit)	/* ZF is still the result of the AND above.  */
-	cmp	$2, %edx
-	jb	L(copy_page_by_rep_left_1)	/* Exactly one tail byte.  */
-	movw	%ax, (%edi)
-	add	$2, %edi
-	sub	$2, %edx
-	jz	L(copy_page_by_rep_exit)	/* Tail was two bytes; otherwise one byte is left.  */
-L(copy_page_by_rep_left_1):
-	movb	%al, (%edi)
-L(copy_page_by_rep_exit):
-	POP (%edi)
-	SETRTNVAL
-	RETURN
-
-	.pushsection .rodata.sse2,"a",@progbits
-	ALIGN (2)
-L(table_16_128bytes):
-	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
-	.popsection
-
-	ALIGN (4)
-L(aligned_16_112bytes):		/* Counts 16k: EDX is the end pointer and EDX - count is 16-byte aligned, so MOVDQA is usable.  */
-	movdqa	%xmm0, -112(%edx)
-L(aligned_16_96bytes):
-	movdqa	%xmm0, -96(%edx)
-L(aligned_16_80bytes):
-	movdqa	%xmm0, -80(%edx)
-L(aligned_16_64bytes):
-	movdqa	%xmm0, -64(%edx)
-L(aligned_16_48bytes):
-	movdqa	%xmm0, -48(%edx)
-L(aligned_16_32bytes):
-	movdqa	%xmm0, -32(%edx)
-L(aligned_16_16bytes):
-	movdqa	%xmm0, -16(%edx)
-L(aligned_16_0bytes):
-	SETRTNVAL		/* memset: EAX = DEST; expands to nothing for bzero.  */
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_113bytes):
-	movdqa	%xmm0, -113(%edx)
-L(aligned_16_97bytes):
-	movdqa	%xmm0, -97(%edx)
-L(aligned_16_81bytes):
-	movdqa	%xmm0, -81(%edx)
-L(aligned_16_65bytes):
-	movdqa	%xmm0, -65(%edx)
-L(aligned_16_49bytes):
-	movdqa	%xmm0, -49(%edx)
-L(aligned_16_33bytes):
-	movdqa	%xmm0, -33(%edx)
-L(aligned_16_17bytes):
-	movdqa	%xmm0, -17(%edx)
-L(aligned_16_1bytes):
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_114bytes):
-	movdqa	%xmm0, -114(%edx)
-L(aligned_16_98bytes):
-	movdqa	%xmm0, -98(%edx)
-L(aligned_16_82bytes):
-	movdqa	%xmm0, -82(%edx)
-L(aligned_16_66bytes):
-	movdqa	%xmm0, -66(%edx)
-L(aligned_16_50bytes):
-	movdqa	%xmm0, -50(%edx)
-L(aligned_16_34bytes):
-	movdqa	%xmm0, -34(%edx)
-L(aligned_16_18bytes):
-	movdqa	%xmm0, -18(%edx)
-L(aligned_16_2bytes):
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_115bytes):
-	movdqa	%xmm0, -115(%edx)
-L(aligned_16_99bytes):
-	movdqa	%xmm0, -99(%edx)
-L(aligned_16_83bytes):
-	movdqa	%xmm0, -83(%edx)
-L(aligned_16_67bytes):
-	movdqa	%xmm0, -67(%edx)
-L(aligned_16_51bytes):
-	movdqa	%xmm0, -51(%edx)
-L(aligned_16_35bytes):
-	movdqa	%xmm0, -35(%edx)
-L(aligned_16_19bytes):
-	movdqa	%xmm0, -19(%edx)
-L(aligned_16_3bytes):
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_116bytes):
-	movdqa	%xmm0, -116(%edx)
-L(aligned_16_100bytes):
-	movdqa	%xmm0, -100(%edx)
-L(aligned_16_84bytes):
-	movdqa	%xmm0, -84(%edx)
-L(aligned_16_68bytes):
-	movdqa	%xmm0, -68(%edx)
-L(aligned_16_52bytes):
-	movdqa	%xmm0, -52(%edx)
-L(aligned_16_36bytes):
-	movdqa	%xmm0, -36(%edx)
-L(aligned_16_20bytes):
-	movdqa	%xmm0, -20(%edx)
-L(aligned_16_4bytes):
-	movl	%eax, -4(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_117bytes):
-	movdqa	%xmm0, -117(%edx)
-L(aligned_16_101bytes):
-	movdqa	%xmm0, -101(%edx)
-L(aligned_16_85bytes):
-	movdqa	%xmm0, -85(%edx)
-L(aligned_16_69bytes):
-	movdqa	%xmm0, -69(%edx)
-L(aligned_16_53bytes):
-	movdqa	%xmm0, -53(%edx)
-L(aligned_16_37bytes):
-	movdqa	%xmm0, -37(%edx)
-L(aligned_16_21bytes):
-	movdqa	%xmm0, -21(%edx)
-L(aligned_16_5bytes):
-	movl	%eax, -5(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_118bytes):
-	movdqa	%xmm0, -118(%edx)
-L(aligned_16_102bytes):
-	movdqa	%xmm0, -102(%edx)
-L(aligned_16_86bytes):
-	movdqa	%xmm0, -86(%edx)
-L(aligned_16_70bytes):
-	movdqa	%xmm0, -70(%edx)
-L(aligned_16_54bytes):
-	movdqa	%xmm0, -54(%edx)
-L(aligned_16_38bytes):
-	movdqa	%xmm0, -38(%edx)
-L(aligned_16_22bytes):
-	movdqa	%xmm0, -22(%edx)
-L(aligned_16_6bytes):
-	movl	%eax, -6(%edx)
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_119bytes):
-	movdqa	%xmm0, -119(%edx)
-L(aligned_16_103bytes):
-	movdqa	%xmm0, -103(%edx)
-L(aligned_16_87bytes):
-	movdqa	%xmm0, -87(%edx)
-L(aligned_16_71bytes):
-	movdqa	%xmm0, -71(%edx)
-L(aligned_16_55bytes):
-	movdqa	%xmm0, -55(%edx)
-L(aligned_16_39bytes):
-	movdqa	%xmm0, -39(%edx)
-L(aligned_16_23bytes):
-	movdqa	%xmm0, -23(%edx)
-L(aligned_16_7bytes):
-	movl	%eax, -7(%edx)
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_120bytes):
-	movdqa	%xmm0, -120(%edx)
-L(aligned_16_104bytes):
-	movdqa	%xmm0, -104(%edx)
-L(aligned_16_88bytes):
-	movdqa	%xmm0, -88(%edx)
-L(aligned_16_72bytes):
-	movdqa	%xmm0, -72(%edx)
-L(aligned_16_56bytes):
-	movdqa	%xmm0, -56(%edx)
-L(aligned_16_40bytes):
-	movdqa	%xmm0, -40(%edx)
-L(aligned_16_24bytes):
-	movdqa	%xmm0, -24(%edx)
-L(aligned_16_8bytes):
-	movq	%xmm0, -8(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_121bytes):
-	movdqa	%xmm0, -121(%edx)
-L(aligned_16_105bytes):
-	movdqa	%xmm0, -105(%edx)
-L(aligned_16_89bytes):
-	movdqa	%xmm0, -89(%edx)
-L(aligned_16_73bytes):
-	movdqa	%xmm0, -73(%edx)
-L(aligned_16_57bytes):
-	movdqa	%xmm0, -57(%edx)
-L(aligned_16_41bytes):
-	movdqa	%xmm0, -41(%edx)
-L(aligned_16_25bytes):
-	movdqa	%xmm0, -25(%edx)
-L(aligned_16_9bytes):
-	movq	%xmm0, -9(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_122bytes):
-	movdqa	%xmm0, -122(%edx)
-L(aligned_16_106bytes):
-	movdqa	%xmm0, -106(%edx)
-L(aligned_16_90bytes):
-	movdqa	%xmm0, -90(%edx)
-L(aligned_16_74bytes):
-	movdqa	%xmm0, -74(%edx)
-L(aligned_16_58bytes):
-	movdqa	%xmm0, -58(%edx)
-L(aligned_16_42bytes):
-	movdqa	%xmm0, -42(%edx)
-L(aligned_16_26bytes):
-	movdqa	%xmm0, -26(%edx)
-L(aligned_16_10bytes):
-	movq	%xmm0, -10(%edx)
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_123bytes):
-	movdqa	%xmm0, -123(%edx)
-L(aligned_16_107bytes):
-	movdqa	%xmm0, -107(%edx)
-L(aligned_16_91bytes):
-	movdqa	%xmm0, -91(%edx)
-L(aligned_16_75bytes):
-	movdqa	%xmm0, -75(%edx)
-L(aligned_16_59bytes):
-	movdqa	%xmm0, -59(%edx)
-L(aligned_16_43bytes):
-	movdqa	%xmm0, -43(%edx)
-L(aligned_16_27bytes):
-	movdqa	%xmm0, -27(%edx)
-L(aligned_16_11bytes):
-	movq	%xmm0, -11(%edx)
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_124bytes):
-	movdqa	%xmm0, -124(%edx)
-L(aligned_16_108bytes):
-	movdqa	%xmm0, -108(%edx)
-L(aligned_16_92bytes):
-	movdqa	%xmm0, -92(%edx)
-L(aligned_16_76bytes):
-	movdqa	%xmm0, -76(%edx)
-L(aligned_16_60bytes):
-	movdqa	%xmm0, -60(%edx)
-L(aligned_16_44bytes):
-	movdqa	%xmm0, -44(%edx)
-L(aligned_16_28bytes):
-	movdqa	%xmm0, -28(%edx)
-L(aligned_16_12bytes):
-	movq	%xmm0, -12(%edx)
-	movl	%eax, -4(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_125bytes):
-	movdqa	%xmm0, -125(%edx)
-L(aligned_16_109bytes):
-	movdqa	%xmm0, -109(%edx)
-L(aligned_16_93bytes):
-	movdqa	%xmm0, -93(%edx)
-L(aligned_16_77bytes):
-	movdqa	%xmm0, -77(%edx)
-L(aligned_16_61bytes):
-	movdqa	%xmm0, -61(%edx)
-L(aligned_16_45bytes):
-	movdqa	%xmm0, -45(%edx)
-L(aligned_16_29bytes):
-	movdqa	%xmm0, -29(%edx)
-L(aligned_16_13bytes):
-	movq	%xmm0, -13(%edx)
-	movl	%eax, -5(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_126bytes):
-	movdqa	%xmm0, -126(%edx)
-L(aligned_16_110bytes):
-	movdqa	%xmm0, -110(%edx)
-L(aligned_16_94bytes):
-	movdqa	%xmm0, -94(%edx)
-L(aligned_16_78bytes):
-	movdqa	%xmm0, -78(%edx)
-L(aligned_16_62bytes):
-	movdqa	%xmm0, -62(%edx)
-L(aligned_16_46bytes):
-	movdqa	%xmm0, -46(%edx)
-L(aligned_16_30bytes):
-	movdqa	%xmm0, -30(%edx)
-L(aligned_16_14bytes):
-	movq	%xmm0, -14(%edx)
-	movl	%eax, -6(%edx)
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_127bytes):
-	movdqa	%xmm0, -127(%edx)
-L(aligned_16_111bytes):
-	movdqa	%xmm0, -111(%edx)
-L(aligned_16_95bytes):
-	movdqa	%xmm0, -95(%edx)
-L(aligned_16_79bytes):
-	movdqa	%xmm0, -79(%edx)
-L(aligned_16_63bytes):
-	movdqa	%xmm0, -63(%edx)
-L(aligned_16_47bytes):
-	movdqa	%xmm0, -47(%edx)
-L(aligned_16_31bytes):
-	movdqa	%xmm0, -31(%edx)
-L(aligned_16_15bytes):
-	movq	%xmm0, -15(%edx)
-	movl	%eax, -7(%edx)
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN_END
-
-END (__memset_sse2_rep)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memset-sse2.S b/sysdeps/i386/i686/multiarch/memset-sse2.S
deleted file mode 100644
index d7b8be9114..0000000000
--- a/sysdeps/i386/i686/multiarch/memset-sse2.S
+++ /dev/null
@@ -1,860 +0,0 @@
-/* memset with SSE2
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define CFI_PUSH(REG)						\
-  cfi_adjust_cfa_offset (4);					\
-  cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)						\
-  cfi_adjust_cfa_offset (-4);					\
-  cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-
-#ifdef USE_AS_BZERO
-# define DEST		PARMS
-# define LEN		DEST+4
-# define SETRTNVAL
-#else
-# define DEST		PARMS
-# define CHR		DEST+4
-# define LEN		CHR+4
-# define SETRTNVAL	movl DEST(%esp), %eax
-#endif
-
-#ifdef SHARED
-# define ENTRANCE	PUSH (%ebx);	/* EBX is clobbered by the PIC jump-table dispatch below.  */
-# define RETURN_END	POP (%ebx); ret	/* Final return: no code follows, so no CFI fix-up.  */
-# define RETURN		RETURN_END; CFI_PUSH (%ebx)	/* CFI_PUSH re-asserts "EBX is pushed" for the code after the ret.  */
-# define PARMS		8		/* Skip the return address and the saved EBX.  */
-# define JMPTBL(I, B)	I - B	/* Table entry is the offset of I from the table base B.  */
-
-/* Add ECX to EDX, then branch through entry ECX of TABLE, a jump table
-   of offsets relative to TABLE itself.  EBX is clobbered.  */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
-    /* We first load PC into EBX.  */				\
-    SETUP_PIC_REG(bx);						\
-    /* Get the address of the jump table.  */			\
-    add		$(TABLE - .), %ebx;				\
-    /* Get the entry and convert the relative offset to the	\
-       absolute address.  */					\
-    add		(%ebx,%ecx,4), %ebx;				\
-    add		%ecx, %edx;					\
-    /* We loaded the jump table and adjusted EDX. Go.  */	\
-    jmp		*%ebx
-#else
-# define ENTRANCE
-# define RETURN_END	ret
-# define RETURN		RETURN_END
-# define PARMS		4	/* Skip the return address only.  */
-# define JMPTBL(I, B)	I	/* Table entry is the absolute address of I; B is unused.  */
-
-/* Add ECX to EDX, then branch through entry ECX of TABLE, a jump table
-   of absolute addresses.  */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
-    add		%ecx, %edx;					\
-    jmp		*TABLE(,%ecx,4)
-#endif
-
-	.section .text.sse2,"ax",@progbits
-#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
-ENTRY (__memset_chk_sse2)
-	movl	12(%esp), %eax	/* EAX = third stack argument (presumably the fill length).  */
-	cmpl	%eax, 16(%esp)	/* Flags from (fourth stack argument) - EAX.  */
-	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* Fourth argument < third (unsigned): fail.  */
-END (__memset_chk_sse2)	/* No ret above: a passing check runs on into the entry that follows.  */
-#endif
-ENTRY (__memset_sse2)
-	ENTRANCE
-
-	movl	LEN(%esp), %ecx
-#ifdef USE_AS_BZERO
-	xor	%eax, %eax
-#else
-	movzbl	CHR(%esp), %eax
-	movb	%al, %ah
-	/* Fill the whole EAX with pattern.  */
-	movl	%eax, %edx
-	shl	$16, %eax
-	or	%edx, %eax
-#endif
-	movl	DEST(%esp), %edx
-	cmp	$32, %ecx
-	jae	L(32bytesormore)
-
-L(write_less32bytes):
-	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))
-
-
-	.pushsection .rodata.sse2,"a",@progbits
-	ALIGN (2)
-L(table_less_32bytes):
-	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
-	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
-	.popsection
-
-	ALIGN (4)
-L(write_28bytes):
-	movl	%eax, -28(%edx)
-L(write_24bytes):
-	movl	%eax, -24(%edx)
-L(write_20bytes):
-	movl	%eax, -20(%edx)
-L(write_16bytes):
-	movl	%eax, -16(%edx)
-L(write_12bytes):
-	movl	%eax, -12(%edx)
-L(write_8bytes):
-	movl	%eax, -8(%edx)
-L(write_4bytes):
-	movl	%eax, -4(%edx)
-L(write_0bytes):
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(write_29bytes):
-	movl	%eax, -29(%edx)
-L(write_25bytes):
-	movl	%eax, -25(%edx)
-L(write_21bytes):
-	movl	%eax, -21(%edx)
-L(write_17bytes):
-	movl	%eax, -17(%edx)
-L(write_13bytes):
-	movl	%eax, -13(%edx)
-L(write_9bytes):
-	movl	%eax, -9(%edx)
-L(write_5bytes):
-	movl	%eax, -5(%edx)
-L(write_1bytes):
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(write_30bytes):
-	movl	%eax, -30(%edx)
-L(write_26bytes):
-	movl	%eax, -26(%edx)
-L(write_22bytes):
-	movl	%eax, -22(%edx)
-L(write_18bytes):
-	movl	%eax, -18(%edx)
-L(write_14bytes):
-	movl	%eax, -14(%edx)
-L(write_10bytes):
-	movl	%eax, -10(%edx)
-L(write_6bytes):
-	movl	%eax, -6(%edx)
-L(write_2bytes):
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(write_31bytes):
-	movl	%eax, -31(%edx)
-L(write_27bytes):
-	movl	%eax, -27(%edx)
-L(write_23bytes):
-	movl	%eax, -23(%edx)
-L(write_19bytes):
-	movl	%eax, -19(%edx)
-L(write_15bytes):
-	movl	%eax, -15(%edx)
-L(write_11bytes):
-	movl	%eax, -11(%edx)
-L(write_7bytes):
-	movl	%eax, -7(%edx)
-L(write_3bytes):
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-/* ECX > 32 and EDX is 4 byte aligned.  */
-L(32bytesormore):
-	/* Fill xmm0 with the pattern.  */
-#ifdef USE_AS_BZERO
-	pxor	%xmm0, %xmm0
-#else
-	movd	%eax, %xmm0
-	pshufd	$0, %xmm0, %xmm0
-#endif
-	testl	$0xf, %edx
-	jz	L(aligned_16)
-/* ECX > 32 and EDX is not 16 byte aligned.  */
-L(not_aligned_16):
-	movdqu	%xmm0, (%edx)
-	movl	%edx, %eax
-	and	$-16, %edx
-	add	$16, %edx
-	sub	%edx, %eax
-	add	%eax, %ecx
-	movd	%xmm0, %eax
-
-	ALIGN (4)
-L(aligned_16):
-	cmp	$128, %ecx
-	jae	L(128bytesormore)
-
-L(aligned_16_less128bytes):
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-	ALIGN (4)
-L(128bytesormore):
-#ifdef SHARED_CACHE_SIZE
-	PUSH (%ebx)
-	mov	$SHARED_CACHE_SIZE, %ebx
-#else
-# ifdef SHARED
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
-# else
-	PUSH (%ebx)
-	mov	__x86_shared_cache_size, %ebx
-# endif
-#endif
-	cmp	%ebx, %ecx
-	jae	L(128bytesormore_nt_start)
-
-
-#ifdef DATA_CACHE_SIZE
-	POP (%ebx)
-# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
-	cmp	$DATA_CACHE_SIZE, %ecx
-#else
-# ifdef SHARED
-#  define RESTORE_EBX_STATE
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
-# else
-	POP (%ebx)
-#  define RESTORE_EBX_STATE CFI_PUSH (%ebx)
-	cmp	__x86_data_cache_size, %ecx
-# endif
-#endif
-
-	jae	L(128bytes_L2_normal)
-	subl	$128, %ecx
-L(128bytesormore_normal):
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	lea	128(%edx), %edx
-	jb	L(128bytesless_normal)
-
-
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	lea	128(%edx), %edx
-	jae	L(128bytesormore_normal)
-
-L(128bytesless_normal):
-	add	$128, %ecx
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-	ALIGN (4)
-L(128bytes_L2_normal):
-	prefetcht0	0x380(%edx)
-	prefetcht0	0x3c0(%edx)
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movaps	%xmm0, 0x10(%edx)
-	movaps	%xmm0, 0x20(%edx)
-	movaps	%xmm0, 0x30(%edx)
-	movaps	%xmm0, 0x40(%edx)
-	movaps	%xmm0, 0x50(%edx)
-	movaps	%xmm0, 0x60(%edx)
-	movaps	%xmm0, 0x70(%edx)
-	add	$128, %edx
-	cmp	$128, %ecx
-	jae	L(128bytes_L2_normal)
-
-L(128bytesless_L2_normal):
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-	RESTORE_EBX_STATE
-L(128bytesormore_nt_start):
-	sub	%ebx, %ecx
-	ALIGN (4)
-L(128bytesormore_shared_cache_loop):
-	prefetcht0	0x3c0(%edx)
-	prefetcht0	0x380(%edx)
-	sub	$0x80, %ebx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	add	$0x80, %edx
-	cmp	$0x80, %ebx
-	jae	L(128bytesormore_shared_cache_loop)
-	cmp	$0x80, %ecx
-	jb	L(shared_cache_loop_end)
-	ALIGN (4)
-L(128bytesormore_nt):
-	sub	$0x80, %ecx
-	movntdq	%xmm0, (%edx)
-	movntdq	%xmm0, 0x10(%edx)
-	movntdq	%xmm0, 0x20(%edx)
-	movntdq	%xmm0, 0x30(%edx)
-	movntdq	%xmm0, 0x40(%edx)
-	movntdq	%xmm0, 0x50(%edx)
-	movntdq	%xmm0, 0x60(%edx)
-	movntdq	%xmm0, 0x70(%edx)
-	add	$0x80, %edx
-	cmp	$0x80, %ecx
-	jae	L(128bytesormore_nt)
-	sfence
-L(shared_cache_loop_end):
-#if defined DATA_CACHE_SIZE || !defined SHARED
-	POP (%ebx)
-#endif
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-
-	.pushsection .rodata.sse2,"a",@progbits
-	ALIGN (2)
-L(table_16_128bytes):
-	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
-	.popsection
-
-	ALIGN (4)
-L(aligned_16_112bytes):
-	movdqa	%xmm0, -112(%edx)
-L(aligned_16_96bytes):
-	movdqa	%xmm0, -96(%edx)
-L(aligned_16_80bytes):
-	movdqa	%xmm0, -80(%edx)
-L(aligned_16_64bytes):
-	movdqa	%xmm0, -64(%edx)
-L(aligned_16_48bytes):
-	movdqa	%xmm0, -48(%edx)
-L(aligned_16_32bytes):
-	movdqa	%xmm0, -32(%edx)
-L(aligned_16_16bytes):
-	movdqa	%xmm0, -16(%edx)
-L(aligned_16_0bytes):
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_113bytes):
-	movdqa	%xmm0, -113(%edx)
-L(aligned_16_97bytes):
-	movdqa	%xmm0, -97(%edx)
-L(aligned_16_81bytes):
-	movdqa	%xmm0, -81(%edx)
-L(aligned_16_65bytes):
-	movdqa	%xmm0, -65(%edx)
-L(aligned_16_49bytes):
-	movdqa	%xmm0, -49(%edx)
-L(aligned_16_33bytes):
-	movdqa	%xmm0, -33(%edx)
-L(aligned_16_17bytes):
-	movdqa	%xmm0, -17(%edx)
-L(aligned_16_1bytes):
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_114bytes):
-	movdqa	%xmm0, -114(%edx)
-L(aligned_16_98bytes):
-	movdqa	%xmm0, -98(%edx)
-L(aligned_16_82bytes):
-	movdqa	%xmm0, -82(%edx)
-L(aligned_16_66bytes):
-	movdqa	%xmm0, -66(%edx)
-L(aligned_16_50bytes):
-	movdqa	%xmm0, -50(%edx)
-L(aligned_16_34bytes):
-	movdqa	%xmm0, -34(%edx)
-L(aligned_16_18bytes):
-	movdqa	%xmm0, -18(%edx)
-L(aligned_16_2bytes):
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_115bytes):
-	movdqa	%xmm0, -115(%edx)
-L(aligned_16_99bytes):
-	movdqa	%xmm0, -99(%edx)
-L(aligned_16_83bytes):
-	movdqa	%xmm0, -83(%edx)
-L(aligned_16_67bytes):
-	movdqa	%xmm0, -67(%edx)
-L(aligned_16_51bytes):
-	movdqa	%xmm0, -51(%edx)
-L(aligned_16_35bytes):
-	movdqa	%xmm0, -35(%edx)
-L(aligned_16_19bytes):
-	movdqa	%xmm0, -19(%edx)
-L(aligned_16_3bytes):
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_116bytes):
-	movdqa	%xmm0, -116(%edx)
-L(aligned_16_100bytes):
-	movdqa	%xmm0, -100(%edx)
-L(aligned_16_84bytes):
-	movdqa	%xmm0, -84(%edx)
-L(aligned_16_68bytes):
-	movdqa	%xmm0, -68(%edx)
-L(aligned_16_52bytes):
-	movdqa	%xmm0, -52(%edx)
-L(aligned_16_36bytes):
-	movdqa	%xmm0, -36(%edx)
-L(aligned_16_20bytes):
-	movdqa	%xmm0, -20(%edx)
-L(aligned_16_4bytes):
-	movl	%eax, -4(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_117bytes):
-	movdqa	%xmm0, -117(%edx)
-L(aligned_16_101bytes):
-	movdqa	%xmm0, -101(%edx)
-L(aligned_16_85bytes):
-	movdqa	%xmm0, -85(%edx)
-L(aligned_16_69bytes):
-	movdqa	%xmm0, -69(%edx)
-L(aligned_16_53bytes):
-	movdqa	%xmm0, -53(%edx)
-L(aligned_16_37bytes):
-	movdqa	%xmm0, -37(%edx)
-L(aligned_16_21bytes):
-	movdqa	%xmm0, -21(%edx)
-L(aligned_16_5bytes):
-	movl	%eax, -5(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_118bytes):
-	movdqa	%xmm0, -118(%edx)
-L(aligned_16_102bytes):
-	movdqa	%xmm0, -102(%edx)
-L(aligned_16_86bytes):
-	movdqa	%xmm0, -86(%edx)
-L(aligned_16_70bytes):
-	movdqa	%xmm0, -70(%edx)
-L(aligned_16_54bytes):
-	movdqa	%xmm0, -54(%edx)
-L(aligned_16_38bytes):
-	movdqa	%xmm0, -38(%edx)
-L(aligned_16_22bytes):
-	movdqa	%xmm0, -22(%edx)
-L(aligned_16_6bytes):
-	movl	%eax, -6(%edx)
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_119bytes):
-	movdqa	%xmm0, -119(%edx)
-L(aligned_16_103bytes):
-	movdqa	%xmm0, -103(%edx)
-L(aligned_16_87bytes):
-	movdqa	%xmm0, -87(%edx)
-L(aligned_16_71bytes):
-	movdqa	%xmm0, -71(%edx)
-L(aligned_16_55bytes):
-	movdqa	%xmm0, -55(%edx)
-L(aligned_16_39bytes):
-	movdqa	%xmm0, -39(%edx)
-L(aligned_16_23bytes):
-	movdqa	%xmm0, -23(%edx)
-L(aligned_16_7bytes):
-	movl	%eax, -7(%edx)
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_120bytes):
-	movdqa	%xmm0, -120(%edx)
-L(aligned_16_104bytes):
-	movdqa	%xmm0, -104(%edx)
-L(aligned_16_88bytes):
-	movdqa	%xmm0, -88(%edx)
-L(aligned_16_72bytes):
-	movdqa	%xmm0, -72(%edx)
-L(aligned_16_56bytes):
-	movdqa	%xmm0, -56(%edx)
-L(aligned_16_40bytes):
-	movdqa	%xmm0, -40(%edx)
-L(aligned_16_24bytes):
-	movdqa	%xmm0, -24(%edx)
-L(aligned_16_8bytes):
-	movq	%xmm0, -8(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_121bytes):
-	movdqa	%xmm0, -121(%edx)
-L(aligned_16_105bytes):
-	movdqa	%xmm0, -105(%edx)
-L(aligned_16_89bytes):
-	movdqa	%xmm0, -89(%edx)
-L(aligned_16_73bytes):
-	movdqa	%xmm0, -73(%edx)
-L(aligned_16_57bytes):
-	movdqa	%xmm0, -57(%edx)
-L(aligned_16_41bytes):
-	movdqa	%xmm0, -41(%edx)
-L(aligned_16_25bytes):
-	movdqa	%xmm0, -25(%edx)
-L(aligned_16_9bytes):
-	movq	%xmm0, -9(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_122bytes):
-	movdqa	%xmm0, -122(%edx)
-L(aligned_16_106bytes):
-	movdqa	%xmm0, -106(%edx)
-L(aligned_16_90bytes):
-	movdqa	%xmm0, -90(%edx)
-L(aligned_16_74bytes):
-	movdqa	%xmm0, -74(%edx)
-L(aligned_16_58bytes):
-	movdqa	%xmm0, -58(%edx)
-L(aligned_16_42bytes):
-	movdqa	%xmm0, -42(%edx)
-L(aligned_16_26bytes):
-	movdqa	%xmm0, -26(%edx)
-L(aligned_16_10bytes):
-	movq	%xmm0, -10(%edx)
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_123bytes):
-	movdqa	%xmm0, -123(%edx)
-L(aligned_16_107bytes):
-	movdqa	%xmm0, -107(%edx)
-L(aligned_16_91bytes):
-	movdqa	%xmm0, -91(%edx)
-L(aligned_16_75bytes):
-	movdqa	%xmm0, -75(%edx)
-L(aligned_16_59bytes):
-	movdqa	%xmm0, -59(%edx)
-L(aligned_16_43bytes):
-	movdqa	%xmm0, -43(%edx)
-L(aligned_16_27bytes):
-	movdqa	%xmm0, -27(%edx)
-L(aligned_16_11bytes):
-	movq	%xmm0, -11(%edx)
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_124bytes):
-	movdqa	%xmm0, -124(%edx)
-L(aligned_16_108bytes):
-	movdqa	%xmm0, -108(%edx)
-L(aligned_16_92bytes):
-	movdqa	%xmm0, -92(%edx)
-L(aligned_16_76bytes):
-	movdqa	%xmm0, -76(%edx)
-L(aligned_16_60bytes):
-	movdqa	%xmm0, -60(%edx)
-L(aligned_16_44bytes):
-	movdqa	%xmm0, -44(%edx)
-L(aligned_16_28bytes):
-	movdqa	%xmm0, -28(%edx)
-L(aligned_16_12bytes):
-	movq	%xmm0, -12(%edx)
-	movl	%eax, -4(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_125bytes):
-	movdqa	%xmm0, -125(%edx)
-L(aligned_16_109bytes):
-	movdqa	%xmm0, -109(%edx)
-L(aligned_16_93bytes):
-	movdqa	%xmm0, -93(%edx)
-L(aligned_16_77bytes):
-	movdqa	%xmm0, -77(%edx)
-L(aligned_16_61bytes):
-	movdqa	%xmm0, -61(%edx)
-L(aligned_16_45bytes):
-	movdqa	%xmm0, -45(%edx)
-L(aligned_16_29bytes):
-	movdqa	%xmm0, -29(%edx)
-L(aligned_16_13bytes):
-	movq	%xmm0, -13(%edx)
-	movl	%eax, -5(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_126bytes):
-	movdqa	%xmm0, -126(%edx)
-L(aligned_16_110bytes):
-	movdqa	%xmm0, -110(%edx)
-L(aligned_16_94bytes):
-	movdqa	%xmm0, -94(%edx)
-L(aligned_16_78bytes):
-	movdqa	%xmm0, -78(%edx)
-L(aligned_16_62bytes):
-	movdqa	%xmm0, -62(%edx)
-L(aligned_16_46bytes):
-	movdqa	%xmm0, -46(%edx)
-L(aligned_16_30bytes):
-	movdqa	%xmm0, -30(%edx)
-L(aligned_16_14bytes):
-	movq	%xmm0, -14(%edx)
-	movl	%eax, -6(%edx)
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_127bytes):
-	movdqa	%xmm0, -127(%edx)
-L(aligned_16_111bytes):
-	movdqa	%xmm0, -111(%edx)
-L(aligned_16_95bytes):
-	movdqa	%xmm0, -95(%edx)
-L(aligned_16_79bytes):
-	movdqa	%xmm0, -79(%edx)
-L(aligned_16_63bytes):
-	movdqa	%xmm0, -63(%edx)
-L(aligned_16_47bytes):
-	movdqa	%xmm0, -47(%edx)
-L(aligned_16_31bytes):
-	movdqa	%xmm0, -31(%edx)
-L(aligned_16_15bytes):
-	movq	%xmm0, -15(%edx)
-	movl	%eax, -7(%edx)
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN_END
-
-END (__memset_sse2)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memset.S b/sysdeps/i386/i686/multiarch/memset.S
deleted file mode 100644
index f601663a9f..0000000000
--- a/sysdeps/i386/i686/multiarch/memset.S
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Multiple versions of memset
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib.  */
-#if IS_IN (libc)
-	.text
-ENTRY(memset)
-	.type	memset, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__memset_ia32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memset_sse2)
-	HAS_ARCH_FEATURE (Fast_Rep_String)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memset_sse2_rep)
-2:	ret
-END(memset)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __memset_ia32, @function; \
-	.globl __memset_ia32; \
-	.p2align 4; \
-	__memset_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __memset_ia32, .-__memset_ia32
-
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
-	.type __memset_chk_ia32, @function; \
-	.globl __memset_chk_ia32; \
-	.p2align 4; \
-	__memset_chk_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
-	cfi_endproc; .size __memset_chk_ia32, .-__memset_chk_ia32
-
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-#  define libc_hidden_builtin_def(name) \
-	.globl __GI_memset; __GI_memset = __memset_ia32
-# endif
-
-# undef strong_alias
-# define strong_alias(original, alias)
-#endif
-
-#include "../memset.S"
diff --git a/sysdeps/i386/i686/multiarch/memset_chk.S b/sysdeps/i386/i686/multiarch/memset_chk.S
deleted file mode 100644
index 573cf4208a..0000000000
--- a/sysdeps/i386/i686/multiarch/memset_chk.S
+++ /dev/null
@@ -1,82 +0,0 @@
-/* Multiple versions of __memset_chk
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib.  */
-#if IS_IN (libc)
-	.text
-ENTRY(__memset_chk)
-	.type	__memset_chk, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__memset_chk_ia32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memset_chk_sse2)
-	HAS_ARCH_FEATURE (Fast_Rep_String)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__memset_chk_sse2_rep)
-2:	ret
-END(__memset_chk)
-
-# ifdef SHARED
-strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
-	.section .gnu.warning.__memset_zero_constant_len_parameter
-	.string "memset used with constant zero length parameter; this could be due to transposed parameters"
-# else
-	.text
-	.type __memset_chk_sse2, @function
-	.p2align 4;
-__memset_chk_sse2:
-	cfi_startproc
-	CALL_MCOUNT
-	movl	12(%esp), %eax
-	cmpl	%eax, 16(%esp)
-	jb	__chk_fail
-	jmp	__memset_sse2
-	cfi_endproc
-	.size __memset_chk_sse2, .-__memset_chk_sse2
-
-	.type __memset_chk_sse2_rep, @function
-	.p2align 4;
-__memset_chk_sse2_rep:
-	cfi_startproc
-	CALL_MCOUNT
-	movl	12(%esp), %eax
-	cmpl	%eax, 16(%esp)
-	jb	__chk_fail
-	jmp	__memset_sse2_rep
-	cfi_endproc
-	.size __memset_chk_sse2_rep, .-__memset_chk_sse2_rep
-
-	.type __memset_chk_ia32, @function
-	.p2align 4;
-__memset_chk_ia32:
-	cfi_startproc
-	CALL_MCOUNT
-	movl	12(%esp), %eax
-	cmpl	%eax, 16(%esp)
-	jb	__chk_fail
-	jmp	__memset_ia32
-	cfi_endproc
-	.size __memset_chk_ia32, .-__memset_chk_ia32
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S
deleted file mode 100644
index 88c0e5776c..0000000000
--- a/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_RAWMEMCHR
-#define MEMCHR __rawmemchr_sse2_bsf
-#include "memchr-sse2-bsf.S"
diff --git a/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S b/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S
deleted file mode 100644
index 038c74896b..0000000000
--- a/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_RAWMEMCHR
-#define MEMCHR __rawmemchr_sse2
-#include "memchr-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/rawmemchr.S b/sysdeps/i386/i686/multiarch/rawmemchr.S
deleted file mode 100644
index 0a41d63ee8..0000000000
--- a/sysdeps/i386/i686/multiarch/rawmemchr.S
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Multiple versions of rawmemchr
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
-	.text
-ENTRY(__rawmemchr)
-	.type	__rawmemchr, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jz	3f
-
-	LOAD_FUNC_GOT_EAX (__rawmemchr_sse2)
-	ret
-
-2:	LOAD_FUNC_GOT_EAX (__rawmemchr_ia32)
-	ret
-
-3:	LOAD_FUNC_GOT_EAX (__rawmemchr_sse2_bsf)
-	ret
-END(__rawmemchr)
-
-weak_alias(__rawmemchr, rawmemchr)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __rawmemchr_ia32, @function; \
-	.globl __rawmemchr_ia32; \
-	.p2align 4; \
-	__rawmemchr_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __rawmemchr_ia32, .-__rawmemchr_ia32
-
-# undef libc_hidden_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-# define libc_hidden_def(name) \
-	.globl __GI___rawmemchr; __GI___rawmemchr = __rawmemchr_ia32
-
-#endif
-#include "../../rawmemchr.S"
diff --git a/sysdeps/i386/i686/multiarch/rtld-strnlen.c b/sysdeps/i386/i686/multiarch/rtld-strnlen.c
deleted file mode 100644
index 1aa5440644..0000000000
--- a/sysdeps/i386/i686/multiarch/rtld-strnlen.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <string/strnlen.c>
diff --git a/sysdeps/i386/i686/multiarch/s_fma-fma.c b/sysdeps/i386/i686/multiarch/s_fma-fma.c
deleted file mode 100644
index 2e9619f97c..0000000000
--- a/sysdeps/i386/i686/multiarch/s_fma-fma.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/* FMA version of fma.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <config.h>
-
-double
-__fma_fma (double x, double y, double z)
-{
-  asm ("vfmadd213sd %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
-  return x;
-}
diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/i386/i686/multiarch/s_fma.c
deleted file mode 100644
index 411ebb2ba9..0000000000
--- a/sysdeps/i386/i686/multiarch/s_fma.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* Multiple versions of fma.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <config.h>
-
-#include <math.h>
-#include <init-arch.h>
-
-extern double __fma_ia32 (double x, double y, double z) attribute_hidden;
-extern double __fma_fma (double x, double y, double z) attribute_hidden;
-
-libm_ifunc (__fma,
-	    HAS_ARCH_FEATURE (FMA_Usable) ? __fma_fma : __fma_ia32);
-weak_alias (__fma, fma)
-
-#define __fma __fma_ia32
-
-#include <sysdeps/ieee754/ldbl-96/s_fma.c>
diff --git a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c b/sysdeps/i386/i686/multiarch/s_fmaf-fma.c
deleted file mode 100644
index ee57abfda2..0000000000
--- a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/* FMA version of fmaf.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <config.h>
-
-float
-__fmaf_fma (float x, float y, float z)
-{
-  asm ("vfmadd213ss %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
-  return x;
-}
diff --git a/sysdeps/i386/i686/multiarch/s_fmaf.c b/sysdeps/i386/i686/multiarch/s_fmaf.c
deleted file mode 100644
index 00b0fbcfc5..0000000000
--- a/sysdeps/i386/i686/multiarch/s_fmaf.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* Multiple versions of fmaf.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <config.h>
-
-#include <math.h>
-#include <init-arch.h>
-
-extern float __fmaf_ia32 (float x, float y, float z) attribute_hidden;
-extern float __fmaf_fma (float x, float y, float z) attribute_hidden;
-
-libm_ifunc (__fmaf,
-	    HAS_ARCH_FEATURE (FMA_Usable) ? __fmaf_fma : __fmaf_ia32);
-weak_alias (__fmaf, fmaf)
-
-#define __fmaf __fmaf_ia32
-
-#include <sysdeps/ieee754/dbl-64/s_fmaf.c>
diff --git a/sysdeps/i386/i686/multiarch/sched_cpucount.c b/sysdeps/i386/i686/multiarch/sched_cpucount.c
deleted file mode 100644
index 7db31b02f8..0000000000
--- a/sysdeps/i386/i686/multiarch/sched_cpucount.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/x86_64/multiarch/sched_cpucount.c>
diff --git a/sysdeps/i386/i686/multiarch/stpcpy-sse2.S b/sysdeps/i386/i686/multiarch/stpcpy-sse2.S
deleted file mode 100644
index 46ca1b3074..0000000000
--- a/sysdeps/i386/i686/multiarch/stpcpy-sse2.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STPCPY
-#define STRCPY __stpcpy_sse2
-#include "strcpy-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S b/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S
deleted file mode 100644
index d971c2da38..0000000000
--- a/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STPCPY
-#define STRCPY __stpcpy_ssse3
-#include "strcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/stpcpy.S b/sysdeps/i386/i686/multiarch/stpcpy.S
deleted file mode 100644
index ee81ab6ae3..0000000000
--- a/sysdeps/i386/i686/multiarch/stpcpy.S
+++ /dev/null
@@ -1,9 +0,0 @@
-/* Multiple versions of stpcpy
-   All versions must be listed in ifunc-impl-list.c.  */
-#define USE_AS_STPCPY
-#define STRCPY __stpcpy
-#include "strcpy.S"
-
-weak_alias (__stpcpy, stpcpy)
-libc_hidden_def (__stpcpy)
-libc_hidden_builtin_def (stpcpy)
diff --git a/sysdeps/i386/i686/multiarch/stpncpy-sse2.S b/sysdeps/i386/i686/multiarch/stpncpy-sse2.S
deleted file mode 100644
index 37a703cb76..0000000000
--- a/sysdeps/i386/i686/multiarch/stpncpy-sse2.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_STPCPY
-#define USE_AS_STRNCPY
-#define STRCPY __stpncpy_sse2
-#include "strcpy-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S b/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S
deleted file mode 100644
index 14ed16f6b5..0000000000
--- a/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_STPCPY
-#define USE_AS_STRNCPY
-#define STRCPY __stpncpy_ssse3
-#include "strcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/stpncpy.S b/sysdeps/i386/i686/multiarch/stpncpy.S
deleted file mode 100644
index 2698ca6a8c..0000000000
--- a/sysdeps/i386/i686/multiarch/stpncpy.S
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Multiple versions of stpncpy
-   All versions must be listed in ifunc-impl-list.c.  */
-#define STRCPY __stpncpy
-#define USE_AS_STPCPY
-#define USE_AS_STRNCPY
-#include "strcpy.S"
-
-weak_alias (__stpncpy, stpncpy)
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp-c.c b/sysdeps/i386/i686/multiarch/strcasecmp-c.c
deleted file mode 100644
index 753c6ec84a..0000000000
--- a/sysdeps/i386/i686/multiarch/strcasecmp-c.c
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <string.h>
-
-extern __typeof (strcasecmp) __strcasecmp_nonascii;
-
-#define __strcasecmp __strcasecmp_nonascii
-#include <string/strcasecmp.c>
-
-strong_alias (__strcasecmp_nonascii, __strcasecmp_ia32)
-
-/* The needs of strcasecmp in libc are minimal, no need to go through
-   the IFUNC.  */
-strong_alias (__strcasecmp_nonascii, __GI___strcasecmp)
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp.S b/sysdeps/i386/i686/multiarch/strcasecmp.S
deleted file mode 100644
index ec59276408..0000000000
--- a/sysdeps/i386/i686/multiarch/strcasecmp.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Entry point for multi-version x86 strcasecmp.
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY(__strcasecmp)
-	.type	__strcasecmp, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__strcasecmp_ia32)
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__strcasecmp_ssse3)
-	HAS_CPU_FEATURE (SSE4_2)
-	jz	2f
-	HAS_ARCH_FEATURE (Slow_SSE4_2)
-	jnz	2f
-	LOAD_FUNC_GOT_EAX (__strcasecmp_sse4_2)
-2:	ret
-END(__strcasecmp)
-
-weak_alias (__strcasecmp, strcasecmp)
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c b/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c
deleted file mode 100644
index d4fcd2b4a1..0000000000
--- a/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <string.h>
-
-extern __typeof (strcasecmp_l) __strcasecmp_l_nonascii;
-
-#define __strcasecmp_l __strcasecmp_l_nonascii
-#define USE_IN_EXTENDED_LOCALE_MODEL    1
-#include <string/strcasecmp.c>
-
-strong_alias (__strcasecmp_l_nonascii, __strcasecmp_l_ia32)
-
-/* The needs of strcasecmp in libc are minimal, no need to go through
-   the IFUNC.  */
-strong_alias (__strcasecmp_l_nonascii, __GI___strcasecmp_l)
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S b/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S
deleted file mode 100644
index 411d4153f2..0000000000
--- a/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define USE_AS_STRCASECMP_L 1
-#include "strcmp-sse4.S"
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S b/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S
deleted file mode 100644
index a22b93c518..0000000000
--- a/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define USE_AS_STRCASECMP_L 1
-#include "strcmp-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l.S b/sysdeps/i386/i686/multiarch/strcasecmp_l.S
deleted file mode 100644
index 711c09b0dc..0000000000
--- a/sysdeps/i386/i686/multiarch/strcasecmp_l.S
+++ /dev/null
@@ -1,7 +0,0 @@
-/* Multiple versions of strcasecmp_l
-   All versions must be listed in ifunc-impl-list.c.  */
-#define STRCMP __strcasecmp_l
-#define USE_AS_STRCASECMP_L
-#include "strcmp.S"
-
-weak_alias (__strcasecmp_l, strcasecmp_l)
diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S
deleted file mode 100644
index 6359c7330c..0000000000
--- a/sysdeps/i386/i686/multiarch/strcat-sse2.S
+++ /dev/null
@@ -1,1245 +0,0 @@
-/* strcat with SSE2
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifdef SHARED
-#  define JMPTBL(I, B) I - B
-
-/* Load an entry in a jump table into ECX and branch to it.  TABLE is a
-	jump table with relative offsets.  INDEX is a register contains the
-	index into the jump table.   SCALE is the scale of INDEX. */
-
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
-	/* We first load PC into ECX.  */	\
-	SETUP_PIC_REG(cx);	\
-	/* Get the address of the jump table.  */	\
-	addl	$(TABLE - .), %ecx;	\
-	/* Get the entry and convert the relative offset to the	\
-	absolute address.  */	\
-	addl	(%ecx,INDEX,SCALE), %ecx;	\
-	/* We loaded the jump table and adjusted ECX. Go.  */	\
-	jmp	*%ecx
-# else
-#  define JMPTBL(I, B) I
-
-/* Branch to an entry in a jump table.  TABLE is a jump table with
-	absolute offsets.  INDEX is a register contains the index into the
-	jump table.  SCALE is the scale of INDEX. */
-
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
-	jmp	*TABLE(,INDEX,SCALE)
-# endif
-
-# ifndef STRCAT
-#  define STRCAT  __strcat_sse2
-# endif
-
-# define PARMS  4
-# define STR1  PARMS+4
-# define STR2  STR1+4
-
-# ifdef USE_AS_STRNCAT
-#  define LEN    STR2+8
-#  define STR3   STR1+4
-# else
-#  define STR3   STR1
-# endif
-
-# define USE_AS_STRCAT
-# ifdef USE_AS_STRNCAT
-#  define RETURN  POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi);
-# else
-#  define RETURN  POP(%esi); ret; CFI_PUSH(%esi);
-# endif
-
-.text
-ENTRY (STRCAT)
-	PUSH	(%esi)
-	mov	STR1(%esp), %eax
-	mov	STR2(%esp), %esi
-# ifdef USE_AS_STRNCAT
-	PUSH	(%ebx)
-	movl	LEN(%esp), %ebx
-	test	%ebx, %ebx
-	jz	L(ExitZero)
-# endif
-	cmpb	$0, (%esi)
-	mov	%esi, %ecx
-	mov	%eax, %edx
-	jz	L(ExitZero)
-
-	and	$63, %ecx
-	and	$63, %edx
-	cmp	$32, %ecx
-	ja	L(StrlenCore7_1)
-	cmp	$48, %edx
-	ja	L(alignment_prolog)
-
-	pxor	%xmm0, %xmm0
-	pxor	%xmm4, %xmm4
-	pxor	%xmm7, %xmm7
-	movdqu	(%eax), %xmm1
-	movdqu	(%esi), %xmm5
-	pcmpeqb	%xmm1, %xmm0
-	movdqu	16(%esi), %xmm6
-	pmovmskb %xmm0, %ecx
-	pcmpeqb	%xmm5, %xmm4
-	pcmpeqb	%xmm6, %xmm7
-	test	%ecx, %ecx
-	jnz	L(exit_less16_)
-	mov	%eax, %ecx
-	and	$-16, %eax
-	jmp	L(loop_prolog)
-
-L(alignment_prolog):
-	pxor	%xmm0, %xmm0
-	pxor	%xmm4, %xmm4
-	mov	%edx, %ecx
-	pxor	%xmm7, %xmm7
-	and	$15, %ecx
-	and	$-16, %eax
-	pcmpeqb	(%eax), %xmm0
-	movdqu	(%esi), %xmm5
-	movdqu	16(%esi), %xmm6
-	pmovmskb %xmm0, %edx
-	pcmpeqb	%xmm5, %xmm4
-	shr	%cl, %edx
-	pcmpeqb	%xmm6, %xmm7
-	test	%edx, %edx
-	jnz	L(exit_less16)
-	add	%eax, %ecx
-
-	pxor	%xmm0, %xmm0
-L(loop_prolog):
-	pxor	%xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	pxor	%xmm3, %xmm3
-	.p2align 4
-L(align16_loop):
-	pcmpeqb	16(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16)
-
-	pcmpeqb	32(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32)
-
-	pcmpeqb	48(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48)
-
-	pcmpeqb	64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	64(%eax), %eax
-	test	%edx, %edx
-	jz	L(align16_loop)
-	bsf	%edx, %edx
-	add	%edx, %eax
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit16):
-	bsf	%edx, %edx
-	lea	16(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit32):
-	bsf	%edx, %edx
-	lea	32(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit48):
-	bsf	%edx, %edx
-	lea	48(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit_less16):
-	bsf	%edx, %edx
-	add	%ecx, %eax
-	add	%edx, %eax
-	jmp	L(StartStrcpyPart)
-
-	.p2align 4
-L(exit_less16_):
-	bsf	%ecx, %ecx
-	add	%ecx, %eax
-
-	.p2align 4
-L(StartStrcpyPart):
-	pmovmskb %xmm4, %edx
-# ifdef USE_AS_STRNCAT
-	cmp	$16, %ebx
-	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail1)
-
-	movdqu	%xmm5, (%eax)
-	pmovmskb %xmm7, %edx
-# ifdef USE_AS_STRNCAT
-	cmp	$32, %ebx
-	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32Bytes1)
-
-	mov	%esi, %ecx
-	and	$-16, %esi
-	and	$15, %ecx
-	pxor	%xmm0, %xmm0
-# ifdef USE_AS_STRNCAT
-	add	%ecx, %ebx
-	sbb	%edx, %edx
-	or	%edx, %ebx
-# endif
-	sub	%ecx, %eax
-	jmp	L(Unalign16Both)
-
-L(StrlenCore7_1):
-	mov	%eax, %ecx
-	pxor	%xmm0, %xmm0
-	and	$15, %ecx
-	and	$-16, %eax
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	shr	%cl, %edx
-	test	%edx, %edx
-	jnz	L(exit_less16_1)
-	add	%eax, %ecx
-
-	pxor	%xmm0, %xmm0
-	pxor	%xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	pxor	%xmm3, %xmm3
-
-	.p2align 4
-L(align16_loop_1):
-	pcmpeqb	16(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16_1)
-
-	pcmpeqb	32(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32_1)
-
-	pcmpeqb	48(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48_1)
-
-	pcmpeqb	64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	64(%eax), %eax
-	test	%edx, %edx
-	jz	L(align16_loop_1)
-	bsf	%edx, %edx
-	add	%edx, %eax
-	jmp	L(StartStrcpyPart_1)
-
-	.p2align 4
-L(exit16_1):
-	bsf	%edx, %edx
-	lea	16(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart_1)
-
-	.p2align 4
-L(exit32_1):
-	bsf	%edx, %edx
-	lea	32(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart_1)
-
-	.p2align 4
-L(exit48_1):
-	bsf	%edx, %edx
-	lea	48(%eax, %edx), %eax
-	jmp	L(StartStrcpyPart_1)
-
-	.p2align 4
-L(exit_less16_1):
-	bsf	%edx, %edx
-	add	%ecx, %eax
-	add	%edx, %eax
-
-	.p2align 4
-L(StartStrcpyPart_1):
-	mov	%esi, %ecx
-	and	$15, %ecx
-	and	$-16, %esi
-	pxor	%xmm0, %xmm0
-	pxor	%xmm1, %xmm1
-
-# ifdef USE_AS_STRNCAT
-	cmp	$48, %ebx
-	ja      L(BigN)
-# endif
-	pcmpeqb	(%esi), %xmm1
-# ifdef USE_AS_STRNCAT
-	add	%ecx, %ebx
-# endif
-	pmovmskb %xmm1, %edx
-	shr	%cl, %edx
-# ifdef USE_AS_STRNCAT
-	cmp	$16, %ebx
-	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail)
-
-	pcmpeqb	16(%esi), %xmm0
-	pmovmskb %xmm0, %edx
-# ifdef USE_AS_STRNCAT
-	cmp	$32, %ebx
-	jbe	L(CopyFrom1To32BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32Bytes)
-
-	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
-	movdqu	%xmm1, (%eax)
-	sub	%ecx, %eax
-
-	.p2align 4
-L(Unalign16Both):
-	mov	$16, %ecx
-	movdqa	(%esi, %ecx), %xmm1
-	movaps	16(%esi, %ecx), %xmm2
-	movdqu	%xmm1, (%eax, %ecx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$48, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-L(Unalign16BothBigN):
-	movaps	16(%esi, %ecx), %xmm3
-	movdqu	%xmm2, (%eax, %ecx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%esi, %ecx), %xmm4
-	movdqu	%xmm3, (%eax, %ecx)
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%esi, %ecx), %xmm1
-	movdqu	%xmm4, (%eax, %ecx)
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%esi, %ecx), %xmm2
-	movdqu	%xmm1, (%eax, %ecx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%esi, %ecx), %xmm3
-	movdqu	%xmm2, (%eax, %ecx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-# ifdef USE_AS_STRNCAT
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movdqu	%xmm3, (%eax, %ecx)
-	mov	%esi, %edx
-	lea	16(%esi, %ecx), %esi
-	and	$-0x40, %esi
-	sub	%esi, %edx
-	sub	%edx, %eax
-# ifdef USE_AS_STRNCAT
-	lea	128(%ebx, %edx), %ebx
-# endif
-	movaps	(%esi), %xmm2
-	movaps	%xmm2, %xmm4
-	movaps	16(%esi), %xmm5
-	movaps	32(%esi), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	48(%esi), %xmm7
-	pminub	%xmm5, %xmm2
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm0, %xmm3
-	pmovmskb %xmm3, %edx
-# ifdef USE_AS_STRNCAT
-	sub	$64, %ebx
-	jbe	L(UnalignedLeaveCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(Unaligned64Leave)
-
-	.p2align 4
-L(Unaligned64Loop_start):
-	add	$64, %eax
-	add	$64, %esi
-	movdqu	%xmm4, -64(%eax)
-	movaps	(%esi), %xmm2
-	movdqa	%xmm2, %xmm4
-	movdqu	%xmm5, -48(%eax)
-	movaps	16(%esi), %xmm5
-	pminub	%xmm5, %xmm2
-	movaps	32(%esi), %xmm3
-	movdqu	%xmm6, -32(%eax)
-	movaps	%xmm3, %xmm6
-	movdqu	%xmm7, -16(%eax)
-	movaps	48(%esi), %xmm7
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm0, %xmm3
-	pmovmskb %xmm3, %edx
-# ifdef USE_AS_STRNCAT
-	sub	$64, %ebx
-	jbe	L(UnalignedLeaveCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jz	L(Unaligned64Loop_start)
-
-L(Unaligned64Leave):
-	pxor	%xmm1, %xmm1
-
-	pcmpeqb	%xmm4, %xmm0
-	pcmpeqb	%xmm5, %xmm1
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm1, %ecx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnaligned_0)
-	test	%ecx, %ecx
-	jnz	L(CopyFrom1To16BytesUnaligned_16)
-
-	pcmpeqb	%xmm6, %xmm0
-	pcmpeqb	%xmm7, %xmm1
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm1, %ecx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnaligned_32)
-
-	bsf	%ecx, %edx
-	movdqu	%xmm4, (%eax)
-	movdqu	%xmm5, 16(%eax)
-	movdqu	%xmm6, 32(%eax)
-	add	$48, %esi
-	add	$48, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-# ifdef USE_AS_STRNCAT
-	.p2align 4
-L(BigN):
-	pcmpeqb	(%esi), %xmm1
-	pmovmskb %xmm1, %edx
-	shr	%cl, %edx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail)
-
-	pcmpeqb	16(%esi), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32Bytes)
-
-	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
-	movdqu	%xmm1, (%eax)
-	sub	%ecx, %eax
-	sub     $48, %ebx
-	add     %ecx, %ebx
-
-	mov	$16, %ecx
-	movdqa	(%esi, %ecx), %xmm1
-	movaps	16(%esi, %ecx), %xmm2
-	movdqu	%xmm1, (%eax, %ecx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-	jmp	L(Unalign16BothBigN)
-# endif
-
-/*------------end of main part-------------------------------*/
-
-/* Case1 */
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%ecx, %eax
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesTail):
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes1):
-	add	$16, %esi
-	add	$16, %eax
-L(CopyFrom1To16BytesTail1):
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes):
-	bsf	%edx, %edx
-	add	%ecx, %esi
-	add	$16, %edx
-	sub	%ecx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_0):
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_16):
-	bsf	%ecx, %edx
-	movdqu	%xmm4, (%eax)
-	add	$16, %esi
-	add	$16, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_32):
-	bsf	%edx, %edx
-	movdqu	%xmm4, (%eax)
-	movdqu	%xmm5, 16(%eax)
-	add	$32, %esi
-	add	$32, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-# ifdef USE_AS_STRNCAT
-
-	.p2align 4
-L(CopyFrom1To16BytesExit):
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-/* Case2 */
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %ebx
-	add	%ecx, %eax
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32BytesCase2):
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	add	$16, %edx
-	sub	%ecx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTailCase2):
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTail1Case2):
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-/* Case2 or Case3,  Case3 */
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesCase2)
-L(CopyFrom1To16BytesCase3):
-	add	$16, %ebx
-	add	%ecx, %eax
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32BytesCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32BytesCase2)
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesTailCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTailCase2)
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes1Case2OrCase3):
-	add	$16, %eax
-	add	$16, %esi
-	sub	$16, %ebx
-L(CopyFrom1To16BytesTail1Case2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail1Case2)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-# endif
-
-# ifdef USE_AS_STRNCAT
-	.p2align 4
-L(StrncatExit0):
-	movb	%bh, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-# endif
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit1):
-	movb	%bh, 1(%eax)
-# endif
-L(Exit1):
-# ifdef USE_AS_STRNCAT
-	movb	(%esi), %dh
-# endif
-	movb	%dh, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit2):
-	movb	%bh, 2(%eax)
-# endif
-L(Exit2):
-	movw	(%esi), %dx
-	movw	%dx, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit3):
-	movb	%bh, 3(%eax)
-# endif
-L(Exit3):
-	movw	(%esi), %cx
-	movw	%cx, (%eax)
-# ifdef USE_AS_STRNCAT
-	movb	2(%esi), %dh
-# endif
-	movb	%dh, 2(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit4):
-	movb	%bh, 4(%eax)
-# endif
-L(Exit4):
-	movl	(%esi), %edx
-	movl	%edx, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit5):
-	movb	%bh, 5(%eax)
-# endif
-L(Exit5):
-	movl	(%esi), %ecx
-# ifdef USE_AS_STRNCAT
-	movb	4(%esi), %dh
-# endif
-	movb	%dh, 4(%eax)
-	movl	%ecx, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit6):
-	movb	%bh, 6(%eax)
-# endif
-L(Exit6):
-	movl	(%esi), %ecx
-	movw	4(%esi), %dx
-	movl	%ecx, (%eax)
-	movw	%dx, 4(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit7):
-	movb	%bh, 7(%eax)
-# endif
-L(Exit7):
-	movl	(%esi), %ecx
-	movl	3(%esi), %edx
-	movl	%ecx, (%eax)
-	movl	%edx, 3(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit8):
-	movb	%bh, 8(%eax)
-# endif
-L(Exit8):
-	movlpd	(%esi), %xmm0
-	movlpd	%xmm0, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit9):
-	movb	%bh, 9(%eax)
-# endif
-L(Exit9):
-	movlpd	(%esi), %xmm0
-# ifdef USE_AS_STRNCAT
-	movb	8(%esi), %dh
-# endif
-	movb	%dh, 8(%eax)
-	movlpd	%xmm0, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit10):
-	movb	%bh, 10(%eax)
-# endif
-L(Exit10):
-	movlpd	(%esi), %xmm0
-	movw	8(%esi), %dx
-	movlpd	%xmm0, (%eax)
-	movw	%dx, 8(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit11):
-	movb	%bh, 11(%eax)
-# endif
-L(Exit11):
-	movlpd	(%esi), %xmm0
-	movl	7(%esi), %edx
-	movlpd	%xmm0, (%eax)
-	movl	%edx, 7(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit12):
-	movb	%bh, 12(%eax)
-# endif
-L(Exit12):
-	movlpd	(%esi), %xmm0
-	movl	8(%esi), %edx
-	movlpd	%xmm0, (%eax)
-	movl	%edx, 8(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit13):
-	movb	%bh, 13(%eax)
-# endif
-L(Exit13):
-	movlpd	(%esi), %xmm0
-	movlpd	5(%esi), %xmm1
-	movlpd	%xmm0, (%eax)
-	movlpd	%xmm1, 5(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit14):
-	movb	%bh, 14(%eax)
-# endif
-L(Exit14):
-	movlpd	(%esi), %xmm0
-	movlpd	6(%esi), %xmm1
-	movlpd	%xmm0, (%eax)
-	movlpd	%xmm1, 6(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit15):
-	movb	%bh, 15(%eax)
-# endif
-L(Exit15):
-	movlpd	(%esi), %xmm0
-	movlpd	7(%esi), %xmm1
-	movlpd	%xmm0, (%eax)
-	movlpd	%xmm1, 7(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit16):
-	movb	%bh, 16(%eax)
-# endif
-L(Exit16):
-	movdqu	(%esi), %xmm0
-	movdqu	%xmm0, (%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit17):
-	movb	%bh, 17(%eax)
-# endif
-L(Exit17):
-	movdqu	(%esi), %xmm0
-# ifdef USE_AS_STRNCAT
-	movb	16(%esi), %dh
-# endif
-	movdqu	%xmm0, (%eax)
-	movb	%dh, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit18):
-	movb	%bh, 18(%eax)
-# endif
-L(Exit18):
-	movdqu	(%esi), %xmm0
-	movw	16(%esi), %cx
-	movdqu	%xmm0, (%eax)
-	movw	%cx, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit19):
-	movb	%bh, 19(%eax)
-# endif
-L(Exit19):
-	movdqu	(%esi), %xmm0
-	movl	15(%esi), %ecx
-	movdqu	%xmm0, (%eax)
-	movl	%ecx, 15(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit20):
-	movb	%bh, 20(%eax)
-# endif
-L(Exit20):
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-	movdqu	%xmm0, (%eax)
-	movl	%ecx, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit21):
-	movb	%bh, 21(%eax)
-# endif
-L(Exit21):
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-# ifdef USE_AS_STRNCAT
-	movb	20(%esi), %dh
-# endif
-	movdqu	%xmm0, (%eax)
-	movl	%ecx, 16(%eax)
-	movb	%dh, 20(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit22):
-	movb	%bh, 22(%eax)
-# endif
-L(Exit22):
-	movdqu	(%esi), %xmm0
-	movlpd	14(%esi), %xmm3
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm3, 14(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit23):
-	movb	%bh, 23(%eax)
-# endif
-L(Exit23):
-	movdqu	(%esi), %xmm0
-	movlpd	15(%esi), %xmm3
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm3, 15(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit24):
-	movb	%bh, 24(%eax)
-# endif
-L(Exit24):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit25):
-	movb	%bh, 25(%eax)
-# endif
-L(Exit25):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-# ifdef USE_AS_STRNCAT
-	movb	24(%esi), %dh
-# endif
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	movb	%dh, 24(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit26):
-	movb	%bh, 26(%eax)
-# endif
-L(Exit26):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movw	24(%esi), %cx
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	movw	%cx, 24(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit27):
-	movb	%bh, 27(%eax)
-# endif
-L(Exit27):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	23(%esi), %ecx
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	movl	%ecx, 23(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit28):
-	movb	%bh, 28(%eax)
-# endif
-L(Exit28):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	24(%esi), %ecx
-	movdqu	%xmm0, (%eax)
-	movlpd	%xmm2, 16(%eax)
-	movl	%ecx, 24(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit29):
-	movb	%bh, 29(%eax)
-# endif
-L(Exit29):
-	movdqu	(%esi), %xmm0
-	movdqu	13(%esi), %xmm2
-	movdqu	%xmm0, (%eax)
-	movdqu	%xmm2, 13(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit30):
-	movb	%bh, 30(%eax)
-# endif
-L(Exit30):
-	movdqu	(%esi), %xmm0
-	movdqu	14(%esi), %xmm2
-	movdqu	%xmm0, (%eax)
-	movdqu	%xmm2, 14(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit31):
-	movb	%bh, 31(%eax)
-# endif
-L(Exit31):
-	movdqu	(%esi), %xmm0
-	movdqu	15(%esi), %xmm2
-	movdqu	%xmm0, (%eax)
-	movdqu	%xmm2, 15(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit32):
-	movb	%bh, 32(%eax)
-# endif
-L(Exit32):
-	movdqu	(%esi), %xmm0
-	movdqu	16(%esi), %xmm2
-	movdqu	%xmm0, (%eax)
-	movdqu	%xmm2, 16(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-# ifdef USE_AS_STRNCAT
-
-	.p2align 4
-L(UnalignedLeaveCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(Unaligned64LeaveCase2)
-L(Unaligned64LeaveCase3):
-	lea	64(%ebx), %ecx
-	and	$-16, %ecx
-	add	$48, %ebx
-	jl	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm4, (%eax)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm5, 16(%eax)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm6, 32(%eax)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm7, 48(%eax)
-	xor	%bh, %bh
-	movb	%bh, 64(%eax)
-	mov	STR3(%esp), %eax
-	RETURN
-
-	.p2align 4
-L(Unaligned64LeaveCase2):
-	xor	%ecx, %ecx
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$48, %ebx
-	jle	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm5, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm4, (%eax)
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm6, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm5, 16(%eax)
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm7, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm6, 32(%eax)
-	lea	16(%eax, %ecx), %eax
-	lea	16(%esi, %ecx), %esi
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-# endif
-	.p2align 4
-L(ExitZero):
-	RETURN
-
-END (STRCAT)
-
-	.p2align 4
-	.section .rodata
-L(ExitTable):
-	.int	JMPTBL(L(Exit1), L(ExitTable))
-	.int	JMPTBL(L(Exit2), L(ExitTable))
-	.int	JMPTBL(L(Exit3), L(ExitTable))
-	.int	JMPTBL(L(Exit4), L(ExitTable))
-	.int	JMPTBL(L(Exit5), L(ExitTable))
-	.int	JMPTBL(L(Exit6), L(ExitTable))
-	.int	JMPTBL(L(Exit7), L(ExitTable))
-	.int	JMPTBL(L(Exit8), L(ExitTable))
-	.int	JMPTBL(L(Exit9), L(ExitTable))
-	.int	JMPTBL(L(Exit10), L(ExitTable))
-	.int	JMPTBL(L(Exit11), L(ExitTable))
-	.int	JMPTBL(L(Exit12), L(ExitTable))
-	.int	JMPTBL(L(Exit13), L(ExitTable))
-	.int	JMPTBL(L(Exit14), L(ExitTable))
-	.int	JMPTBL(L(Exit15), L(ExitTable))
-	.int	JMPTBL(L(Exit16), L(ExitTable))
-	.int	JMPTBL(L(Exit17), L(ExitTable))
-	.int	JMPTBL(L(Exit18), L(ExitTable))
-	.int	JMPTBL(L(Exit19), L(ExitTable))
-	.int	JMPTBL(L(Exit20), L(ExitTable))
-	.int	JMPTBL(L(Exit21), L(ExitTable))
-	.int	JMPTBL(L(Exit22), L(ExitTable))
-	.int	JMPTBL(L(Exit23), L(ExitTable))
-	.int	JMPTBL(L(Exit24), L(ExitTable))
-	.int	JMPTBL(L(Exit25), L(ExitTable))
-	.int	JMPTBL(L(Exit26), L(ExitTable))
-	.int	JMPTBL(L(Exit27), L(ExitTable))
-	.int	JMPTBL(L(Exit28), L(ExitTable))
-	.int	JMPTBL(L(Exit29), L(ExitTable))
-	.int	JMPTBL(L(Exit30), L(ExitTable))
-	.int	JMPTBL(L(Exit31), L(ExitTable))
-	.int	JMPTBL(L(Exit32), L(ExitTable))
-# ifdef USE_AS_STRNCAT
-L(ExitStrncatTable):
-	.int	JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
-	.int	JMPTBL(L(StrncatExit32), L(ExitStrncatTable))
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcat-ssse3.S b/sysdeps/i386/i686/multiarch/strcat-ssse3.S
deleted file mode 100644
index 59ffbc60a5..0000000000
--- a/sysdeps/i386/i686/multiarch/strcat-ssse3.S
+++ /dev/null
@@ -1,572 +0,0 @@
-/* strcat with SSSE3
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-	MERCHANTABILITY	or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifndef STRCAT
-#  define STRCAT  __strcat_ssse3
-# endif
-
-# define PARMS  4
-# define STR1  PARMS+4
-# define STR2  STR1+4
-
-# ifdef USE_AS_STRNCAT
-#  define LEN STR2+8
-# endif
-
-# define USE_AS_STRCAT
-
-.text
-ENTRY (STRCAT)
-	PUSH	(%edi)
-	mov	STR1(%esp), %edi
-	mov	%edi, %edx
-
-# define RETURN  jmp L(StartStrcpyPart)
-# include "strlen-sse2.S"
-
-L(StartStrcpyPart):
-	mov	STR2(%esp), %ecx
-	lea	(%edi, %eax), %edx
-# ifdef USE_AS_STRNCAT
-	PUSH	(%ebx)
-	mov	LEN(%esp), %ebx
-	test	%ebx, %ebx
-	jz	L(StrncatExit0)
-	cmp	$8, %ebx
-	jbe	L(StrncatExit8Bytes)
-# endif
-	cmpb	$0, (%ecx)
-	jz	L(Exit1)
-	cmpb	$0, 1(%ecx)
-	jz	L(Exit2)
-	cmpb	$0, 2(%ecx)
-	jz	L(Exit3)
-	cmpb	$0, 3(%ecx)
-	jz	L(Exit4)
-	cmpb	$0, 4(%ecx)
-	jz	L(Exit5)
-	cmpb	$0, 5(%ecx)
-	jz	L(Exit6)
-	cmpb	$0, 6(%ecx)
-	jz	L(Exit7)
-	cmpb	$0, 7(%ecx)
-	jz	L(Exit8)
-	cmpb	$0, 8(%ecx)
-	jz	L(Exit9)
-# ifdef USE_AS_STRNCAT
-	cmp	$16, %ebx
-	jb	L(StrncatExit15Bytes)
-# endif
-	cmpb	$0, 9(%ecx)
-	jz	L(Exit10)
-	cmpb	$0, 10(%ecx)
-	jz	L(Exit11)
-	cmpb	$0, 11(%ecx)
-	jz	L(Exit12)
-	cmpb	$0, 12(%ecx)
-	jz	L(Exit13)
-	cmpb	$0, 13(%ecx)
-	jz	L(Exit14)
-	cmpb	$0, 14(%ecx)
-	jz	L(Exit15)
-	cmpb	$0, 15(%ecx)
-	jz	L(Exit16)
-# ifdef USE_AS_STRNCAT
-	cmp	$16, %ebx
-	je	L(StrncatExit16)
-
-#  define RETURN1	\
-	POP	(%ebx);	\
-	POP	(%edi);	\
-	ret;	\
-	CFI_PUSH	(%ebx);	\
-	CFI_PUSH	(%edi)
-#  define USE_AS_STRNCPY
-# else
-#  define RETURN1  POP (%edi); ret; CFI_PUSH (%edi)
-# endif
-# include "strcpy-ssse3.S"
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-	test	%al, %al
-	jz	L(ExitHigh)
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh):
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	test	$0x08, %ah
-	jnz	L(Exit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	movlpd	(%ecx), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit1):
-	movb	%bh, 1(%edx)
-L(Exit1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit2):
-	movb	%bh, 2(%edx)
-L(Exit2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit3):
-	movb	%bh, 3(%edx)
-L(Exit3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit4):
-	movb	%bh, 4(%edx)
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit5):
-	movb	%bh, 5(%edx)
-L(Exit5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit6):
-	movb	%bh, 6(%edx)
-L(Exit6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit7):
-	movb	%bh, 7(%edx)
-L(Exit7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit8):
-	movb	%bh, 8(%edx)
-L(Exit8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit9):
-	movb	%bh, 9(%edx)
-L(Exit9):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit10):
-	movb	%bh, 10(%edx)
-L(Exit10):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit11):
-	movb	%bh, 11(%edx)
-L(Exit11):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit12):
-	movb	%bh, 12(%edx)
-L(Exit12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit13):
-	movb	%bh, 13(%edx)
-L(Exit13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit14):
-	movb	%bh, 14(%edx)
-L(Exit14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit15):
-	movb	%bh, 15(%edx)
-L(Exit15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit16):
-	movb	%bh, 16(%edx)
-L(Exit16):
-	movlpd	(%ecx), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-# ifdef USE_AS_STRNCPY
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %ebx
-	add	%esi, %ecx
-	lea	(%esi, %edx), %esi
-	lea	-9(%ebx), %edx
-	and	$1<<7, %dh
-	or	%al, %dh
-	test	%dh, %dh
-	lea	(%esi), %edx
-	POP	(%esi)
-	jz	L(ExitHighCase2)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	cmp	$1, %ebx
-	je	L(StrncatExit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	cmp	$2, %ebx
-	je	L(StrncatExit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	cmp	$3, %ebx
-	je	L(StrncatExit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	cmp	$4, %ebx
-	je	L(StrncatExit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	cmp	$5, %ebx
-	je	L(StrncatExit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	cmp	$6, %ebx
-	je	L(StrncatExit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	cmp	$7, %ebx
-	je	L(StrncatExit7)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	lea	7(%edx), %eax
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-	xor	%cl, %cl
-	movb	%cl, (%eax)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHighCase2):
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	cmp	$9, %ebx
-	je	L(StrncatExit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	cmp	$10, %ebx
-	je	L(StrncatExit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	cmp	$11, %ebx
-	je	L(StrncatExit11)
-	test	$0x8, %ah
-	jnz	L(Exit12)
-	cmp	$12, %ebx
-	je	L(StrncatExit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	cmp	$13, %ebx
-	je	L(StrncatExit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	cmp	$14, %ebx
-	je	L(StrncatExit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	cmp	$15, %ebx
-	je	L(StrncatExit15)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm1, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	CFI_PUSH(%esi)
-
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase3):
-	add	$16, %ebx
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-
-	cmp	$8, %ebx
-	ja	L(ExitHighCase3)
-	cmp	$1, %ebx
-	je	L(StrncatExit1)
-	cmp	$2, %ebx
-	je	L(StrncatExit2)
-	cmp	$3, %ebx
-	je	L(StrncatExit3)
-	cmp	$4, %ebx
-	je	L(StrncatExit4)
-	cmp	$5, %ebx
-	je	L(StrncatExit5)
-	cmp	$6, %ebx
-	je	L(StrncatExit6)
-	cmp	$7, %ebx
-	je	L(StrncatExit7)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	%bh, 8(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHighCase3):
-	cmp	$9, %ebx
-	je	L(StrncatExit9)
-	cmp	$10, %ebx
-	je	L(StrncatExit10)
-	cmp	$11, %ebx
-	je	L(StrncatExit11)
-	cmp	$12, %ebx
-	je	L(StrncatExit12)
-	cmp	$13, %ebx
-	je	L(StrncatExit13)
-	cmp	$14, %ebx
-	je	L(StrncatExit14)
-	cmp	$15, %ebx
-	je	L(StrncatExit15)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm1, 8(%edx)
-	movb	%bh, 16(%edx)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit0):
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit15Bytes):
-	cmp	$9, %ebx
-	je	L(StrncatExit9)
-	cmpb	$0, 9(%ecx)
-	jz	L(Exit10)
-	cmp	$10, %ebx
-	je	L(StrncatExit10)
-	cmpb	$0, 10(%ecx)
-	jz	L(Exit11)
-	cmp	$11, %ebx
-	je	L(StrncatExit11)
-	cmpb	$0, 11(%ecx)
-	jz	L(Exit12)
-	cmp	$12, %ebx
-	je	L(StrncatExit12)
-	cmpb	$0, 12(%ecx)
-	jz	L(Exit13)
-	cmp	$13, %ebx
-	je	L(StrncatExit13)
-	cmpb	$0, 13(%ecx)
-	jz	L(Exit14)
-	cmp	$14, %ebx
-	je	L(StrncatExit14)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-	lea	14(%edx), %eax
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-	movb	%bh, (%eax)
-	movl	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncatExit8Bytes):
-	cmpb	$0, (%ecx)
-	jz	L(Exit1)
-	cmp	$1, %ebx
-	je	L(StrncatExit1)
-	cmpb	$0, 1(%ecx)
-	jz	L(Exit2)
-	cmp	$2, %ebx
-	je	L(StrncatExit2)
-	cmpb	$0, 2(%ecx)
-	jz	L(Exit3)
-	cmp	$3, %ebx
-	je	L(StrncatExit3)
-	cmpb	$0, 3(%ecx)
-	jz	L(Exit4)
-	cmp	$4, %ebx
-	je	L(StrncatExit4)
-	cmpb	$0, 4(%ecx)
-	jz	L(Exit5)
-	cmp	$5, %ebx
-	je	L(StrncatExit5)
-	cmpb	$0, 5(%ecx)
-	jz	L(Exit6)
-	cmp	$6, %ebx
-	je	L(StrncatExit6)
-	cmpb	$0, 6(%ecx)
-	jz	L(Exit7)
-	cmp	$7, %ebx
-	je	L(StrncatExit7)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	lea	7(%edx), %eax
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-	movb	%bh, (%eax)
-	movl	%edi, %eax
-	RETURN1
-
-# endif
-END (STRCAT)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcat.S b/sysdeps/i386/i686/multiarch/strcat.S
deleted file mode 100644
index 8412cb6f23..0000000000
--- a/sysdeps/i386/i686/multiarch/strcat.S
+++ /dev/null
@@ -1,92 +0,0 @@
-/* Multiple versions of strcat
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifndef USE_AS_STRNCAT
-# ifndef STRCAT
-#  define STRCAT strcat
-# endif
-#endif
-
-#ifdef USE_AS_STRNCAT
-# define STRCAT_SSSE3	__strncat_ssse3
-# define STRCAT_SSE2		__strncat_sse2
-# define STRCAT_IA32		__strncat_ia32
-# define __GI_STRCAT		__GI_strncat
-#else
-# define STRCAT_SSSE3	__strcat_ssse3
-# define STRCAT_SSE2		__strcat_sse2
-# define STRCAT_IA32		__strcat_ia32
-# define __GI_STRCAT		__GI_strcat
-#endif
-
-
-/* Define multiple versions only for the definition in libc.  Don't
-   define multiple versions for strncat in static library since we
-   need strncat before the initialization happened.  */
-#if IS_IN (libc)
-
-	.text
-ENTRY(STRCAT)
-	.type	STRCAT, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (STRCAT_IA32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (STRCAT_SSE2)
-	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz	2f
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (STRCAT_SSSE3)
-2:	ret
-END(STRCAT)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type STRCAT_IA32, @function; \
-	.align 16; \
-	.globl STRCAT_IA32; \
-	.hidden STRCAT_IA32; \
-	STRCAT_IA32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size STRCAT_IA32, .-STRCAT_IA32
-
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcat calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-#  define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCAT; __GI_STRCAT = STRCAT_IA32
-#  undef libc_hidden_def
-#  define libc_hidden_def(name) \
-	.globl __GI___STRCAT; __GI___STRCAT = STRCAT_IA32
-
-# endif
-#endif
-
-#ifndef USE_AS_STRNCAT
-# include "../../strcat.S"
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
deleted file mode 100644
index 95fd7c084e..0000000000
--- a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
+++ /dev/null
@@ -1,158 +0,0 @@
-/* strchr with SSE2 with bsf
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS  8
-# define ENTRANCE PUSH(%edi)
-# define RETURN  POP(%edi); ret; CFI_PUSH(%edi);
-
-# define STR1  PARMS
-# define STR2  STR1+4
-
-	.text
-ENTRY (__strchr_sse2_bsf)
-
-	ENTRANCE
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-
-	pxor	%xmm2, %xmm2
-	mov	%ecx, %edi
-	punpcklbw %xmm1, %xmm1
-	punpcklbw %xmm1, %xmm1
-	/* ECX has OFFSET. */
-	and	$15, %ecx
-	pshufd	$0, %xmm1, %xmm1
-	je	L(loop)
-
-/* Handle unaligned string.  */
-	and	$-16, %edi
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	/* Find where NULL is.  */
-	pmovmskb %xmm2, %edx
-	/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-	/* Remove the leading bytes.  */
-	sarl	%cl, %edx
-	sarl	%cl, %eax
-	test	%eax, %eax
-	je	L(unaligned_no_match)
-	/* Check which byte is a match.  */
-	bsf	%eax, %eax
-	/* Is there a NULL? */
-	test	%edx, %edx
-	je	L(unaligned_match)
-	bsf	%edx, %edx
-	cmpl	%edx, %eax
-	/* Return NULL if NULL comes first.  */
-	ja	L(return_null)
-L(unaligned_match):
-	add	%edi, %eax
-	add	%ecx, %eax
-	RETURN
-
-	.p2align 4
-L(unaligned_no_match):
-	test	%edx, %edx
-	jne	L(return_null)
-	pxor	%xmm2, %xmm2
-
-	add	$16, %edi
-
-	.p2align 4
-/* Loop start on aligned string.  */
-L(loop):
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-	jmp	L(loop)
-
-L(matches):
-	pmovmskb %xmm2, %edx
-	test	%eax, %eax
-	jz	L(return_null)
-	bsf	%eax, %eax
-	/* There is a match.  First find where NULL is.  */
-	test	%edx, %edx
-	je	L(match)
-	bsf	%edx, %ecx
-	/* Check if NULL comes first.  */
-	cmpl	%ecx, %eax
-	ja	L(return_null)
-L(match):
-	sub	$16, %edi
-	add	%edi, %eax
-	RETURN
-
-/* Return NULL.  */
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	RETURN
-
-END (__strchr_sse2_bsf)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2.S b/sysdeps/i386/i686/multiarch/strchr-sse2.S
deleted file mode 100644
index 1f9e875b04..0000000000
--- a/sysdeps/i386/i686/multiarch/strchr-sse2.S
+++ /dev/null
@@ -1,348 +0,0 @@
-/* strchr SSE2 without bsf
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS  8
-# define ENTRANCE PUSH(%edi)
-# define RETURN  POP(%edi); ret; CFI_PUSH(%edi);
-
-# define STR1  PARMS
-# define STR2  STR1+4
-
-	atom_text_section
-ENTRY (__strchr_sse2)
-
-	ENTRANCE
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-
-	pxor	%xmm2, %xmm2
-	mov	%ecx, %edi
-	punpcklbw %xmm1, %xmm1
-	punpcklbw %xmm1, %xmm1
-	/* ECX has OFFSET. */
-	and	$15, %ecx
-	pshufd	$0, %xmm1, %xmm1
-	je	L(loop)
-
-/* Handle unaligned string.  */
-	and	$-16, %edi
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	/* Find where NULL is.  */
-	pmovmskb %xmm2, %edx
-	/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-	/* Remove the leading bytes.  */
-	sarl	%cl, %edx
-	sarl	%cl, %eax
-	test	%eax, %eax
-	jz	L(unaligned_no_match)
-	/* Check which byte is a match.  */
-	/* Is there a NULL? */
-	add	%ecx, %edi
-	test	%edx, %edx
-	jz	L(match_case1)
-	jmp	L(match_case2)
-
-	.p2align 4
-L(unaligned_no_match):
-	test	%edx, %edx
-	jne	L(return_null)
-
-	pxor	%xmm2, %xmm2
-	add	$16, %edi
-
-	.p2align 4
-/* Loop start on aligned string.  */
-L(loop):
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-	test	%edx, %edx
-	jnz	L(return_null)
-	add	$16, %edi
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-	test	%edx, %edx
-	jnz	L(return_null)
-	add	$16, %edi
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-	test	%edx, %edx
-	jnz	L(return_null)
-	add	$16, %edi
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-	test	%edx, %edx
-	jnz	L(return_null)
-	add	$16, %edi
-	jmp	L(loop)
-
-L(matches):
-	/* There is a match.  First find where NULL is.  */
-	test	%edx, %edx
-	jz	L(match_case1)
-
-	.p2align 4
-L(match_case2):
-	test	%al, %al
-	jz	L(match_higth_case2)
-
-	mov	%al, %cl
-	and	$15, %cl
-	jnz	L(match_case2_4)
-
-	mov	%dl, %ch
-	and	$15, %ch
-	jnz	L(return_null)
-
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x10, %dl
-	jnz	L(return_null)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x20, %dl
-	jnz	L(return_null)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	test	$0x40, %dl
-	jnz	L(return_null)
-	lea	7(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case2_4):
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x01, %dl
-	jnz	L(return_null)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x02, %dl
-	jnz	L(return_null)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	test	$0x04, %dl
-	jnz	L(return_null)
-	lea	3(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_higth_case2):
-	test	%dl, %dl
-	jnz	L(return_null)
-
-	mov	%ah, %cl
-	and	$15, %cl
-	jnz	L(match_case2_12)
-
-	mov	%dh, %ch
-	and	$15, %ch
-	jnz	L(return_null)
-
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x10, %dh
-	jnz	L(return_null)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x20, %dh
-	jnz	L(return_null)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	test	$0x40, %dh
-	jnz	L(return_null)
-	lea	15(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case2_12):
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x01, %dh
-	jnz	L(return_null)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x02, %dh
-	jnz	L(return_null)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	test	$0x04, %dh
-	jnz	L(return_null)
-	lea	11(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case1):
-	test	%al, %al
-	jz	L(match_higth_case1)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	lea	7(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_higth_case1):
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	test	$0x08, %ah
-	jnz	L(Exit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	lea	15(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit1):
-	lea	(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit2):
-	lea	1(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit3):
-	lea	2(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit4):
-	lea	3(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit5):
-	lea	4(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit6):
-	lea	5(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit7):
-	lea	6(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit9):
-	lea	8(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit10):
-	lea	9(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit11):
-	lea	10(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit12):
-	lea	11(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit13):
-	lea	12(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit14):
-	lea	13(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit15):
-	lea	14(%edi), %eax
-	RETURN
-
-/* Return NULL.  */
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	RETURN
-
-END (__strchr_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strchr.S b/sysdeps/i386/i686/multiarch/strchr.S
deleted file mode 100644
index 5b97b1c767..0000000000
--- a/sysdeps/i386/i686/multiarch/strchr.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Multiple versions of strchr
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
-	.text
-ENTRY(strchr)
-	.type	strchr, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__strchr_ia32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__strchr_sse2_bsf)
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__strchr_sse2)
-2:	ret
-END(strchr)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __strchr_ia32, @function; \
-	.globl __strchr_ia32; \
-	.p2align 4; \
-	__strchr_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __strchr_ia32, .-__strchr_ia32
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_strchr; __GI_strchr = __strchr_ia32
-#endif
-
-#include "../../i586/strchr.S"
diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
deleted file mode 100644
index cd26058671..0000000000
--- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S
+++ /dev/null
@@ -1,804 +0,0 @@
-/* strcmp with SSE4.2
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define CFI_PUSH(REG)						\
-  cfi_adjust_cfa_offset (4);					\
-  cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)						\
-  cfi_adjust_cfa_offset (-4);					\
-  cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-
-#ifdef USE_AS_STRNCMP
-# ifndef STRCMP
-#  define STRCMP	__strncmp_sse4_2
-# endif
-# define STR1		8
-# define STR2		STR1+4
-# define CNT		STR2+4
-# define RETURN		POP (REM); ret; .p2align 4; CFI_PUSH (REM)
-# define REM		%ebp
-#elif defined USE_AS_STRCASECMP_L
-# include "locale-defines.h"
-# ifndef STRCMP
-#  define STRCMP	__strcasecmp_l_sse4_2
-# endif
-# ifdef PIC
-#  define STR1		12
-# else
-#  define STR1		8
-# endif
-# define STR2		STR1+4
-# define LOCALE		12	/* Loaded before the adjustment.  */
-# ifdef PIC
-#  define RETURN	POP (%edi); POP (%ebx); ret; \
-			.p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi)
-# else
-#  define RETURN	POP (%edi); ret; .p2align 4; CFI_PUSH (%edi)
-# endif
-# define NONASCII	__strcasecmp_nonascii
-#elif defined USE_AS_STRNCASECMP_L
-# include "locale-defines.h"
-# ifndef STRCMP
-#  define STRCMP	__strncasecmp_l_sse4_2
-# endif
-# ifdef PIC
-#  define STR1		16
-# else
-#  define STR1		12
-# endif
-# define STR2		STR1+4
-# define CNT		STR2+4
-# define LOCALE		16	/* Loaded before the adjustment.  */
-# ifdef PIC
-#  define RETURN	POP (%edi); POP (REM); POP (%ebx); ret; \
-			.p2align 4; \
-			CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi)
-# else
-#  define RETURN	POP (%edi); POP (REM); ret; \
-			.p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi)
-# endif
-# define REM		%ebp
-# define NONASCII	__strncasecmp_nonascii
-#else
-# ifndef STRCMP
-#  define STRCMP	__strcmp_sse4_2
-# endif
-# define STR1		4
-# define STR2		STR1+4
-# define RETURN		ret; .p2align 4
-#endif
-
-	.section .text.sse4.2,"ax",@progbits
-
-#ifdef USE_AS_STRCASECMP_L
-ENTRY (__strcasecmp_sse4_2)
-# ifdef PIC
-	PUSH	(%ebx)
-	LOAD_PIC_REG(bx)
-	movl	__libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
-#  ifdef NO_TLS_DIRECT_SEG_REFS
-	addl	%gs:0, %eax
-	movl	(%eax), %eax
-#  else
-	movl	%gs:(%eax), %eax
-#  endif
-# else
-#  ifdef NO_TLS_DIRECT_SEG_REFS
-	movl	%gs:0, %eax
-	movl	__libc_tsd_LOCALE@NTPOFF(%eax), %eax
-#  else
-	movl	%gs:__libc_tsd_LOCALE@NTPOFF, %eax
-#  endif
-# endif
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
-	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
-# else
-	movl	(%eax), %eax
-# endif
-	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
-# ifdef PIC
-	je	L(ascii)
-	POP	(%ebx)
-	jmp	__strcasecmp_nonascii
-# else
-	jne	__strcasecmp_nonascii
-	jmp	L(ascii)
-# endif
-END (__strcasecmp_sse4_2)
-#endif
-
-#ifdef USE_AS_STRNCASECMP_L
-ENTRY (__strncasecmp_sse4_2)
-# ifdef PIC
-	PUSH	(%ebx)
-	LOAD_PIC_REG(bx)
-	movl	__libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
-#  ifdef NO_TLS_DIRECT_SEG_REFS
-	addl	%gs:0, %eax
-	movl	(%eax), %eax
-#  else
-	movl	%gs:(%eax), %eax
-#  endif
-# else
-#  ifdef NO_TLS_DIRECT_SEG_REFS
-	movl	%gs:0, %eax
-	movl	__libc_tsd_LOCALE@NTPOFF(%eax), %eax
-#  else
-	movl	%gs:__libc_tsd_LOCALE@NTPOFF, %eax
-#  endif
-# endif
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
-	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
-# else
-	movl	(%eax), %eax
-# endif
-	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
-# ifdef PIC
-	je	L(ascii)
-	POP	(%ebx)
-	jmp	__strncasecmp_nonascii
-# else
-	jne	__strncasecmp_nonascii
-	jmp	L(ascii)
-# endif
-END (__strncasecmp_sse4_2)
-#endif
-
-	ENTRY (STRCMP)
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movl	LOCALE(%esp), %eax
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
-	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
-# else
-	movl	(%eax), %eax
-# endif
-	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
-	jne	NONASCII
-
-# ifdef PIC
-	PUSH	(%ebx)
-	LOAD_PIC_REG(bx)
-# endif
-L(ascii):
-	.section .rodata.cst16,"aM",@progbits,16
-	.align 16
-.Lbelowupper:
-	.quad	0x4040404040404040
-	.quad	0x4040404040404040
-.Ltopupper:
-	.quad	0x5b5b5b5b5b5b5b5b
-	.quad	0x5b5b5b5b5b5b5b5b
-.Ltouppermask:
-	.quad	0x2020202020202020
-	.quad	0x2020202020202020
-	.previous
-
-# ifdef PIC
-#  define UCLOW_reg .Lbelowupper@GOTOFF(%ebx)
-#  define UCHIGH_reg .Ltopupper@GOTOFF(%ebx)
-#  define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx)
-# else
-#  define UCLOW_reg .Lbelowupper
-#  define UCHIGH_reg .Ltopupper
-#  define LCQWORD_reg .Ltouppermask
-# endif
-#endif
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	PUSH	(REM)
-#endif
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	PUSH	(%edi)
-#endif
-	mov	STR1(%esp), %edx
-	mov	STR2(%esp), %eax
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	movl	CNT(%esp), REM
-	test	REM, REM
-	je	L(eq)
-#endif
-	mov	%dx, %cx
-	and	$0xfff, %cx
-	cmp	$0xff0, %cx
-	ja	L(first4bytes)
-	movdqu	(%edx), %xmm2
-	mov	%eax, %ecx
-	and	$0xfff, %ecx
-	cmp	$0xff0, %ecx
-	ja	L(first4bytes)
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# define TOLOWER(reg1, reg2) \
-	movdqa	reg1, %xmm3;						      \
-	movdqa	UCHIGH_reg, %xmm4;					      \
-	movdqa	reg2, %xmm5;						      \
-	movdqa	UCHIGH_reg, %xmm6;					      \
-	pcmpgtb	UCLOW_reg, %xmm3;					      \
-	pcmpgtb	reg1, %xmm4;						      \
-	pcmpgtb	UCLOW_reg, %xmm5;					      \
-	pcmpgtb	reg2, %xmm6;						      \
-	pand	%xmm4, %xmm3;						      \
-	pand	%xmm6, %xmm5;						      \
-	pand	LCQWORD_reg, %xmm3;					      \
-	pand	LCQWORD_reg, %xmm5;					      \
-	por	%xmm3, reg1;						      \
-	por	%xmm5, reg2
-
-	movdqu	(%eax), %xmm1
-	TOLOWER (%xmm2, %xmm1)
-	movd	%xmm2, %ecx
-	movd	%xmm1, %edi
-	movdqa	%xmm2, %xmm3
-	movdqa	%xmm1, %xmm4
-	cmpl	%edi, %ecx
-#else
-# define TOLOWER(reg1, reg)
-
-	movd	%xmm2, %ecx
-	cmp	(%eax), %ecx
-#endif
-	jne	L(less4bytes)
-#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
-	movdqu	(%eax), %xmm1
-#endif
-	pxor	%xmm2, %xmm1
-	pxor	%xmm0, %xmm0
-	ptest	%xmm1, %xmm0
-	jnc	L(less16bytes)
-	pcmpeqb	%xmm0, %xmm2
-	ptest	%xmm2, %xmm0
-	jnc	L(less16bytes)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	sub	$16, REM
-	jbe	L(eq)
-#endif
-	add	$16, %edx
-	add	$16, %eax
-L(first4bytes):
-	movzbl	(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, (%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$1, REM
-	je	L(eq)
-#endif
-
-	movzbl	1(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	1(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 1(%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$2, REM
-	je	L(eq)
-#endif
-	movzbl	2(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	2(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 2(%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$3, REM
-	je	L(eq)
-#endif
-	movzbl	3(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	3(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 3(%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$4, REM
-	je	L(eq)
-#endif
-	movzbl	4(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	4(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 4(%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$5, REM
-	je	L(eq)
-#endif
-	movzbl	5(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	5(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 5(%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$6, REM
-	je	L(eq)
-#endif
-	movzbl	6(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	6(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 6(%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$7, REM
-	je	L(eq)
-#endif
-	movzbl	7(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	7(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 7(%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	sub	$8, REM
-	je	L(eq)
-#endif
-	add	$8, %eax
-	add	$8, %edx
-
-#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
-	PUSH	(%edi)
-#endif
-	PUSH	(%esi)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cfi_remember_state
-#endif
-	mov	%edx, %edi
-	mov	%eax, %esi
-	xorl	%eax, %eax
-L(check_offset):
-	movl	%edi, %edx
-	movl	%esi, %ecx
-	andl	$0xfff, %edx
-	andl	$0xfff, %ecx
-	cmpl	%edx, %ecx
-	cmovl	%edx, %ecx
-	lea	-0xff0(%ecx), %edx
-	sub	%edx, %edi
-	sub	%edx, %esi
-	testl	%edx, %edx
-	jg	L(crosspage)
-L(loop):
-	movdqu	(%esi,%edx), %xmm2
-	movdqu	(%edi,%edx), %xmm1
-	TOLOWER (%xmm2, %xmm1)
-	pcmpistri	$0x1a, %xmm2, %xmm1
-	jbe	L(end)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	sub	$16, REM
-	jbe	L(more16byteseq)
-#endif
-
-	add	$16, %edx
-	jle	L(loop)
-L(crosspage):
-	movzbl	(%edi,%edx), %eax
-	movzbl	(%esi,%edx), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-# endif
-#endif
-	subl	%ecx, %eax
-	jne	L(ret)
-	testl	%ecx, %ecx
-	je	L(ret)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	sub	$1, REM
-	jbe	L(more16byteseq)
-#endif
-	inc	%edx
-	cmp	$15, %edx
-	jle	L(crosspage)
-	add	%edx, %edi
-	add	%edx, %esi
-	jmp	L(check_offset)
-
-	.p2align 4
-L(end):
-	jnc	L(ret)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	sub	%ecx, REM
-	jbe	L(more16byteseq)
-#endif
-	lea	(%ecx,%edx), %ecx
-	movzbl	(%edi,%ecx), %eax
-	movzbl	(%esi,%ecx), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-# endif
-#endif
-	subl	%ecx, %eax
-L(ret):
-	POP	(%esi)
-	POP	(%edi)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	POP	(REM)
-#endif
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	POP	(%ebx)
-# endif
-#endif
-	ret
-
-	.p2align 4
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cfi_restore_state
-L(more16byteseq):
-	POP	(%esi)
-# ifdef USE_AS_STRNCMP
-	POP	(%edi)
-# endif
-#endif
-L(eq):
-	xorl	%eax, %eax
-	RETURN
-
-L(neq):
-	mov	$1, %eax
-	ja	L(neq_bigger)
-	neg	%eax
-L(neq_bigger):
-	RETURN
-
-L(less16bytes):
-	add	$0xfefefeff, %ecx
-	jnc	L(less4bytes)
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movd	%xmm3, %edi
-	xor	%edi, %ecx
-#else
-	xor	(%edx), %ecx
-#endif
-	or	$0xfefefeff, %ecx
-	add	$1, %ecx
-	jnz	L(less4bytes)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$4, REM
-	jbe	L(eq)
-#endif
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	psrldq	$4, %xmm3
-	psrldq	$4, %xmm4
-	movd	%xmm3, %ecx
-	movd	%xmm4, %edi
-	cmp	%edi, %ecx
-	mov	%ecx, %edi
-#else
-	mov	4(%edx), %ecx
-	cmp	4(%eax), %ecx
-#endif
-	jne	L(more4bytes)
-	add	$0xfefefeff, %ecx
-	jnc	L(more4bytes)
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	xor	%edi, %ecx
-#else
-	xor	4(%edx), %ecx
-#endif
-	or	$0xfefefeff, %ecx
-	add	$1, %ecx
-	jnz	L(more4bytes)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	sub	$8, REM
-	jbe	L(eq)
-#endif
-
-	add	$8, %edx
-	add	$8, %eax
-L(less4bytes):
-
-	movzbl	(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, (%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$1, REM
-	je	L(eq)
-#endif
-	movzbl	1(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	1(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 1(%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$2, REM
-	je	L(eq)
-#endif
-
-	movzbl	2(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	2(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 2(%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$3, REM
-	je	L(eq)
-#endif
-	movzbl	3(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	3(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 3(%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-L(more4bytes):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$4, REM
-	je	L(eq)
-#endif
-	movzbl	4(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	4(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 4(%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$5, REM
-	je	L(eq)
-#endif
-	movzbl	5(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	5(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 5(%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$6, REM
-	je	L(eq)
-#endif
-	movzbl	6(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	6(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 6(%edx)
-#endif
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$7, REM
-	je	L(eq)
-#endif
-	movzbl	7(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movzbl	7(%edx), %edi
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
-	cmpl	%ecx, %edi
-#else
-	cmpb	%cl, 7(%edx)
-#endif
-	jne	L(neq)
-	jmp	L(eq)
-
-END (STRCMP)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
deleted file mode 100644
index b25cc3e068..0000000000
--- a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
+++ /dev/null
@@ -1,2810 +0,0 @@
-/* strcmp with SSSE3
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define CFI_PUSH(REG)						\
-  cfi_adjust_cfa_offset (4);					\
-  cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)						\
-  cfi_adjust_cfa_offset (-4);					\
-  cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-
-#ifdef USE_AS_STRNCMP
-# ifndef STRCMP
-#  define STRCMP	__strncmp_ssse3
-# endif
-# define STR1		8
-# define STR2		STR1+4
-# define CNT		STR2+4
-# define RETURN		POP (REM); ret; .p2align 4; CFI_PUSH (REM)
-# define UPDATE_STRNCMP_COUNTER				\
-	/* calculate left number to compare */		\
-	mov	$16, %esi;				\
-	sub	%ecx, %esi;				\
-	cmp	%esi, REM;				\
-	jbe	L(more8byteseq);			\
-	sub	%esi, REM
-# define FLAGS		%ebx
-# define REM		%ebp
-#elif defined USE_AS_STRCASECMP_L
-# include "locale-defines.h"
-# ifndef STRCMP
-#  define STRCMP	__strcasecmp_l_ssse3
-# endif
-# ifdef PIC
-#  define STR1		8
-# else
-#  define STR1		4
-# endif
-# define STR2		STR1+4
-# define LOCALE		12	/* Loaded before the adjustment.  */
-# ifdef PIC
-#  define RETURN	POP (%ebx); ret; .p2align 4; CFI_PUSH (%ebx)
-# else
-#  define RETURN	ret; .p2align 4
-# endif
-# define UPDATE_STRNCMP_COUNTER
-# define FLAGS		(%esp)
-# define NONASCII	__strcasecmp_nonascii
-#elif defined USE_AS_STRNCASECMP_L
-# include "locale-defines.h"
-# ifndef STRCMP
-#  define STRCMP	__strncasecmp_l_ssse3
-# endif
-# ifdef PIC
-#  define STR1		12
-# else
-#  define STR1		8
-# endif
-# define STR2		STR1+4
-# define CNT		STR2+4
-# define LOCALE		16	/* Loaded before the adjustment.  */
-# ifdef PIC
-#  define RETURN	POP (REM); POP (%ebx); ret; \
-			.p2align 4; CFI_PUSH (%ebx); CFI_PUSH (REM)
-# else
-#  define RETURN	POP (REM); ret; .p2align 4; CFI_PUSH (REM)
-# endif
-# define UPDATE_STRNCMP_COUNTER				\
-	/* calculate left number to compare */		\
-	mov	$16, %esi;				\
-	sub	%ecx, %esi;				\
-	cmp	%esi, REM;				\
-	jbe	L(more8byteseq);			\
-	sub	%esi, REM
-# define FLAGS		(%esp)
-# define REM		%ebp
-# define NONASCII	__strncasecmp_nonascii
-#else
-# ifndef STRCMP
-#  define STRCMP	__strcmp_ssse3
-# endif
-# define STR1		4
-# define STR2		STR1+4
-# define RETURN		ret; .p2align 4
-# define UPDATE_STRNCMP_COUNTER
-# define FLAGS		%ebx
-#endif
-
-	.section .text.ssse3,"ax",@progbits
-
-#ifdef USE_AS_STRCASECMP_L
-ENTRY (__strcasecmp_ssse3)
-# ifdef PIC
-	PUSH	(%ebx)
-	LOAD_PIC_REG(bx)
-	movl	__libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
-#  ifdef NO_TLS_DIRECT_SEG_REFS
-	addl	%gs:0, %eax
-	movl	(%eax), %eax
-#  else
-	movl	%gs:(%eax), %eax
-#  endif
-# else
-#  ifdef NO_TLS_DIRECT_SEG_REFS
-	movl	%gs:0, %eax
-	movl	__libc_tsd_LOCALE@NTPOFF(%eax), %eax
-#  else
-	movl	%gs:__libc_tsd_LOCALE@NTPOFF, %eax
-#  endif
-# endif
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
-	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
-# else
-	movl	(%eax), %eax
-# endif
-	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
-# ifdef PIC
-	je	L(ascii)
-	POP	(%ebx)
-	jmp	__strcasecmp_nonascii
-# else
-	jne	__strcasecmp_nonascii
-	jmp	L(ascii)
-# endif
-END (__strcasecmp_ssse3)
-#endif
-
-#ifdef USE_AS_STRNCASECMP_L
-ENTRY (__strncasecmp_ssse3)
-# ifdef PIC
-	PUSH	(%ebx)
-	LOAD_PIC_REG(bx)
-	movl	__libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
-#  ifdef NO_TLS_DIRECT_SEG_REFS
-	addl	%gs:0, %eax
-	movl	(%eax), %eax
-#  else
-	movl	%gs:(%eax), %eax
-#  endif
-# else
-#  ifdef NO_TLS_DIRECT_SEG_REFS
-	movl	%gs:0, %eax
-	movl	__libc_tsd_LOCALE@NTPOFF(%eax), %eax
-#  else
-	movl	%gs:__libc_tsd_LOCALE@NTPOFF, %eax
-#  endif
-# endif
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
-	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
-# else
-	movl	(%eax), %eax
-# endif
-	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
-# ifdef PIC
-	je	L(ascii)
-	POP	(%ebx)
-	jmp	__strncasecmp_nonascii
-# else
-	jne	__strncasecmp_nonascii
-	jmp	L(ascii)
-# endif
-END (__strncasecmp_ssse3)
-#endif
-
-ENTRY (STRCMP)
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movl	LOCALE(%esp), %eax
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
-	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
-# else
-	movl	(%eax), %eax
-# endif
-	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
-	jne	NONASCII
-
-# ifdef PIC
-	PUSH	(%ebx)
-	LOAD_PIC_REG(bx)
-# endif
-L(ascii):
-	.section .rodata.cst16,"aM",@progbits,16
-	.align 16
-.Lbelowupper:
-	.quad	0x4040404040404040
-	.quad	0x4040404040404040
-.Ltopupper:
-	.quad	0x5b5b5b5b5b5b5b5b
-	.quad	0x5b5b5b5b5b5b5b5b
-.Ltouppermask:
-	.quad	0x2020202020202020
-	.quad	0x2020202020202020
-	.previous
-
-# ifdef PIC
-#  define UCLOW_reg .Lbelowupper@GOTOFF(%ebx)
-#  define UCHIGH_reg .Ltopupper@GOTOFF(%ebx)
-#  define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx)
-# else
-#  define UCLOW_reg .Lbelowupper
-#  define UCHIGH_reg .Ltopupper
-#  define LCQWORD_reg .Ltouppermask
-# endif
-#endif
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	PUSH	(REM)
-#endif
-
-	movl	STR1(%esp), %edx
-	movl	STR2(%esp), %eax
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	movl	CNT(%esp), REM
-	cmp	$16, REM
-	jb	L(less16bytes_sncmp)
-#elif !defined USE_AS_STRCASECMP_L
-	movzbl	(%eax), %ecx
-	cmpb	%cl, (%edx)
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-	movzbl	1(%eax), %ecx
-	cmpb	%cl, 1(%edx)
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-	movzbl	2(%eax), %ecx
-	cmpb	%cl, 2(%edx)
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-	movzbl	3(%eax), %ecx
-	cmpb	%cl, 3(%edx)
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-	movzbl	4(%eax), %ecx
-	cmpb	%cl, 4(%edx)
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-	movzbl	5(%eax), %ecx
-	cmpb	%cl, 5(%edx)
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-	movzbl	6(%eax), %ecx
-	cmpb	%cl, 6(%edx)
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-	movzbl	7(%eax), %ecx
-	cmpb	%cl, 7(%edx)
-	jne	L(neq)
-	cmpl	$0, %ecx
-	je	L(eq)
-
-	add	$8, %edx
-	add	$8, %eax
-#endif
-	movl	%edx, %ecx
-	and	$0xfff, %ecx
-	cmp	$0xff0, %ecx
-	ja	L(crosspage)
-	mov	%eax, %ecx
-	and	$0xfff, %ecx
-	cmp	$0xff0, %ecx
-	ja	L(crosspage)
-	pxor	%xmm0, %xmm0
-	movlpd	(%eax), %xmm1
-	movlpd	(%edx), %xmm2
-	movhpd	8(%eax), %xmm1
-	movhpd	8(%edx), %xmm2
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# define TOLOWER(reg1, reg2) \
-	movdqa	reg1, %xmm5;					\
-	movdqa	reg2, %xmm7;					\
-	movdqa	UCHIGH_reg, %xmm6;				\
-	pcmpgtb	UCLOW_reg, %xmm5;				\
-	pcmpgtb	UCLOW_reg, %xmm7;				\
-	pcmpgtb	reg1, %xmm6;					\
-	pand	%xmm6, %xmm5;					\
-	movdqa	UCHIGH_reg, %xmm6;				\
-	pcmpgtb	reg2, %xmm6;					\
-	pand	%xmm6, %xmm7;					\
-	pand	LCQWORD_reg, %xmm5;				\
-	por	%xmm5, reg1;					\
-	pand	LCQWORD_reg, %xmm7;				\
-	por	%xmm7, reg2
-	TOLOWER (%xmm1, %xmm2)
-#else
-# define TOLOWER(reg1, reg2)
-#endif
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %ecx
-	sub	$0xffff, %ecx
-	jnz	L(less16bytes)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(eq)
-#endif
-	add	$16, %eax
-	add	$16, %edx
-
-L(crosspage):
-
-#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
-	PUSH	(FLAGS)
-#endif
-	PUSH	(%edi)
-	PUSH	(%esi)
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	pushl	$0
-	cfi_adjust_cfa_offset (4)
-#endif
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cfi_remember_state
-#endif
-
-	movl	%edx, %edi
-	movl	%eax, %ecx
-	and	$0xf, %ecx
-	and	$0xf, %edi
-	xor	%ecx, %eax
-	xor	%edi, %edx
-#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
-	xor	FLAGS, FLAGS
-#endif
-	cmp	%edi, %ecx
-	je	L(ashr_0)
-	ja	L(bigger)
-	orl	$0x20, FLAGS
-	xchg	%edx, %eax
-	xchg	%ecx, %edi
-L(bigger):
-	lea	15(%edi), %edi
-	sub	%ecx, %edi
-	cmp	$8, %edi
-	jle	L(ashr_less_8)
-	cmp	$14, %edi
-	je	L(ashr_15)
-	cmp	$13, %edi
-	je	L(ashr_14)
-	cmp	$12, %edi
-	je	L(ashr_13)
-	cmp	$11, %edi
-	je	L(ashr_12)
-	cmp	$10, %edi
-	je	L(ashr_11)
-	cmp	$9, %edi
-	je	L(ashr_10)
-L(ashr_less_8):
-	je	L(ashr_9)
-	cmp	$7, %edi
-	je	L(ashr_8)
-	cmp	$6, %edi
-	je	L(ashr_7)
-	cmp	$5, %edi
-	je	L(ashr_6)
-	cmp	$4, %edi
-	je	L(ashr_5)
-	cmp	$3, %edi
-	je	L(ashr_4)
-	cmp	$2, %edi
-	je	L(ashr_3)
-	cmp	$1, %edi
-	je	L(ashr_2)
-	cmp	$0, %edi
-	je	L(ashr_1)
-
-/*
- * The following cases will be handled by ashr_0
- *  ecx(offset of esi)  eax(offset of edi)  relative offset  corresponding case
- *        n(0~15)            n(0~15)           15(15+ n-n)         ashr_0
- */
-	.p2align 4
-L(ashr_0):
-	mov	$0xffff, %esi
-	movdqa	(%eax), %xmm1
-	pxor	%xmm0, %xmm0
-	pcmpeqb	%xmm1, %xmm0
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movdqa	(%edx), %xmm2
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm2, %xmm1
-#else
-	pcmpeqb	(%edx), %xmm1
-#endif
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %edi
-	shr	%cl, %esi
-	shr	%cl, %edi
-	sub	%edi, %esi
-	mov	%ecx, %edi
-	jne	L(less32bytes)
-	UPDATE_STRNCMP_COUNTER
-	movl	$0x10, FLAGS
-	mov	$0x10, %ecx
-	pxor	%xmm0, %xmm0
-	.p2align 4
-L(loop_ashr_0):
-	movdqa	(%eax, %ecx), %xmm1
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	movdqa	(%edx, %ecx), %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-#else
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	(%edx, %ecx), %xmm1
-#endif
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-	add	$16, %ecx
-	jmp	L(loop_ashr_0)
-
-/*
- * The following cases will be handled by ashr_1
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(15)            n -15            0(15 +(n-15) - n)         ashr_1
- */
-	.p2align 4
-L(ashr_1):	/* shift-by-1 case: compare the first aligned pair, then 16 bytes per loop step */
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$15, %xmm2	/* keep only the low 1 byte of the (%edx) block, in the top lane */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-15(%ecx), %edi	/* edi = ecx - 15; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$1, FLAGS	/* store the shift (1) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	1(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 1) & 0xfff) - 0x1000 < 0; NOTE(review): looks like a 4 KiB page-boundary guard - confirm */
-
-	.p2align 4
-L(loop_ashr_1):
-	add	$16, %edi
-	jg	L(nibble_ashr_1)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_1):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$1, %xmm3, %xmm2	/* xmm2 = top 15 bytes of xmm3, then low 1 byte of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_1)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$1, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_1)
-
-	.p2align 4
-L(nibble_ashr_1):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xfffe, %esi	/* only lanes 1..15 matter */
-	jnz	L(ashr_1_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$15, REM
-	jbe	L(ashr_1_exittail)	/* taken when REM <= 15 */
-#endif
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_1)
-
-	.p2align 4
-L(ashr_1_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$1, %xmm0	/* slide the NUL mask and the pending block down 1 byte */
-	psrldq	$1, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_2
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(14~15)            n -14            1(15 +(n-14) - n)         ashr_2
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 14 bytes; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $2.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.
- */
-	.p2align 4
-L(ashr_2):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$14, %xmm2	/* keep only the low 2 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-14(%ecx), %edi	/* edi = ecx - 14; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$2, FLAGS	/* store the shift (2) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	2(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 2) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_2):
-	add	$16, %edi
-	jg	L(nibble_ashr_2)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_2):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$2, %xmm3, %xmm2	/* xmm2 = top 14 bytes of xmm3, then low 2 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_2)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$2, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_2)
-
-	.p2align 4
-L(nibble_ashr_2):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xfffc, %esi	/* only lanes 2..15 matter */
-	jnz	L(ashr_2_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$14, REM
-	jbe	L(ashr_2_exittail)	/* taken when REM <= 14 */
-#endif
-
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_2)
-
-	.p2align 4
-L(ashr_2_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$2, %xmm0	/* slide the NUL mask and the pending block down 2 bytes */
-	psrldq	$2, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_3
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(13~15)            n -13            2(15 +(n-13) - n)         ashr_3
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 13 bytes; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $3.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.
- */
-	.p2align 4
-L(ashr_3):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$13, %xmm2	/* keep only the low 3 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-13(%ecx), %edi	/* edi = ecx - 13; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$3, FLAGS	/* store the shift (3) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	3(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 3) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_3):
-	add	$16, %edi
-	jg	L(nibble_ashr_3)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_3):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$3, %xmm3, %xmm2	/* xmm2 = top 13 bytes of xmm3, then low 3 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_3)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$3, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_3)
-
-	.p2align 4
-L(nibble_ashr_3):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xfff8, %esi	/* only lanes 3..15 matter */
-	jnz	L(ashr_3_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$13, REM
-	jbe	L(ashr_3_exittail)	/* taken when REM <= 13 */
-#endif
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_3)
-
-	.p2align 4
-L(ashr_3_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$3, %xmm0	/* slide the NUL mask and the pending block down 3 bytes */
-	psrldq	$3, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_4
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(12~15)            n -12            3(15 +(n-12) - n)         ashr_4
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 12 bytes; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $4.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.
- */
-	.p2align 4
-L(ashr_4):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$12, %xmm2	/* keep only the low 4 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-12(%ecx), %edi	/* edi = ecx - 12; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$4, FLAGS	/* store the shift (4) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	4(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 4) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_4):
-	add	$16, %edi
-	jg	L(nibble_ashr_4)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_4):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$4, %xmm3, %xmm2	/* xmm2 = top 12 bytes of xmm3, then low 4 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_4)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$4, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_4)
-
-	.p2align 4
-L(nibble_ashr_4):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xfff0, %esi	/* only lanes 4..15 matter */
-	jnz	L(ashr_4_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$12, REM
-	jbe	L(ashr_4_exittail)	/* taken when REM <= 12 */
-#endif
-
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_4)
-
-	.p2align 4
-L(ashr_4_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$4, %xmm0	/* slide the NUL mask and the pending block down 4 bytes */
-	psrldq	$4, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_5
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(11~15)            n -11            4(15 +(n-11) - n)         ashr_5
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 11 bytes; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $5.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.
- */
-	.p2align 4
-L(ashr_5):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$11, %xmm2	/* keep only the low 5 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-11(%ecx), %edi	/* edi = ecx - 11; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$5, FLAGS	/* store the shift (5) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	5(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 5) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_5):
-	add	$16, %edi
-	jg	L(nibble_ashr_5)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_5):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$5, %xmm3, %xmm2	/* xmm2 = top 11 bytes of xmm3, then low 5 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_5)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$5, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_5)
-
-	.p2align 4
-L(nibble_ashr_5):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xffe0, %esi	/* only lanes 5..15 matter */
-	jnz	L(ashr_5_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$11, REM
-	jbe	L(ashr_5_exittail)	/* taken when REM <= 11 */
-#endif
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_5)
-
-	.p2align 4
-L(ashr_5_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$5, %xmm0	/* slide the NUL mask and the pending block down 5 bytes */
-	psrldq	$5, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_6
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(10~15)            n -10            5(15 +(n-10) - n)         ashr_6
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 10 bytes; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $6.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.
- */
-
-	.p2align 4
-L(ashr_6):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$10, %xmm2	/* keep only the low 6 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-10(%ecx), %edi	/* edi = ecx - 10; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$6, FLAGS	/* store the shift (6) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	6(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 6) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_6):
-	add	$16, %edi
-	jg	L(nibble_ashr_6)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_6):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$6, %xmm3, %xmm2	/* xmm2 = top 10 bytes of xmm3, then low 6 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_6)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$6, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_6)
-
-	.p2align 4
-L(nibble_ashr_6):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xffc0, %esi	/* only lanes 6..15 matter */
-	jnz	L(ashr_6_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$10, REM
-	jbe	L(ashr_6_exittail)	/* taken when REM <= 10 */
-#endif
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_6)
-
-	.p2align 4
-L(ashr_6_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$6, %xmm0	/* slide the NUL mask and the pending block down 6 bytes */
-	psrldq	$6, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_7
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(9~15)            n - 9            6(15 +(n-9) - n)         ashr_7
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 9 bytes; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $7.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.  Only its
- * sign is tested, and %edx is 16-byte aligned (movdqa base), so any
- * displacement 1..15 in the lea below gives the same branch behaviour.
- */
-
-	.p2align 4
-L(ashr_7):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$9, %xmm2	/* keep only the low 7 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-9(%ecx), %edi	/* edi = ecx - 9; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$7, FLAGS	/* store the shift (7) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	7(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 7) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_7):
-	add	$16, %edi
-	jg	L(nibble_ashr_7)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_7):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$7, %xmm3, %xmm2	/* xmm2 = top 9 bytes of xmm3, then low 7 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_7)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$7, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_7)
-
-	.p2align 4
-L(nibble_ashr_7):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xff80, %esi	/* only lanes 7..15 matter */
-	jnz	L(ashr_7_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$9, REM
-	jbe	L(ashr_7_exittail)	/* taken when REM <= 9 */
-#endif
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_7)
-
-	.p2align 4
-L(ashr_7_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$7, %xmm0	/* slide the NUL mask and the pending block down 7 bytes */
-	psrldq	$7, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_8
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(8~15)            n - 8            7(15 +(n-8) - n)         ashr_8
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 8 bytes; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $8.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.
- */
-	.p2align 4
-L(ashr_8):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$8, %xmm2	/* keep only the low 8 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-8(%ecx), %edi	/* edi = ecx - 8; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$8, FLAGS	/* store the shift (8) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	8(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 8) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_8):
-	add	$16, %edi
-	jg	L(nibble_ashr_8)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_8):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$8, %xmm3, %xmm2	/* xmm2 = top 8 bytes of xmm3, then low 8 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_8)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$8, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_8)
-
-	.p2align 4
-L(nibble_ashr_8):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xff00, %esi	/* only lanes 8..15 matter */
-	jnz	L(ashr_8_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$8, REM
-	jbe	L(ashr_8_exittail)	/* taken when REM <= 8 */
-#endif
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_8)
-
-	.p2align 4
-L(ashr_8_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$8, %xmm0	/* slide the NUL mask and the pending block down 8 bytes */
-	psrldq	$8, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_9
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(7~15)            n - 7            8(15 +(n-7) - n)         ashr_9
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 7 bytes; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $9.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.
- */
-	.p2align 4
-L(ashr_9):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$7, %xmm2	/* keep only the low 9 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-7(%ecx), %edi	/* edi = ecx - 7; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$9, FLAGS	/* store the shift (9) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	9(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 9) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_9):
-	add	$16, %edi
-	jg	L(nibble_ashr_9)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_9):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$9, %xmm3, %xmm2	/* xmm2 = top 7 bytes of xmm3, then low 9 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_9)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$9, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_9)
-
-	.p2align 4
-L(nibble_ashr_9):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xfe00, %esi	/* only lanes 9..15 matter */
-	jnz	L(ashr_9_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$7, REM
-	jbe	L(ashr_9_exittail)	/* taken when REM <= 7 */
-#endif
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_9)
-
-	.p2align 4
-L(ashr_9_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$9, %xmm0	/* slide the NUL mask and the pending block down 9 bytes */
-	psrldq	$9, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_10
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(6~15)            n - 6            9(15 +(n-6) - n)         ashr_10
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 6 bytes; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $10.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.
- */
-	.p2align 4
-L(ashr_10):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$6, %xmm2	/* keep only the low 10 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-6(%ecx), %edi	/* edi = ecx - 6; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$10, FLAGS	/* store the shift (10) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	10(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 10) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_10):
-	add	$16, %edi
-	jg	L(nibble_ashr_10)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_10):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$10, %xmm3, %xmm2	/* xmm2 = top 6 bytes of xmm3, then low 10 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_10)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$10, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_10)
-
-	.p2align 4
-L(nibble_ashr_10):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xfc00, %esi	/* only lanes 10..15 matter */
-	jnz	L(ashr_10_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$6, REM
-	jbe	L(ashr_10_exittail)	/* taken when REM <= 6 */
-#endif
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_10)
-
-	.p2align 4
-L(ashr_10_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$10, %xmm0	/* slide the NUL mask and the pending block down 10 bytes */
-	psrldq	$10, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_11
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(5~15)            n - 5            10(15 +(n-5) - n)         ashr_11
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 5 bytes; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $11.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.
- */
-	.p2align 4
-L(ashr_11):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$5, %xmm2	/* keep only the low 11 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-5(%ecx), %edi	/* edi = ecx - 5; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$11, FLAGS	/* store the shift (11) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	11(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 11) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_11):
-	add	$16, %edi
-	jg	L(nibble_ashr_11)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_11):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$11, %xmm3, %xmm2	/* xmm2 = top 5 bytes of xmm3, then low 11 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_11)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$11, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_11)
-
-	.p2align 4
-L(nibble_ashr_11):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xf800, %esi	/* only lanes 11..15 matter */
-	jnz	L(ashr_11_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$5, REM
-	jbe	L(ashr_11_exittail)	/* taken when REM <= 5 */
-#endif
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_11)
-
-	.p2align 4
-L(ashr_11_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$11, %xmm0	/* slide the NUL mask and the pending block down 11 bytes */
-	psrldq	$11, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_12
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(4~15)            n - 4            11(15 +(n-4) - n)         ashr_12
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 4 bytes; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $12.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.
- */
-	.p2align 4
-L(ashr_12):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$4, %xmm2	/* keep only the low 12 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-4(%ecx), %edi	/* edi = ecx - 4; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$12, FLAGS	/* store the shift (12) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	12(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 12) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_12):
-	add	$16, %edi
-	jg	L(nibble_ashr_12)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_12):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$12, %xmm3, %xmm2	/* xmm2 = top 4 bytes of xmm3, then low 12 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_12)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$12, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_12)
-
-	.p2align 4
-L(nibble_ashr_12):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xf000, %esi	/* only lanes 12..15 matter */
-	jnz	L(ashr_12_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$4, REM
-	jbe	L(ashr_12_exittail)	/* taken when REM <= 4 */
-#endif
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_12)
-
-	.p2align 4
-L(ashr_12_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$12, %xmm0	/* slide the NUL mask and the pending block down 12 bytes */
-	psrldq	$12, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_13
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(3~15)            n - 3            12(15 +(n-3) - n)         ashr_13
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 3 bytes; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $13.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.
- */
-	.p2align 4
-L(ashr_13):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$3, %xmm2	/* keep only the low 13 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-3(%ecx), %edi	/* edi = ecx - 3; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$13, FLAGS	/* store the shift (13) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	13(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 13) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_13):
-	add	$16, %edi
-	jg	L(nibble_ashr_13)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_13):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$13, %xmm3, %xmm2	/* xmm2 = top 3 bytes of xmm3, then low 13 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_13)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$13, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_13)
-
-	.p2align 4
-L(nibble_ashr_13):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xe000, %esi	/* only lanes 13..15 matter */
-	jnz	L(ashr_13_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$3, REM
-	jbe	L(ashr_13_exittail)	/* taken when REM <= 3 */
-#endif
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_13)
-
-	.p2align 4
-L(ashr_13_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$13, %xmm0	/* slide the NUL mask and the pending block down 13 bytes */
-	psrldq	$13, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_14
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(2~15)            n - 2            13(15 +(n-2) - n)         ashr_14
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 2 bytes; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $14.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.
- */
-	.p2align 4
-L(ashr_14):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$2, %xmm2	/* keep only the low 14 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-2(%ecx), %edi	/* edi = ecx - 2; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$14, FLAGS	/* store the shift (14) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	14(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 14) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_14):
-	add	$16, %edi
-	jg	L(nibble_ashr_14)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_14):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$14, %xmm3, %xmm2	/* xmm2 = top 2 bytes of xmm3, then low 14 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_14)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$14, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_14)
-
-	.p2align 4
-L(nibble_ashr_14):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0xc000, %esi	/* only lanes 14..15 matter */
-	jnz	L(ashr_14_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$2, REM
-	jbe	L(ashr_14_exittail)	/* taken when REM <= 2 */
-#endif
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_14)
-
-	.p2align 4
-L(ashr_14_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$14, %xmm0	/* slide the NUL mask and the pending block down 14 bytes */
-	psrldq	$14, %xmm3
-	jmp	L(aftertail)
-
-/*
- * The following cases will be handled by ashr_15
- * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
- *        n(1~15)            n - 1            14(15 +(n-1) - n)         ashr_15
- *
- * Setup compares the first aligned pair with the (%edx) block shifted left
- * by 1 byte; the loop then handles 16 bytes per step, splicing the previous
- * and current (%edx) blocks with palignr $15.
- * NOTE(review): the edi counter (0xfff / 0x1000 arithmetic) looks like a
- * 4 KiB page-boundary guard for the next (%edx) load - confirm.
- */
-
-	.p2align 4
-L(ashr_15):
-	mov	$0xffff, %esi	/* esi = one bit per byte lane, all set */
-	pxor	%xmm0, %xmm0	/* xmm0 = 0, used to detect NUL bytes */
-	movdqa	(%edx), %xmm2
-	movdqa	(%eax), %xmm1
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 = 0xff where the (%eax) block holds NUL */
-	pslldq	$1, %xmm2	/* keep only the low 15 bytes of the (%edx) block, in the top lanes */
-	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2	/* xmm2 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm2	/* sign bit stays set only for equal, non-NUL lanes */
-	pmovmskb %xmm2, %edi	/* edi = one bit per equal, non-NUL lane */
-	shr	%cl, %esi	/* drop the low cl lanes from both masks */
-	shr	%cl, %edi
-	sub	%edi, %esi	/* nonzero => a kept lane differs or is NUL */
-	lea	-1(%ecx), %edi	/* edi = ecx - 1; lea leaves the flags from sub untouched */
-	jnz	L(less32bytes)
-
-	UPDATE_STRNCMP_COUNTER	/* macro defined outside this chunk */
-
-	movdqa	(%edx), %xmm3	/* xmm3 = previous (%edx) block for palignr */
-	pxor	%xmm0, %xmm0
-	mov	$16, %ecx	/* ecx = offset of the next 16-byte pair */
-	orl	$15, FLAGS	/* store the shift (15) in FLAGS; L(exit) reads FLAGS & 0x1f */
-	lea	15(%edx), %edi
-	and	$0xfff, %edi
-	sub	$0x1000, %edi	/* edi = ((edx + 15) & 0xfff) - 0x1000 < 0 */
-
-	.p2align 4
-L(loop_ashr_15):
-	add	$16, %edi
-	jg	L(nibble_ashr_15)	/* counter turned positive: run the extra checks first */
-
-L(gobble_ashr_15):
-	movdqa	(%eax, %ecx), %xmm1
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4	/* save the unshifted block for the next round */
-
-	palignr	$15, %xmm3, %xmm2	/* xmm2 = top 1 byte of xmm3, then low 15 bytes of xmm2 */
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0	/* xmm0 (zero here) becomes the NUL mask of the (%eax) block */
-	pcmpeqb	%xmm2, %xmm1	/* per-lane equality */
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi	/* zero iff all 16 lanes are equal and non-NUL */
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM	/* REM -= 16; flags still come from cmp */
-	jbe	L(more8byteseq)	/* taken when REM was <= 16 */
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3	/* current block becomes the previous one */
-
-	add	$16, %edi
-	jg	L(nibble_ashr_15)
-
-	movdqa	(%eax, %ecx), %xmm1	/* second unrolled copy of the compare step above */
-	movdqa	(%edx, %ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-
-	palignr	$15, %xmm3, %xmm2
-	TOLOWER (%xmm1, %xmm2)
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm2, %xmm1
-	psubb	%xmm0, %xmm1
-	pmovmskb %xmm1, %esi
-	sub	$0xffff, %esi
-	jnz	L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$16, REM
-	lea	-16(REM), REM
-	jbe	L(more8byteseq)
-#endif
-	add	$16, %ecx
-	movdqa	%xmm4, %xmm3
-	jmp	L(loop_ashr_15)
-
-	.p2align 4
-L(nibble_ashr_15):
-	pcmpeqb	%xmm3, %xmm0	/* NUL lanes of the pending (%edx) block */
-	pmovmskb %xmm0, %esi
-	test	$0x8000, %esi	/* only lane 15 matters */
-	jnz	L(ashr_15_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$1, REM
-	jbe	L(ashr_15_exittail)	/* taken when REM <= 1 */
-#endif
-	pxor	%xmm0, %xmm0	/* back to all-zero for the NUL test */
-	sub	$0x1000, %edi	/* rewind the counter by 0x1000 */
-	jmp	L(gobble_ashr_15)
-
-	.p2align 4
-L(ashr_15_exittail):
-	movdqa	(%eax, %ecx), %xmm1
-	psrldq	$15, %xmm0	/* slide the NUL mask and the pending block down 15 bytes */
-	psrldq	$15, %xmm3
-	jmp	L(aftertail)
-
-	.p2align 4
-L(aftertail):	/* common tail of every ashr_N_exittail: compare xmm1 with the shifted pending block in xmm3 */
-	TOLOWER (%xmm1, %xmm3)
-	pcmpeqb	%xmm3, %xmm1	/* xmm1 = 0xff where the lanes are equal */
-	psubb	%xmm0, %xmm1	/* clear the sign bit of lanes flagged in xmm0 */
-	pmovmskb %xmm1, %esi
-	not	%esi	/* low 16 bits: set where a lane differs or is flagged in xmm0 */
-L(exit):	/* esi is nonzero here; turn the block offset in ecx into pointer adjustments */
-	mov	FLAGS, %edi
-	and	$0x1f, %edi	/* low 5 bits of FLAGS = value stored by the orl in the ashr_N setup */
-	lea	-16(%edi, %ecx), %edi	/* edi = (FLAGS & 0x1f) + ecx - 16 */
-L(less32bytes):
-	add	%edi, %edx	/* edx += edi */
-	add	%ecx, %eax	/* eax += ecx */
-	testl	$0x20, FLAGS
-	jz	L(ret2)
-	xchg	%eax, %edx	/* FLAGS bit 0x20 set: swap the two pointers; NOTE(review): presumably undoes an earlier operand swap - confirm */
-
-	.p2align 4
-L(ret2):
-	mov	%esi, %ecx	/* ecx = mask tested by the byte dispatch that follows */
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	addl	$4, %esp	/* release one 4-byte stack slot in the case-insensitive builds */
-	cfi_adjust_cfa_offset (-4)
-#endif
-	POP	(%esi)	/* POP is a macro defined outside this chunk */
-	POP	(%edi)
-#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
-	POP	(FLAGS)
-#endif
-L(less16bytes):	/* dispatch on the lowest set bit of cl to the matching L(ByteN) handler */
-	test	%cl, %cl
-	jz	L(2next_8_bytes)	/* nothing flagged in bits 0..7 */
-
-	test	$0x01, %cl
-	jnz	L(Byte0)
-
-	test	$0x02, %cl
-	jnz	L(Byte1)
-
-	test	$0x04, %cl
-	jnz	L(Byte2)
-
-	test	$0x08, %cl
-	jnz	L(Byte3)
-
-	test	$0x10, %cl
-	jnz	L(Byte4)
-
-	test	$0x20, %cl
-	jnz	L(Byte5)
-
-	test	$0x40, %cl
-	jnz	L(Byte6)	/* falling through means cl != 0 with bits 0..6 clear, i.e. bit 7 */
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$7, REM
-	jbe	L(eq)	/* taken when REM <= 7 */
-#endif
-
-	movzx	7(%eax), %ecx	/* ecx = byte 7 of the eax string */
-	movzx	7(%edx), %eax	/* eax = byte 7 of the edx string */
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx	/* map both bytes through _nl_C_LC_CTYPE_tolower (4-byte entries, index biased by 128) */
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
-	sub	%ecx, %eax	/* eax = edx-side byte minus eax-side byte */
-	RETURN
-
-L(Byte0):	/* result comes from the bytes at offset 0 of both pointers */
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$0, REM
-	jbe	L(eq)	/* taken when REM == 0 */
-#endif
-	movzx	(%eax), %ecx	/* ecx = byte 0 of the eax string */
-	movzx	(%edx), %eax	/* eax = byte 0 of the edx string */
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx	/* map both bytes through _nl_C_LC_CTYPE_tolower (4-byte entries, index biased by 128) */
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
-	sub	%ecx, %eax	/* eax = edx-side byte minus eax-side byte */
-	RETURN
-
-L(Byte1):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$1, REM
-	jbe	L(eq)
-#endif
-	movzx	1(%eax), %ecx
-	movzx	1(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
-	sub	%ecx, %eax
-	RETURN
-
-L(Byte2):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$2, REM
-	jbe	L(eq)
-#endif
-	movzx	2(%eax), %ecx
-	movzx	2(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
-	sub	%ecx, %eax
-	RETURN
-
-L(Byte3):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$3, REM
-	jbe	L(eq)
-#endif
-	movzx	3(%eax), %ecx
-	movzx	3(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
-	sub	%ecx, %eax
-	RETURN
-
-L(Byte4):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$4, REM
-	jbe	L(eq)
-#endif
-	movzx	4(%eax), %ecx
-	movzx	4(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
-	sub	%ecx, %eax
-	RETURN
-
-L(Byte5):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$5, REM
-	jbe	L(eq)
-#endif
-	movzx	5(%eax), %ecx
-	movzx	5(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
-	sub	%ecx, %eax
-	RETURN
-
-L(Byte6):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$6, REM
-	jbe	L(eq)
-#endif
-	movzx	6(%eax), %ecx
-	movzx	6(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
-	sub	%ecx, %eax
-	RETURN
-
-L(2next_8_bytes):
-	add	$8, %eax
-	add	$8, %edx
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$8, REM
-	lea	-8(REM), REM
-	jbe	L(eq)
-#endif
-
-	test	$0x01, %ch
-	jnz	L(Byte0)
-
-	test	$0x02, %ch
-	jnz	L(Byte1)
-
-	test	$0x04, %ch
-	jnz	L(Byte2)
-
-	test	$0x08, %ch
-	jnz	L(Byte3)
-
-	test	$0x10, %ch
-	jnz	L(Byte4)
-
-	test	$0x20, %ch
-	jnz	L(Byte5)
-
-	test	$0x40, %ch
-	jnz	L(Byte6)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	cmp	$7, REM
-	jbe	L(eq)
-#endif
-	movzx	7(%eax), %ecx
-	movzx	7(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
-	sub	%ecx, %eax
-	RETURN
-
-#ifdef USE_AS_STRNCMP
-L(neq_sncmp):
-#endif
-L(neq):
-	mov	$1, %eax
-	ja	L(neq_bigger)
-	neg	%eax
-L(neq_bigger):
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-	addl	$4, %esp
-	cfi_adjust_cfa_offset (-4)
-#endif
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	POP	(REM)
-#endif
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	POP	(%ebx)
-# endif
-#endif
-	ret
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	.p2align 4
-	cfi_restore_state
-L(more8byteseq):
-
-# ifdef USE_AS_STRNCASECMP_L
-	addl	$4, %esp
-	cfi_adjust_cfa_offset (-4)
-# endif
-	POP	(%esi)
-	POP	(%edi)
-# ifdef USE_AS_STRNCMP
-	POP	(FLAGS)
-# endif
-#endif
-
-#ifdef USE_AS_STRNCMP
-L(eq_sncmp):
-#endif
-L(eq):
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	POP	(REM)
-#endif
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
-	POP	(%ebx)
-# endif
-#endif
-	xorl	%eax, %eax
-	ret
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
-	.p2align 4
-# if defined USE_AS_STRNCASECMP_L && defined PIC
-	CFI_PUSH (%ebx)
-# endif
-	CFI_PUSH (REM)
-L(less16bytes_sncmp):
-# ifdef USE_AS_STRNCASECMP_L
-	PUSH	(%esi)
-# endif
-	test	REM, REM
-	jz	L(eq_sncmp)
-
-	movzbl	(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, (%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-	cmp	$1, REM
-	je	L(eq_sncmp)
-
-	movzbl	1(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	1(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 1(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-	cmp	$2, REM
-	je	L(eq_sncmp)
-
-	movzbl	2(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	2(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 2(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-	cmp	$3, REM
-	je	L(eq_sncmp)
-
-	movzbl	3(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	3(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 3(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-	cmp	$4, REM
-	je	L(eq_sncmp)
-
-	movzbl	4(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	4(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 4(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-	cmp	$5, REM
-	je	L(eq_sncmp)
-
-	movzbl	5(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	5(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 5(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-	cmp	$6, REM
-	je	L(eq_sncmp)
-
-	movzbl	6(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	6(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 6(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-	cmp	$7, REM
-	je	L(eq_sncmp)
-
-	movzbl	7(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	7(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 7(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-
-	cmp	$8, REM
-	je	L(eq_sncmp)
-
-	movzbl	8(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	8(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 8(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-	cmp	$9, REM
-	je	L(eq_sncmp)
-
-	movzbl	9(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	9(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 9(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-	cmp	$10, REM
-	je	L(eq_sncmp)
-
-	movzbl	10(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	10(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 10(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-	cmp	$11, REM
-	je	L(eq_sncmp)
-
-	movzbl	11(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	11(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 11(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-
-	cmp	$12, REM
-	je	L(eq_sncmp)
-
-	movzbl	12(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	12(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 12(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-	cmp	$13, REM
-	je	L(eq_sncmp)
-
-	movzbl	13(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	13(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 13(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-	cmp	$14, REM
-	je	L(eq_sncmp)
-
-	movzbl	14(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	14(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 14(%edx)
-# endif
-	jne	L(neq_sncmp)
-	test	%cl, %cl
-	je	L(eq_sncmp)
-
-	cmp	$15, REM
-	je	L(eq_sncmp)
-
-	movzbl	15(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
-	movzbl	15(%edx), %esi
-#  ifdef PIC
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-#  else
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-#  endif
-	cmpl	%ecx, %esi
-# else
-	cmpb	%cl, 15(%edx)
-# endif
-	jne	L(neq_sncmp)
-
-# ifdef USE_AS_STRNCASECMP_L
-L(eq_sncmp):
-	POP	(%esi)
-# endif
-	POP	(REM)
-# if defined USE_AS_STRNCASECMP_L && defined PIC
-	POP	(%ebx)
-# endif
-	xor	%eax, %eax
-	ret
-
-# ifdef USE_AS_STRNCASECMP_L
-	.p2align 4
-#  ifdef PIC
-	CFI_PUSH (%ebx)
-#  endif
-	CFI_PUSH (REM)
-	CFI_PUSH (%esi)
-L(neq_sncmp):
-	mov	$1, %eax
-	mov	$-1, %edx
-	cmovna	%edx, %eax
-	POP	(%esi)
-	POP	(REM)
-#  ifdef PIC
-	POP	(%ebx)
-#  endif
-	ret
-# endif
-#endif
-
-END (STRCMP)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcmp.S b/sysdeps/i386/i686/multiarch/strcmp.S
deleted file mode 100644
index 56de25a4b7..0000000000
--- a/sysdeps/i386/i686/multiarch/strcmp.S
+++ /dev/null
@@ -1,95 +0,0 @@
-/* Multiple versions of strcmp
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifdef USE_AS_STRNCMP
-# define STRCMP			strncmp
-# define __GI_STRCMP		__GI_strncmp
-# define __STRCMP_IA32		__strncmp_ia32
-# define __STRCMP_SSSE3		__strncmp_ssse3
-# define __STRCMP_SSE4_2	__strncmp_sse4_2
-#elif defined USE_AS_STRCASECMP_L
-# define STRCMP			__strcasecmp_l
-# define __GI_STRCMP		__GI_strcasecmp_l
-# define __STRCMP_IA32		__strcasecmp_l_ia32
-# define __STRCMP_SSSE3		__strcasecmp_l_ssse3
-# define __STRCMP_SSE4_2	__strcasecmp_l_sse4_2
-#elif defined USE_AS_STRNCASECMP_L
-# define STRCMP			__strncasecmp_l
-# define __GI_STRCMP		__GI_strncasecmp_l
-# define __STRCMP_IA32		__strncasecmp_l_ia32
-# define __STRCMP_SSSE3		__strncasecmp_l_ssse3
-# define __STRCMP_SSE4_2	__strncasecmp_l_sse4_2
-#else
-# define STRCMP			strcmp
-# define __GI_STRCMP		__GI_strcmp
-# define __STRCMP_IA32		__strcmp_ia32
-# define __STRCMP_SSSE3		__strcmp_ssse3
-# define __STRCMP_SSE4_2	__strcmp_sse4_2
-#endif
-
-/* Define multiple versions only for the definition in libc.  Don't
-   define multiple versions for strncmp in static library since we
-   need strncmp before the initialization happened.  */
-#if (defined SHARED || !defined USE_AS_STRNCMP) && IS_IN (libc)
-	.text
-ENTRY(STRCMP)
-	.type	STRCMP, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__STRCMP_IA32)
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__STRCMP_SSSE3)
-	HAS_CPU_FEATURE (SSE4_2)
-	jz	2f
-	HAS_ARCH_FEATURE (Slow_SSE4_2)
-	jnz	2f
-	LOAD_FUNC_GOT_EAX (__STRCMP_SSE4_2)
-2:	ret
-END(STRCMP)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __STRCMP_IA32, @function; \
-	.p2align 4; \
-	.globl __STRCMP_IA32; \
-	.hidden __STRCMP_IA32; \
-	__STRCMP_IA32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __STRCMP_IA32, .-__STRCMP_IA32
-
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-#  define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCMP; __GI_STRCMP = __STRCMP_IA32
-# endif
-#endif
-
-#if !defined USE_AS_STRNCMP && !defined USE_AS_STRCASECMP_L \
-    && !defined USE_AS_STRNCASECMP_L
-# include "../strcmp.S"
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcpy-sse2.S b/sysdeps/i386/i686/multiarch/strcpy-sse2.S
deleted file mode 100644
index ed627a5f62..0000000000
--- a/sysdeps/i386/i686/multiarch/strcpy-sse2.S
+++ /dev/null
@@ -1,2250 +0,0 @@
-/* strcpy with SSE2 and unaligned load
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-
-# define CFI_PUSH(REG)                  \
-	cfi_adjust_cfa_offset (4);     \
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)                   \
-	cfi_adjust_cfa_offset (-4);    \
-	cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifndef STRCPY
-#  define STRCPY  __strcpy_sse2
-# endif
-
-# define STR1  PARMS
-# define STR2  STR1+4
-# define LEN  STR2+4
-
-# ifdef USE_AS_STRNCPY
-#  define PARMS  16
-#  define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
-#  define RETURN  POP(%edi); POP(%esi); POP(%ebx); ret;          \
-	CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
-
-# ifdef SHARED
-#  define JMPTBL(I, B)	I - B
-
-/* Load an entry in a jump table into ECX and branch to it. TABLE is a
-	jump table with relative offsets.
-	INDEX is a register contains the index into the jump table.
-	SCALE is the scale of INDEX. */
-
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)            \
-	/* We first load PC into ECX.  */                       \
-	SETUP_PIC_REG(cx);                                      \
-	/* Get the address of the jump table.  */               \
-	addl	$(TABLE - .), %ecx;                             \
-	/* Get the entry and convert the relative offset to the \
-	absolute	address.  */                            \
-	addl	(%ecx,INDEX,SCALE), %ecx;                       \
-	/* We loaded the jump table and adjusted ECX. Go.  */  \
-	jmp	*%ecx
-# else
-#  define JMPTBL(I, B)	I
-
-/* Branch to an entry in a jump table.  TABLE is a jump table with
-	absolute	offsets.  INDEX is a register contains the index into the
-	jump	table.  SCALE is the scale of INDEX. */
-
-#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
-	jmp	*TABLE(,INDEX,SCALE)
-# endif
-
-.text
-ENTRY (STRCPY)
-	ENTRANCE
-	mov	STR1(%esp), %edi
-	mov	STR2(%esp), %esi
-	movl	LEN(%esp), %ebx
-	test	%ebx, %ebx
-	jz	L(ExitZero)
-
-	mov	%esi, %ecx
-# ifndef USE_AS_STPCPY
-	mov	%edi, %eax      /* save result */
-# endif
-	and	$15, %ecx
-	jz	L(SourceStringAlignmentZero)
-
-	and	$-16, %esi
-	pxor	%xmm0, %xmm0
-	pxor	%xmm1, %xmm1
-
-	pcmpeqb	(%esi), %xmm1
-	add	%ecx, %ebx
-	pmovmskb %xmm1, %edx
-	shr	%cl, %edx
-# ifdef USE_AS_STPCPY
-	cmp	$16, %ebx
-	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
-# else
-	cmp	$17, %ebx
-	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail)
-
-	pcmpeqb	16(%esi), %xmm0
-	pmovmskb %xmm0, %edx
-# ifdef USE_AS_STPCPY
-	cmp	$32, %ebx
-	jbe	L(CopyFrom1To32BytesCase2OrCase3)
-# else
-	cmp	$33, %ebx
-	jbe	L(CopyFrom1To32BytesCase2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32Bytes)
-
-	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
-	movdqu	%xmm1, (%edi)
-
-	sub	%ecx, %edi
-
-/* If source address alignment != destination address alignment */
-	.p2align 4
-L(Unalign16Both):
-	mov	$16, %ecx
-	movdqa	(%esi, %ecx), %xmm1
-	movaps	16(%esi, %ecx), %xmm2
-	movdqu	%xmm1, (%edi, %ecx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	sub	$48, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
-
-	movaps	16(%esi, %ecx), %xmm3
-	movdqu	%xmm2, (%edi, %ecx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
-
-	movaps	16(%esi, %ecx), %xmm4
-	movdqu	%xmm3, (%edi, %ecx)
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
-
-	movaps	16(%esi, %ecx), %xmm1
-	movdqu	%xmm4, (%edi, %ecx)
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm1)
-
-	movaps	16(%esi, %ecx), %xmm2
-	movdqu	%xmm1, (%edi, %ecx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
-
-	movaps	16(%esi, %ecx), %xmm3
-	movdqu	%xmm2, (%edi, %ecx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
-
-	movdqu	%xmm3, (%edi, %ecx)
-	mov	%esi, %edx
-	lea	16(%esi, %ecx), %esi
-	and	$-0x40, %esi
-	sub	%esi, %edx
-	sub	%edx, %edi
-	lea	128(%ebx, %edx), %ebx
-
-L(Unaligned64Loop):
-	movaps	(%esi), %xmm2
-	movaps	%xmm2, %xmm4
-	movaps	16(%esi), %xmm5
-	movaps	32(%esi), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	48(%esi), %xmm7
-	pminub	%xmm5, %xmm2
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm0, %xmm3
-	pmovmskb %xmm3, %edx
-	sub	$64, %ebx
-	jbe	L(UnalignedLeaveCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(Unaligned64Leave)
-L(Unaligned64Loop_start):
-	add	$64, %edi
-	add	$64, %esi
-	movdqu	%xmm4, -64(%edi)
-	movaps	(%esi), %xmm2
-	movdqa	%xmm2, %xmm4
-	movdqu	%xmm5, -48(%edi)
-	movaps	16(%esi), %xmm5
-	pminub	%xmm5, %xmm2
-	movaps	32(%esi), %xmm3
-	movdqu	%xmm6, -32(%edi)
-	movaps	%xmm3, %xmm6
-	movdqu	%xmm7, -16(%edi)
-	movaps	48(%esi), %xmm7
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm0, %xmm3
-	pmovmskb %xmm3, %edx
-	sub	$64, %ebx
-	jbe	L(UnalignedLeaveCase2OrCase3)
-	test	%edx, %edx
-	jz	L(Unaligned64Loop_start)
-L(Unaligned64Leave):
-	pxor	%xmm1, %xmm1
-
-	pcmpeqb	%xmm4, %xmm0
-	pcmpeqb	%xmm5, %xmm1
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm1, %ecx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnaligned_0)
-	test	%ecx, %ecx
-	jnz	L(CopyFrom1To16BytesUnaligned_16)
-
-	pcmpeqb	%xmm6, %xmm0
-	pcmpeqb	%xmm7, %xmm1
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm1, %ecx
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnaligned_32)
-
-	bsf	%ecx, %edx
-	movdqu	%xmm4, (%edi)
-	movdqu	%xmm5, 16(%edi)
-	movdqu	%xmm6, 32(%edi)
-# ifdef USE_AS_STPCPY
-	lea	48(%edi, %edx), %eax
-# endif
-	movdqu	%xmm7, 48(%edi)
-	add	$15, %ebx
-	sub	%edx, %ebx
-	lea	49(%edi, %edx), %edi
-	jmp	L(StrncpyFillTailWithZero)
-
-/* If source address alignment == destination address alignment */
-
-L(SourceStringAlignmentZero):
-	pxor	%xmm0, %xmm0
-	movdqa	(%esi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %edx
-# ifdef USE_AS_STPCPY
-	cmp	$16, %ebx
-	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
-# else
-	cmp	$17, %ebx
-	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail1)
-
-	pcmpeqb	16(%esi), %xmm0
-	movdqu	%xmm1, (%edi)
-	pmovmskb %xmm0, %edx
-# ifdef USE_AS_STPCPY
-	cmp	$32, %ebx
-	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
-# else
-	cmp	$33, %ebx
-	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
-# endif
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32Bytes1)
-
-	jmp	L(Unalign16Both)
-
-/*-----------------End of main part---------------------------*/
-
-/* Case1 */
-	.p2align 4
-L(CopyFrom1To16BytesTail):
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes1):
-	add	$16, %esi
-	add	$16, %edi
-	sub	$16, %ebx
-L(CopyFrom1To16BytesTail1):
-	bsf	%edx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes):
-	sub	%ecx, %ebx
-	bsf	%edx, %edx
-	add	%ecx, %esi
-	add	$16, %edx
-	sub	%ecx, %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_0):
-	bsf	%edx, %edx
-# ifdef USE_AS_STPCPY
-	lea	(%edi, %edx), %eax
-# endif
-	movdqu	%xmm4, (%edi)
-	add	$63, %ebx
-	sub	%edx, %ebx
-	lea	1(%edi, %edx), %edi
-	jmp	L(StrncpyFillTailWithZero)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_16):
-	bsf	%ecx, %edx
-	movdqu	%xmm4, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	16(%edi, %edx), %eax
-# endif
-	movdqu	%xmm5, 16(%edi)
-	add	$47, %ebx
-	sub	%edx, %ebx
-	lea	17(%edi, %edx), %edi
-	jmp	L(StrncpyFillTailWithZero)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnaligned_32):
-	bsf	%edx, %edx
-	movdqu	%xmm4, (%edi)
-	movdqu	%xmm5, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	32(%edi, %edx), %eax
-# endif
-	movdqu	%xmm6, 32(%edi)
-	add	$31, %ebx
-	sub	%edx, %ebx
-	lea	33(%edi, %edx), %edi
-	jmp	L(StrncpyFillTailWithZero)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnalignedXmm6):
-	movdqu	%xmm6, (%edi, %ecx)
-	jmp	L(CopyFrom1To16BytesXmmExit)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnalignedXmm5):
-	movdqu	%xmm5, (%edi, %ecx)
-	jmp	L(CopyFrom1To16BytesXmmExit)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnalignedXmm4):
-	movdqu	%xmm4, (%edi, %ecx)
-	jmp	L(CopyFrom1To16BytesXmmExit)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnalignedXmm3):
-	movdqu	%xmm3, (%edi, %ecx)
-	jmp	L(CopyFrom1To16BytesXmmExit)
-
-	.p2align 4
-L(CopyFrom1To16BytesUnalignedXmm1):
-	movdqu	%xmm1, (%edi, %ecx)
-	jmp	L(CopyFrom1To16BytesXmmExit)
-
-	.p2align 4
-L(CopyFrom1To16BytesExit):
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-/* Case2 */
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %ebx
-	add	%ecx, %edi
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32BytesCase2):
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	add	$16, %edx
-	sub	%ecx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTailCase2):
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTail1Case2):
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-/* Case2 or Case3,  Case3 */
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesCase2)
-L(CopyFrom1To16BytesCase3):
-	add	$16, %ebx
-	add	%ecx, %edi
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32BytesCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To32BytesCase2)
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To16BytesTailCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTailCase2)
-	sub	%ecx, %ebx
-	add	%ecx, %esi
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-	.p2align 4
-L(CopyFrom1To32Bytes1Case2OrCase3):
-	add	$16, %edi
-	add	$16, %esi
-	sub	$16, %ebx
-L(CopyFrom1To16BytesTail1Case2OrCase3):
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesTail1Case2)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-	.p2align 4
-L(Exit0):
-# ifdef USE_AS_STPCPY
-	mov	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit1):
-	movb	%dh, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	(%edi), %eax
-# endif
-	sub	$1, %ebx
-	lea	1(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit2):
-	movw	(%esi), %dx
-	movw	%dx, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	1(%edi), %eax
-# endif
-	sub	$2, %ebx
-	lea	2(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit3):
-	movw	(%esi), %cx
-	movw	%cx, (%edi)
-	movb	%dh, 2(%edi)
-# ifdef USE_AS_STPCPY
-	lea	2(%edi), %eax
-# endif
-	sub	$3, %ebx
-	lea	3(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit4):
-	movl	(%esi), %edx
-	movl	%edx, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	3(%edi), %eax
-# endif
-	sub	$4, %ebx
-	lea	4(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit5):
-	movl	(%esi), %ecx
-	movb	%dh, 4(%edi)
-	movl	%ecx, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	4(%edi), %eax
-# endif
-	sub	$5, %ebx
-	lea	5(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit6):
-	movl	(%esi), %ecx
-	movw	4(%esi), %dx
-	movl	%ecx, (%edi)
-	movw	%dx, 4(%edi)
-# ifdef USE_AS_STPCPY
-	lea	5(%edi), %eax
-# endif
-	sub	$6, %ebx
-	lea	6(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit7):
-	movl	(%esi), %ecx
-	movl	3(%esi), %edx
-	movl	%ecx, (%edi)
-	movl	%edx, 3(%edi)
-# ifdef USE_AS_STPCPY
-	lea	6(%edi), %eax
-# endif
-	sub	$7, %ebx
-	lea	7(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit8):
-	movlpd	(%esi), %xmm0
-	movlpd	%xmm0, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	7(%edi), %eax
-# endif
-	sub	$8, %ebx
-	lea	8(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit9):
-	movlpd	(%esi), %xmm0
-	movb	%dh, 8(%edi)
-	movlpd	%xmm0, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	8(%edi), %eax
-# endif
-	sub	$9, %ebx
-	lea	9(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit10):
-	movlpd	(%esi), %xmm0
-	movw	8(%esi), %dx
-	movlpd	%xmm0, (%edi)
-	movw	%dx, 8(%edi)
-# ifdef USE_AS_STPCPY
-	lea	9(%edi), %eax
-# endif
-	sub	$10, %ebx
-	lea	10(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit11):
-	movlpd	(%esi), %xmm0
-	movl	7(%esi), %edx
-	movlpd	%xmm0, (%edi)
-	movl	%edx, 7(%edi)
-# ifdef USE_AS_STPCPY
-	lea	10(%edi), %eax
-# endif
-	sub	$11, %ebx
-	lea	11(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit12):
-	movlpd	(%esi), %xmm0
-	movl	8(%esi), %edx
-	movlpd	%xmm0, (%edi)
-	movl	%edx, 8(%edi)
-# ifdef USE_AS_STPCPY
-	lea	11(%edi), %eax
-# endif
-	sub	$12, %ebx
-	lea	12(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit13):
-	movlpd	(%esi), %xmm0
-	movlpd	5(%esi), %xmm1
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm1, 5(%edi)
-# ifdef USE_AS_STPCPY
-	lea	12(%edi), %eax
-# endif
-	sub	$13, %ebx
-	lea	13(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit14):
-	movlpd	(%esi), %xmm0
-	movlpd	6(%esi), %xmm1
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm1, 6(%edi)
-# ifdef USE_AS_STPCPY
-	lea	13(%edi), %eax
-# endif
-	sub	$14, %ebx
-	lea	14(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit15):
-	movlpd	(%esi), %xmm0
-	movlpd	7(%esi), %xmm1
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm1, 7(%edi)
-# ifdef USE_AS_STPCPY
-	lea	14(%edi), %eax
-# endif
-	sub	$15, %ebx
-	lea	15(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit16):
-	movdqu	(%esi), %xmm0
-	movdqu	%xmm0, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	15(%edi), %eax
-# endif
-	sub	$16, %ebx
-	lea	16(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit17):
-	movdqu	(%esi), %xmm0
-	movdqu	%xmm0, (%edi)
-	movb	%dh, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	16(%edi), %eax
-# endif
-	sub	$17, %ebx
-	lea	17(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit18):
-	movdqu	(%esi), %xmm0
-	movw	16(%esi), %cx
-	movdqu	%xmm0, (%edi)
-	movw	%cx, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	17(%edi), %eax
-# endif
-	sub	$18, %ebx
-	lea	18(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit19):
-	movdqu	(%esi), %xmm0
-	movl	15(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movl	%ecx, 15(%edi)
-# ifdef USE_AS_STPCPY
-	lea	18(%edi), %eax
-# endif
-	sub	$19, %ebx
-	lea	19(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit20):
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movl	%ecx, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	19(%edi), %eax
-# endif
-	sub	$20, %ebx
-	lea	20(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit21):
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movl	%ecx, 16(%edi)
-	movb	%dh, 20(%edi)
-# ifdef USE_AS_STPCPY
-	lea	20(%edi), %eax
-# endif
-	sub	$21, %ebx
-	lea	21(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit22):
-	movdqu	(%esi), %xmm0
-	movlpd	14(%esi), %xmm3
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm3, 14(%edi)
-# ifdef USE_AS_STPCPY
-	lea	21(%edi), %eax
-# endif
-	sub	$22, %ebx
-	lea	22(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit23):
-	movdqu	(%esi), %xmm0
-	movlpd	15(%esi), %xmm3
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm3, 15(%edi)
-# ifdef USE_AS_STPCPY
-	lea	22(%edi), %eax
-# endif
-	sub	$23, %ebx
-	lea	23(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit24):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	23(%edi), %eax
-# endif
-	sub	$24, %ebx
-	lea	24(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit25):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movb	%dh, 24(%edi)
-# ifdef USE_AS_STPCPY
-	lea	24(%edi), %eax
-# endif
-	sub	$25, %ebx
-	lea	25(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit26):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movw	24(%esi), %cx
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movw	%cx, 24(%edi)
-# ifdef USE_AS_STPCPY
-	lea	25(%edi), %eax
-# endif
-	sub	$26, %ebx
-	lea	26(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit27):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	23(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movl	%ecx, 23(%edi)
-# ifdef USE_AS_STPCPY
-	lea	26(%edi), %eax
-# endif
-	sub	$27, %ebx
-	lea	27(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit28):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	24(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movl	%ecx, 24(%edi)
-# ifdef USE_AS_STPCPY
-	lea	27(%edi), %eax
-# endif
-	sub	$28, %ebx
-	lea	28(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit29):
-	movdqu	(%esi), %xmm0
-	movdqu	13(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 13(%edi)
-# ifdef USE_AS_STPCPY
-	lea	28(%edi), %eax
-# endif
-	sub	$29, %ebx
-	lea	29(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit30):
-	movdqu	(%esi), %xmm0
-	movdqu	14(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 14(%edi)
-# ifdef USE_AS_STPCPY
-	lea	29(%edi), %eax
-# endif
-	sub	$30, %ebx
-	lea	30(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-
-	.p2align 4
-L(Exit31):
-	movdqu	(%esi), %xmm0
-	movdqu	15(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 15(%edi)
-# ifdef USE_AS_STPCPY
-	lea	30(%edi), %eax
-# endif
-	sub	$31, %ebx
-	lea	31(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(Exit32):
-	movdqu	(%esi), %xmm0
-	movdqu	16(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	31(%edi), %eax
-# endif
-	sub	$32, %ebx
-	lea	32(%edi), %edi
-	jnz	L(StrncpyFillTailWithZero)
-	RETURN
-
-	.p2align 4
-L(StrncpyExit1):
-	movb	(%esi), %dl
-	movb	%dl, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	1(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit2):
-	movw	(%esi), %dx
-	movw	%dx, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	2(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit3):
-	movw	(%esi), %cx
-	movb	2(%esi), %dl
-	movw	%cx, (%edi)
-	movb	%dl, 2(%edi)
-# ifdef USE_AS_STPCPY
-	lea	3(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit4):
-	movl	(%esi), %edx
-	movl	%edx, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	4(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit5):
-	movl	(%esi), %ecx
-	movb	4(%esi), %dl
-	movl	%ecx, (%edi)
-	movb	%dl, 4(%edi)
-# ifdef USE_AS_STPCPY
-	lea	5(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit6):
-	movl	(%esi), %ecx
-	movw	4(%esi), %dx
-	movl	%ecx, (%edi)
-	movw	%dx, 4(%edi)
-# ifdef USE_AS_STPCPY
-	lea	6(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit7):
-	movl	(%esi), %ecx
-	movl	3(%esi), %edx
-	movl	%ecx, (%edi)
-	movl	%edx, 3(%edi)
-# ifdef USE_AS_STPCPY
-	lea	7(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit8):
-	movlpd	(%esi), %xmm0
-	movlpd	%xmm0, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	8(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit9):
-	movlpd	(%esi), %xmm0
-	movb	8(%esi), %dl
-	movlpd	%xmm0, (%edi)
-	movb	%dl, 8(%edi)
-# ifdef USE_AS_STPCPY
-	lea	9(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit10):
-	movlpd	(%esi), %xmm0
-	movw	8(%esi), %dx
-	movlpd	%xmm0, (%edi)
-	movw	%dx, 8(%edi)
-# ifdef USE_AS_STPCPY
-	lea	10(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit11):
-	movlpd	(%esi), %xmm0
-	movl	7(%esi), %edx
-	movlpd	%xmm0, (%edi)
-	movl	%edx, 7(%edi)
-# ifdef USE_AS_STPCPY
-	lea	11(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit12):
-	movlpd	(%esi), %xmm0
-	movl	8(%esi), %edx
-	movlpd	%xmm0, (%edi)
-	movl	%edx, 8(%edi)
-# ifdef USE_AS_STPCPY
-	lea	12(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit13):
-	movlpd	(%esi), %xmm0
-	movlpd	5(%esi), %xmm1
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm1, 5(%edi)
-# ifdef USE_AS_STPCPY
-	lea	13(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit14):
-	movlpd	(%esi), %xmm0
-	movlpd	6(%esi), %xmm1
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm1, 6(%edi)
-# ifdef USE_AS_STPCPY
-	lea	14(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit15):
-	movlpd	(%esi), %xmm0
-	movlpd	7(%esi), %xmm1
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm1, 7(%edi)
-# ifdef USE_AS_STPCPY
-	lea	15(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit16):
-	movdqu	(%esi), %xmm0
-	movdqu	%xmm0, (%edi)
-# ifdef USE_AS_STPCPY
-	lea	16(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit17):
-	movdqu	(%esi), %xmm0
-	movb	16(%esi), %cl
-	movdqu	%xmm0, (%edi)
-	movb	%cl, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	17(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit18):
-	movdqu	(%esi), %xmm0
-	movw	16(%esi), %cx
-	movdqu	%xmm0, (%edi)
-	movw	%cx, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	18(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit19):
-	movdqu	(%esi), %xmm0
-	movl	15(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movl	%ecx, 15(%edi)
-# ifdef USE_AS_STPCPY
-	lea	19(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit20):
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movl	%ecx, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	20(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit21):
-	movdqu	(%esi), %xmm0
-	movl	16(%esi), %ecx
-	movb	20(%esi), %dl
-	movdqu	%xmm0, (%edi)
-	movl	%ecx, 16(%edi)
-	movb	%dl, 20(%edi)
-# ifdef USE_AS_STPCPY
-	lea	21(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit22):
-	movdqu	(%esi), %xmm0
-	movlpd	14(%esi), %xmm3
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm3, 14(%edi)
-# ifdef USE_AS_STPCPY
-	lea	22(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit23):
-	movdqu	(%esi), %xmm0
-	movlpd	15(%esi), %xmm3
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm3, 15(%edi)
-# ifdef USE_AS_STPCPY
-	lea	23(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit24):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	24(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit25):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movb	24(%esi), %cl
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movb	%cl, 24(%edi)
-# ifdef USE_AS_STPCPY
-	lea	25(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit26):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movw	24(%esi), %cx
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movw	%cx, 24(%edi)
-# ifdef USE_AS_STPCPY
-	lea	26(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit27):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	23(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movl	%ecx, 23(%edi)
-# ifdef USE_AS_STPCPY
-	lea	27(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit28):
-	movdqu	(%esi), %xmm0
-	movlpd	16(%esi), %xmm2
-	movl	24(%esi), %ecx
-	movdqu	%xmm0, (%edi)
-	movlpd	%xmm2, 16(%edi)
-	movl	%ecx, 24(%edi)
-# ifdef USE_AS_STPCPY
-	lea	28(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit29):
-	movdqu	(%esi), %xmm0
-	movdqu	13(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 13(%edi)
-# ifdef USE_AS_STPCPY
-	lea	29(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit30):
-	movdqu	(%esi), %xmm0
-	movdqu	14(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 14(%edi)
-# ifdef USE_AS_STPCPY
-	lea	30(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit31):
-	movdqu	(%esi), %xmm0
-	movdqu	15(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 15(%edi)
-# ifdef USE_AS_STPCPY
-	lea	31(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit32):
-	movdqu	(%esi), %xmm0
-	movdqu	16(%esi), %xmm2
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
-	lea	32(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit33):
-	movdqu	(%esi), %xmm0
-	movdqu	16(%esi), %xmm2
-	movb	32(%esi), %cl
-	movdqu	%xmm0, (%edi)
-	movdqu	%xmm2, 16(%edi)
-	movb	%cl, 32(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill0):
-	RETURN
-
-	.p2align 4
-L(Fill1):
-	movb	%dl, (%edi)
-	RETURN
-
-	.p2align 4
-L(Fill2):
-	movw	%dx, (%edi)
-	RETURN
-
-	.p2align 4
-L(Fill3):
-	movl	%edx, -1(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill4):
-	movl	%edx, (%edi)
-	RETURN
-
-	.p2align 4
-L(Fill5):
-	movl	%edx, (%edi)
-	movb	%dl, 4(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill6):
-	movl	%edx, (%edi)
-	movw	%dx, 4(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill7):
-	movlpd	%xmm0, -1(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill8):
-	movlpd	%xmm0, (%edi)
-	RETURN
-
-	.p2align 4
-L(Fill9):
-	movlpd	%xmm0, (%edi)
-	movb	%dl, 8(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill10):
-	movlpd	%xmm0, (%edi)
-	movw	%dx, 8(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill11):
-	movlpd	%xmm0, (%edi)
-	movl	%edx, 7(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill12):
-	movlpd	%xmm0, (%edi)
-	movl	%edx, 8(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill13):
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm0, 5(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill14):
-	movlpd	%xmm0, (%edi)
-	movlpd	%xmm0, 6(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill15):
-	movdqu	%xmm0, -1(%edi)
-	RETURN
-
-	.p2align 4
-L(Fill16):
-	movdqu	%xmm0, (%edi)
-	RETURN
-
-	.p2align 4
-L(CopyFrom1To16BytesUnalignedXmm2):
-	movdqu	%xmm2, (%edi, %ecx)
-
-	.p2align 4
-L(CopyFrom1To16BytesXmmExit):
-	bsf	%edx, %edx
-	add	$15, %ebx
-	add	%ecx, %edi
-# ifdef USE_AS_STPCPY
-	lea	(%edi, %edx), %eax
-# endif
-	sub	%edx, %ebx
-	lea	1(%edi, %edx), %edi
-
-	.p2align 4
-L(StrncpyFillTailWithZero):
-	pxor	%xmm0, %xmm0
-	xor	%edx, %edx
-	sub	$16, %ebx
-	jbe	L(StrncpyFillExit)
-
-	movdqu	%xmm0, (%edi)
-	add	$16, %edi
-
-	mov	%edi, %esi
-	and	$0xf, %esi
-	sub	%esi, %edi
-	add	%esi, %ebx
-	sub	$64, %ebx
-	jb	L(StrncpyFillLess64)
-
-L(StrncpyFillLoopMovdqa):
-	movdqa	%xmm0, (%edi)
-	movdqa	%xmm0, 16(%edi)
-	movdqa	%xmm0, 32(%edi)
-	movdqa	%xmm0, 48(%edi)
-	add	$64, %edi
-	sub	$64, %ebx
-	jae	L(StrncpyFillLoopMovdqa)
-
-L(StrncpyFillLess64):
-	add	$32, %ebx
-	jl	L(StrncpyFillLess32)
-	movdqa	%xmm0, (%edi)
-	movdqa	%xmm0, 16(%edi)
-	add	$32, %edi
-	sub	$16, %ebx
-	jl	L(StrncpyFillExit)
-	movdqa	%xmm0, (%edi)
-	add	$16, %edi
-	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
-
-L(StrncpyFillLess32):
-	add	$16, %ebx
-	jl	L(StrncpyFillExit)
-	movdqa	%xmm0, (%edi)
-	add	$16, %edi
-	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
-
-L(StrncpyFillExit):
-	add	$16, %ebx
-	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
-
-	.p2align 4
-L(UnalignedLeaveCase2OrCase3):
-	test	%edx, %edx
-	jnz	L(Unaligned64LeaveCase2)
-L(Unaligned64LeaveCase3):
-	lea	64(%ebx), %ecx
-	and	$-16, %ecx
-	add	$48, %ebx
-	jl	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm4, (%edi)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm5, 16(%edi)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm6, 32(%edi)
-	sub	$16, %ebx
-	jb	L(CopyFrom1To16BytesCase3)
-	movdqu	%xmm7, 48(%edi)
-# ifdef USE_AS_STPCPY
-	lea	64(%edi), %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Unaligned64LeaveCase2):
-	xor	%ecx, %ecx
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %edx
-	add	$48, %ebx
-	jle	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
-
-	pcmpeqb	%xmm5, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm4, (%edi)
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm5)
-
-	pcmpeqb	%xmm6, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm5, 16(%edi)
-	add	$16, %ecx
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%edx, %edx
-	jnz	L(CopyFrom1To16BytesUnalignedXmm6)
-
-	pcmpeqb	%xmm7, %xmm0
-	pmovmskb %xmm0, %edx
-	movdqu	%xmm6, 32(%edi)
-	lea	16(%edi, %ecx), %edi
-	lea	16(%esi, %ecx), %esi
-	bsf	%edx, %edx
-	cmp	%ebx, %edx
-	jb	L(CopyFrom1To16BytesExit)
-	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-	.p2align 4
-L(ExitZero):
-	movl	%edi, %eax
-	RETURN
-
-END (STRCPY)
-
-	.p2align 4
-	.section .rodata
-L(ExitTable):
-	.int	JMPTBL(L(Exit1), L(ExitTable))
-	.int	JMPTBL(L(Exit2), L(ExitTable))
-	.int	JMPTBL(L(Exit3), L(ExitTable))
-	.int	JMPTBL(L(Exit4), L(ExitTable))
-	.int	JMPTBL(L(Exit5), L(ExitTable))
-	.int	JMPTBL(L(Exit6), L(ExitTable))
-	.int	JMPTBL(L(Exit7), L(ExitTable))
-	.int	JMPTBL(L(Exit8), L(ExitTable))
-	.int	JMPTBL(L(Exit9), L(ExitTable))
-	.int	JMPTBL(L(Exit10), L(ExitTable))
-	.int	JMPTBL(L(Exit11), L(ExitTable))
-	.int	JMPTBL(L(Exit12), L(ExitTable))
-	.int	JMPTBL(L(Exit13), L(ExitTable))
-	.int	JMPTBL(L(Exit14), L(ExitTable))
-	.int	JMPTBL(L(Exit15), L(ExitTable))
-	.int	JMPTBL(L(Exit16), L(ExitTable))
-	.int	JMPTBL(L(Exit17), L(ExitTable))
-	.int	JMPTBL(L(Exit18), L(ExitTable))
-	.int	JMPTBL(L(Exit19), L(ExitTable))
-	.int	JMPTBL(L(Exit20), L(ExitTable))
-	.int	JMPTBL(L(Exit21), L(ExitTable))
-	.int	JMPTBL(L(Exit22), L(ExitTable))
-	.int    JMPTBL(L(Exit23), L(ExitTable))
-	.int	JMPTBL(L(Exit24), L(ExitTable))
-	.int	JMPTBL(L(Exit25), L(ExitTable))
-	.int	JMPTBL(L(Exit26), L(ExitTable))
-	.int	JMPTBL(L(Exit27), L(ExitTable))
-	.int	JMPTBL(L(Exit28), L(ExitTable))
-	.int	JMPTBL(L(Exit29), L(ExitTable))
-	.int	JMPTBL(L(Exit30), L(ExitTable))
-	.int	JMPTBL(L(Exit31), L(ExitTable))
-	.int	JMPTBL(L(Exit32), L(ExitTable))
-
-L(ExitStrncpyTable):
-	.int	JMPTBL(L(Exit0), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
-	.int    JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
-	.int	JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
-
-	.p2align 4
-L(FillTable):
-	.int	JMPTBL(L(Fill0), L(FillTable))
-	.int	JMPTBL(L(Fill1), L(FillTable))
-	.int	JMPTBL(L(Fill2), L(FillTable))
-	.int	JMPTBL(L(Fill3), L(FillTable))
-	.int	JMPTBL(L(Fill4), L(FillTable))
-	.int	JMPTBL(L(Fill5), L(FillTable))
-	.int	JMPTBL(L(Fill6), L(FillTable))
-	.int	JMPTBL(L(Fill7), L(FillTable))
-	.int	JMPTBL(L(Fill8), L(FillTable))
-	.int	JMPTBL(L(Fill9), L(FillTable))
-	.int	JMPTBL(L(Fill10), L(FillTable))
-	.int	JMPTBL(L(Fill11), L(FillTable))
-	.int	JMPTBL(L(Fill12), L(FillTable))
-	.int	JMPTBL(L(Fill13), L(FillTable))
-	.int	JMPTBL(L(Fill14), L(FillTable))
-	.int	JMPTBL(L(Fill15), L(FillTable))
-	.int	JMPTBL(L(Fill16), L(FillTable))
-# else
-#  define PARMS  4
-#  define ENTRANCE
-#  define RETURN  POP (%edi); ret; CFI_PUSH (%edi)
-#  define RETURN1  ret
-
-	.text
-ENTRY (STRCPY)
-	ENTRANCE
-	mov	STR1(%esp), %edx
-	mov	STR2(%esp), %ecx
-
-	cmpb	$0, (%ecx)
-	jz	L(ExitTail1)
-	cmpb	$0, 1(%ecx)
-	jz	L(ExitTail2)
-	cmpb	$0, 2(%ecx)
-	jz	L(ExitTail3)
-	cmpb	$0, 3(%ecx)
-	jz	L(ExitTail4)
-	cmpb	$0, 4(%ecx)
-	jz	L(ExitTail5)
-	cmpb	$0, 5(%ecx)
-	jz	L(ExitTail6)
-	cmpb	$0, 6(%ecx)
-	jz	L(ExitTail7)
-	cmpb	$0, 7(%ecx)
-	jz	L(ExitTail8)
-	cmpb	$0, 8(%ecx)
-	jz	L(ExitTail9)
-	cmpb	$0, 9(%ecx)
-	jz	L(ExitTail10)
-	cmpb	$0, 10(%ecx)
-	jz	L(ExitTail11)
-	cmpb	$0, 11(%ecx)
-	jz	L(ExitTail12)
-	cmpb	$0, 12(%ecx)
-	jz	L(ExitTail13)
-	cmpb	$0, 13(%ecx)
-	jz	L(ExitTail14)
-	cmpb	$0, 14(%ecx)
-	jz	L(ExitTail15)
-	cmpb	$0, 15(%ecx)
-	jz	L(ExitTail16)
-
-	PUSH	(%edi)
-	PUSH	(%ebx)
-
-	mov	%edx, %edi
-	lea	16(%ecx), %ebx
-	and	$-16, %ebx
-	pxor	%xmm0, %xmm0
-	movdqu	(%ecx), %xmm1
-	movdqu	%xmm1, (%edx)
-	pcmpeqb	(%ebx), %xmm0
-	pmovmskb %xmm0, %eax
-	sub	%ecx, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	mov	%ecx, %eax
-	lea	16(%ecx), %ecx
-	and	$-16, %ecx
-	sub	%ecx, %eax
-	sub	%eax, %edx
-	xor	%ebx, %ebx
-
-	.p2align 4
-	movdqa	(%ecx), %xmm1
-	movaps	16(%ecx), %xmm2
-	movdqu	%xmm1, (%edx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm3
-	movdqu	%xmm2, (%edx, %ebx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm4
-	movdqu	%xmm3, (%edx, %ebx)
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm1
-	movdqu	%xmm4, (%edx, %ebx)
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm2
-	movdqu	%xmm1, (%edx, %ebx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %ebx), %xmm3
-	movdqu	%xmm2, (%edx, %ebx)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$16, %ebx
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movdqu	%xmm3, (%edx, %ebx)
-	mov	%ecx, %eax
-	lea	16(%ecx, %ebx), %ecx
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	sub	%eax, %edx
-
-L(Aligned64Loop):
-	movaps	(%ecx), %xmm2
-	movaps	%xmm2, %xmm4
-	movaps	16(%ecx), %xmm5
-	movaps	32(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	48(%ecx), %xmm7
-	pminub	%xmm5, %xmm2
-	add	$64, %ecx
-	pminub	%xmm7, %xmm3
-	add	$64, %edx
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm0, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(Aligned64Leave)
-L(Aligned64Loop_start):
-	movdqu	%xmm4, -64(%edx)
-	movaps	(%ecx), %xmm2
-	movdqa	%xmm2, %xmm4
-	movdqu	%xmm5, -48(%edx)
-	movaps	16(%ecx), %xmm5
-	pminub	%xmm5, %xmm2
-	movaps	32(%ecx), %xmm3
-	movdqu	%xmm6, -32(%edx)
-	movaps	%xmm3, %xmm6
-	movdqu	%xmm7, -16(%edx)
-	movaps	48(%ecx), %xmm7
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$64, %edx
-	add	$64, %ecx
-	test	%eax, %eax
-	jz	L(Aligned64Loop_start)
-L(Aligned64Leave):
-	sub	$0xa0, %ebx
-	pxor	%xmm0, %xmm0
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm5, %xmm0
-	pmovmskb %xmm0, %eax
-	movdqu	%xmm4, -64(%edx)
-	test	%eax, %eax
-	lea	16(%ebx), %ebx
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm6, %xmm0
-	pmovmskb %xmm0, %eax
-	movdqu	%xmm5, -48(%edx)
-	test	%eax, %eax
-	lea	16(%ebx), %ebx
-	jnz	L(CopyFrom1To16Bytes)
-
-	movdqu	%xmm6, -32(%edx)
-	pcmpeqb	%xmm7, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%ebx), %ebx
-
-/*-----------------End of main part---------------------------*/
-
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%ebx, %edx
-	add	%ebx, %ecx
-
-	POP	(%ebx)
-	test	%al, %al
-	jz	L(ExitHigh)
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	/* Exit 8 */
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	7(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(ExitHigh):
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	test	$0x08, %ah
-	jnz	L(Exit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	/* Exit 16 */
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	8(%ecx), %xmm0
-	movlpd	%xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	15(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	1(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-# ifdef USE_AS_STPCPY
-	lea	2(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	3(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	4(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	5(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-# ifdef USE_AS_STPCPY
-	lea	6(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit9):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	8(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit10):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	9(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit11):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-# ifdef USE_AS_STPCPY
-	lea	10(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit12):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	11(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
-	lea	12(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
-	lea	13(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-	.p2align 4
-L(Exit15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
-	lea	14(%edx), %eax
-# else
-	movl	%edi, %eax
-# endif
-	RETURN
-
-CFI_POP (%edi)
-
-	.p2align 4
-L(ExitTail1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-	movl	%edx, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitTail2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	1(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-# ifdef USE_AS_STPCPY
-	lea	2(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-# ifdef USE_AS_STPCPY
-	lea	3(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	4(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	5(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-# ifdef USE_AS_STPCPY
-	lea	6(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail8):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-# ifdef USE_AS_STPCPY
-	lea	7(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail9):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	8(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail10):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	9(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail11):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-# ifdef USE_AS_STPCPY
-	lea	10(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail12):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	4(%ecx), %eax
-	movl	%eax, 4(%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	11(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
-	lea	12(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
-	lea	13(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
-	lea	14(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-	.p2align 4
-L(ExitTail16):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	8(%ecx), %xmm0
-	movlpd	%xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
-	lea	15(%edx), %eax
-# else
-	movl	%edx, %eax
-# endif
-	RETURN1
-
-END (STRCPY)
-# endif
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcpy-ssse3.S b/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
deleted file mode 100644
index effd85da94..0000000000
--- a/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
+++ /dev/null
@@ -1,3901 +0,0 @@
-/* strcpy with SSSE3
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-
-#if IS_IN (libc)
-
-# ifndef USE_AS_STRCAT
-#  include <sysdep.h>
-
-#  define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)	/* CFI bookkeeping for a 4-byte push of REG (cfi_* helpers presumably come from <sysdep.h>) */
-
-#  define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)	/* CFI bookkeeping for the matching 4-byte pop of REG */
-
-#  define PUSH(REG)	pushl REG; CFI_PUSH (REG)	/* push REG and emit the matching CFI */
-#  define POP(REG)	popl REG; CFI_POP (REG)	/* pop REG and emit the matching CFI */
-
-#  ifndef STRCPY
-#   define STRCPY  __strcpy_ssse3	/* default entry-point name, used only when STRCPY is not already defined */
-#  endif
-	/* Stack layout and return sequences; the strncpy build additionally saves %ebx, which ENTRY loads with LEN.  */
-#  ifdef USE_AS_STRNCPY
-#   define PARMS  8	/* offset of the first argument from %esp after ENTRANCE: return address + saved %ebx */
-#   define ENTRANCE PUSH (%ebx)	/* prologue: save %ebx */
-#   define RETURN  POP (%ebx); ret; CFI_PUSH (%ebx);	/* return when only %ebx is saved; the CFI_PUSH after ret presumably re-arms unwind info for the code that follows */
-#   define RETURN1  POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)	/* return when %edi has been pushed on top of %ebx */
-#  else
-#   define PARMS  4	/* only the return address lies between %esp and the first argument */
-#   define ENTRANCE	/* nothing to save on entry */
-#   define RETURN  ret	/* plain return, no registers to restore */
-#   define RETURN1  POP (%edi); ret; CFI_PUSH (%edi)	/* return when %edi has been pushed */
-#  endif
-	/* Load the function result into %eax; n is a byte offset applied to %edx in the stpcpy build.  */
-#  ifdef USE_AS_STPCPY
-#   define SAVE_RESULT(n)  lea	n(%edx), %eax	/* stpcpy build: result is %edx + n */
-#   define SAVE_RESULT_TAIL(n)  lea	n(%edx), %eax	/* stpcpy build: same computation as SAVE_RESULT */
-#  else
-#   define SAVE_RESULT(n)  movl	%edi, %eax	/* result comes from %edi, which ENTRY sets to the incoming %edx once %edi is pushed */
-#   define SAVE_RESULT_TAIL(n)  movl	%edx, %eax	/* result comes from %edx; NOTE(review): presumably for exits taken before %edi is set up - confirm at use sites */
-#  endif
-	/* %esp-relative offsets of the incoming stack arguments, valid right after ENTRANCE.  */
-#  define STR1  PARMS	/* first argument; ENTRY loads it into %edx, the base address that stores go to */
-#  define STR2  STR1+4	/* second argument; ENTRY loads it into %ecx, the base address that loads come from */
-#  define LEN  STR2+4	/* third argument; loaded into %ebx, in the USE_AS_STRNCPY build only */
-
-/* In this code the following instructions are used for copying:
-	movb	- 1 byte
-	movw	- 2 bytes
-	movl	- 4 bytes
-	movlpd	- 8 bytes
-	movaps	- 16 bytes - requires 16-byte alignment
-	of	source and destination addresses.
-*/
-
-.text
-ENTRY (STRCPY)
-	ENTRANCE
-	mov	STR1(%esp), %edx
-	mov	STR2(%esp), %ecx
-#  ifdef USE_AS_STRNCPY
-	movl	LEN(%esp), %ebx
-	cmp	$8, %ebx
-	jbe	L(StrncpyExit8Bytes)
-#  endif
-	cmpb	$0, (%ecx)
-	jz	L(ExitTail1)
-	cmpb	$0, 1(%ecx)
-	jz	L(ExitTail2)
-	cmpb	$0, 2(%ecx)
-	jz	L(ExitTail3)
-	cmpb	$0, 3(%ecx)
-	jz	L(ExitTail4)
-	cmpb	$0, 4(%ecx)
-	jz	L(ExitTail5)
-	cmpb	$0, 5(%ecx)
-	jz	L(ExitTail6)
-	cmpb	$0, 6(%ecx)
-	jz	L(ExitTail7)
-	cmpb	$0, 7(%ecx)
-	jz	L(ExitTail8)
-#  ifdef USE_AS_STRNCPY
-	cmp	$16, %ebx
-	jb	L(StrncpyExit15Bytes)
-#  endif
-	cmpb	$0, 8(%ecx)
-	jz	L(ExitTail9)
-	cmpb	$0, 9(%ecx)
-	jz	L(ExitTail10)
-	cmpb	$0, 10(%ecx)
-	jz	L(ExitTail11)
-	cmpb	$0, 11(%ecx)
-	jz	L(ExitTail12)
-	cmpb	$0, 12(%ecx)
-	jz	L(ExitTail13)
-	cmpb	$0, 13(%ecx)
-	jz	L(ExitTail14)
-	cmpb	$0, 14(%ecx)
-	jz	L(ExitTail15)
-#  ifdef USE_AS_STRNCPY
-	cmp	$16, %ebx
-	je	L(ExitTail16)
-#  endif
-	cmpb	$0, 15(%ecx)
-	jz	L(ExitTail16)
-
-	PUSH	(%edi)
-	mov	%edx, %edi
-# endif
-	PUSH	(%esi)
-# ifdef USE_AS_STRNCPY
-	mov	%ecx, %esi
-	sub	$16, %ebx
-	and	$0xf, %esi
-
-/* add 16 bytes ecx_offset to ebx */
-
-	add	%esi, %ebx
-# endif
-	lea	16(%ecx), %esi
-	and	$-16, %esi
-	pxor	%xmm0, %xmm0
-	movlpd	(%ecx), %xmm1
-	movlpd	%xmm1, (%edx)
-
-	pcmpeqb	(%esi), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm1, 8(%edx)
-
-	pmovmskb %xmm0, %eax
-	sub	%ecx, %esi
-
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	mov	%edx, %eax
-	lea	16(%edx), %edx
-	and	$-16, %edx
-	sub	%edx, %eax
-
-# ifdef USE_AS_STRNCPY
-	add	%eax, %esi
-	lea	-1(%esi), %esi
-	and	$1<<31, %esi
-	test	%esi, %esi
-	jnz	L(ContinueCopy)
-	lea	16(%ebx), %ebx
-
-L(ContinueCopy):
-# endif
-	sub	%eax, %ecx
-	mov	%ecx, %eax
-	and	$0xf, %eax
-	mov	$0, %esi
-
-/* case: ecx_offset == edx_offset */
-
-	jz	L(Align16Both)
-
-	cmp	$8, %eax
-	jae	L(ShlHigh8)
-	cmp	$1, %eax
-	je	L(Shl1)
-	cmp	$2, %eax
-	je	L(Shl2)
-	cmp	$3, %eax
-	je	L(Shl3)
-	cmp	$4, %eax
-	je	L(Shl4)
-	cmp	$5, %eax
-	je	L(Shl5)
-	cmp	$6, %eax
-	je	L(Shl6)
-	jmp	L(Shl7)
-
-L(ShlHigh8):
-	je	L(Shl8)
-	cmp	$9, %eax
-	je	L(Shl9)
-	cmp	$10, %eax
-	je	L(Shl10)
-	cmp	$11, %eax
-	je	L(Shl11)
-	cmp	$12, %eax
-	je	L(Shl12)
-	cmp	$13, %eax
-	je	L(Shl13)
-	cmp	$14, %eax
-	je	L(Shl14)
-	jmp	L(Shl15)
-
-L(Align16Both):
-	movaps	(%ecx), %xmm1
-	movaps	16(%ecx), %xmm2
-	movaps	%xmm1, (%edx)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm3
-	movaps	%xmm2, (%edx, %esi)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm4
-	movaps	%xmm3, (%edx, %esi)
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm1
-	movaps	%xmm4, (%edx, %esi)
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm2
-	movaps	%xmm1, (%edx, %esi)
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm3
-	movaps	%xmm2, (%edx, %esi)
-	pcmpeqb	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	%xmm3, (%edx, %esi)
-	mov	%ecx, %eax
-	lea	16(%ecx, %esi), %ecx
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	lea	112(%ebx, %eax), %ebx
-# endif
-	mov	$-0x40, %esi
-
-L(Aligned64Loop):
-	movaps	(%ecx), %xmm2
-	movaps	32(%ecx), %xmm3
-	movaps	%xmm2, %xmm4
-	movaps	16(%ecx), %xmm5
-	movaps	%xmm3, %xmm6
-	movaps	48(%ecx), %xmm7
-	pminub	%xmm5, %xmm2
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	lea	64(%edx), %edx
-	pcmpeqb	%xmm0, %xmm3
-	lea	64(%ecx), %ecx
-	pmovmskb %xmm3, %eax
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeaveCase2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Aligned64Leave)
-	movaps	%xmm4, -64(%edx)
-	movaps	%xmm5, -48(%edx)
-	movaps	%xmm6, -32(%edx)
-	movaps	%xmm7, -16(%edx)
-	jmp	L(Aligned64Loop)
-
-L(Aligned64Leave):
-# ifdef USE_AS_STRNCPY
-	lea	48(%ebx), %ebx
-# endif
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm5, %xmm0
-# ifdef USE_AS_STRNCPY
-	lea	-16(%ebx), %ebx
-# endif
-	pmovmskb %xmm0, %eax
-	movaps	%xmm4, -64(%edx)
-	test	%eax, %eax
-	lea	16(%esi), %esi
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm6, %xmm0
-# ifdef USE_AS_STRNCPY
-	lea	-16(%ebx), %ebx
-# endif
-	pmovmskb %xmm0, %eax
-	movaps	%xmm5, -48(%edx)
-	test	%eax, %eax
-	lea	16(%esi), %esi
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	%xmm6, -32(%edx)
-	pcmpeqb	%xmm7, %xmm0
-# ifdef USE_AS_STRNCPY
-	lea	-16(%ebx), %ebx
-# endif
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl1):
-	movaps	-1(%ecx), %xmm1
-	movaps	15(%ecx), %xmm2
-L(Shl1Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl1LoopExit)
-
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	31(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl1LoopExit)
-
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	31(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl1LoopExit)
-
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	31(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl1LoopExit)
-
-	palignr	$1, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	31(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-15(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-1(%ecx), %xmm1
-
-L(Shl1LoopStart):
-	movaps	15(%ecx), %xmm2
-	movaps	31(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	47(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	63(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$1, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$1, %xmm3, %xmm4
-	jnz	L(Shl1Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave1)
-# endif
-	palignr	$1, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl1LoopStart)
-
-L(Shl1LoopExit):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-	mov	$15, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl2):
-	movaps	-2(%ecx), %xmm1
-	movaps	14(%ecx), %xmm2
-L(Shl2Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl2LoopExit)
-
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	30(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl2LoopExit)
-
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	30(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl2LoopExit)
-
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	30(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl2LoopExit)
-
-	palignr	$2, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	30(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-14(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-2(%ecx), %xmm1
-
-L(Shl2LoopStart):
-	movaps	14(%ecx), %xmm2
-	movaps	30(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	46(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	62(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$2, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$2, %xmm3, %xmm4
-	jnz	L(Shl2Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave2)
-# endif
-	palignr	$2, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl2LoopStart)
-
-L(Shl2LoopExit):
-	movlpd	(%ecx), %xmm0
-	movlpd	6(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 6(%edx)
-	mov	$14, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl3):
-	movaps	-3(%ecx), %xmm1
-	movaps	13(%ecx), %xmm2
-L(Shl3Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl3LoopExit)
-
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	29(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl3LoopExit)
-
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	29(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl3LoopExit)
-
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	29(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl3LoopExit)
-
-	palignr	$3, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	29(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-13(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-3(%ecx), %xmm1
-
-L(Shl3LoopStart):
-	movaps	13(%ecx), %xmm2
-	movaps	29(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	45(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	61(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$3, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$3, %xmm3, %xmm4
-	jnz	L(Shl3Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave3)
-# endif
-	palignr	$3, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl3LoopStart)
-
-L(Shl3LoopExit):
-	movlpd	(%ecx), %xmm0
-	movlpd	5(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 5(%edx)
-	mov	$13, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl4):
-	movaps	-4(%ecx), %xmm1
-	movaps	12(%ecx), %xmm2
-L(Shl4Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	28(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-12(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-4(%ecx), %xmm1
-
-L(Shl4LoopStart):
-	movaps	12(%ecx), %xmm2
-	movaps	28(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	44(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	60(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$4, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$4, %xmm3, %xmm4
-	jnz	L(Shl4Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave4)
-# endif
-	palignr	$4, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl4LoopStart)
-
-L(Shl4LoopExit):
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 8(%edx)
-	mov	$12, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl5):
-	movaps	-5(%ecx), %xmm1
-	movaps	11(%ecx), %xmm2
-L(Shl5Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl5LoopExit)
-
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	27(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl5LoopExit)
-
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	27(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl5LoopExit)
-
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	27(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl5LoopExit)
-
-	palignr	$5, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	27(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-11(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-5(%ecx), %xmm1
-
-L(Shl5LoopStart):
-	movaps	11(%ecx), %xmm2
-	movaps	27(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	43(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	59(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$5, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$5, %xmm3, %xmm4
-	jnz	L(Shl5Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave5)
-# endif
-	palignr	$5, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl5LoopStart)
-
-L(Shl5LoopExit):
-	movlpd	(%ecx), %xmm0
-	movl	7(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 7(%edx)
-	mov	$11, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl6):
-	movaps	-6(%ecx), %xmm1
-	movaps	10(%ecx), %xmm2
-L(Shl6Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl6LoopExit)
-
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	26(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl6LoopExit)
-
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	26(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl6LoopExit)
-
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	26(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl6LoopExit)
-
-	palignr	$6, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	26(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-10(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-6(%ecx), %xmm1
-
-L(Shl6LoopStart):
-	movaps	10(%ecx), %xmm2
-	movaps	26(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	42(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	58(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$6, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$6, %xmm3, %xmm4
-	jnz	L(Shl6Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave6)
-# endif
-	palignr	$6, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl6LoopStart)
-
-L(Shl6LoopExit):
-	movlpd	(%ecx), %xmm0
-	movl	6(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 6(%edx)
-	mov	$10, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl7):
-	movaps	-7(%ecx), %xmm1
-	movaps	9(%ecx), %xmm2
-L(Shl7Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl7LoopExit)
-
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	25(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl7LoopExit)
-
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	25(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl7LoopExit)
-
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	25(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl7LoopExit)
-
-	palignr	$7, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	25(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-9(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-7(%ecx), %xmm1
-
-L(Shl7LoopStart):
-	movaps	9(%ecx), %xmm2
-	movaps	25(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	41(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	57(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$7, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$7, %xmm3, %xmm4
-	jnz	L(Shl7Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave7)
-# endif
-	palignr	$7, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl7LoopStart)
-
-L(Shl7LoopExit):
-	movlpd	(%ecx), %xmm0
-	movl	5(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 5(%edx)
-	mov	$9, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl8):
-	movaps	-8(%ecx), %xmm1
-	movaps	8(%ecx), %xmm2
-L(Shl8Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	24(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-8(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-8(%ecx), %xmm1
-
-L(Shl8LoopStart):
-	movaps	8(%ecx), %xmm2
-	movaps	24(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	40(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	56(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$8, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$8, %xmm3, %xmm4
-	jnz	L(Shl8Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave8)
-# endif
-	palignr	$8, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl8LoopStart)
-
-L(Shl8LoopExit):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	mov	$8, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl9):
-	movaps	-9(%ecx), %xmm1
-	movaps	7(%ecx), %xmm2
-L(Shl9Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl9LoopExit)
-
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	23(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl9LoopExit)
-
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	23(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl9LoopExit)
-
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	23(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl9LoopExit)
-
-	palignr	$9, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	23(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-7(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-9(%ecx), %xmm1
-
-L(Shl9LoopStart):
-	movaps	7(%ecx), %xmm2
-	movaps	23(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	39(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	55(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$9, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$9, %xmm3, %xmm4
-	jnz	L(Shl9Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave9)
-# endif
-	palignr	$9, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl9LoopStart)
-
-L(Shl9LoopExit):
-	movlpd	-1(%ecx), %xmm0
-	movlpd	%xmm0, -1(%edx)
-	mov	$7, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl10):
-	movaps	-10(%ecx), %xmm1
-	movaps	6(%ecx), %xmm2
-L(Shl10Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl10LoopExit)
-
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	22(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl10LoopExit)
-
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	22(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl10LoopExit)
-
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	22(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl10LoopExit)
-
-	palignr	$10, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	22(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-6(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-10(%ecx), %xmm1
-
-L(Shl10LoopStart):
-	movaps	6(%ecx), %xmm2
-	movaps	22(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	38(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	54(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$10, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$10, %xmm3, %xmm4
-	jnz	L(Shl10Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave10)
-# endif
-	palignr	$10, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl10LoopStart)
-
-L(Shl10LoopExit):
-	movlpd	-2(%ecx), %xmm0
-	movlpd	%xmm0, -2(%edx)
-	mov	$6, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl11):
-	movaps	-11(%ecx), %xmm1
-	movaps	5(%ecx), %xmm2
-L(Shl11Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl11LoopExit)
-
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	21(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl11LoopExit)
-
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	21(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl11LoopExit)
-
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	21(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl11LoopExit)
-
-	palignr	$11, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	21(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-5(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-11(%ecx), %xmm1
-
-L(Shl11LoopStart):
-	movaps	5(%ecx), %xmm2
-	movaps	21(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	37(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	53(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$11, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$11, %xmm3, %xmm4
-	jnz	L(Shl11Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave11)
-# endif
-	palignr	$11, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl11LoopStart)
-
-L(Shl11LoopExit):
-	movlpd	-3(%ecx), %xmm0
-	movlpd	%xmm0, -3(%edx)
-	mov	$5, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl12):
-	movaps	-12(%ecx), %xmm1
-	movaps	4(%ecx), %xmm2
-L(Shl12Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	20(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-4(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-12(%ecx), %xmm1
-
-L(Shl12LoopStart):
-	movaps	4(%ecx), %xmm2
-	movaps	20(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	36(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	52(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$12, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$12, %xmm3, %xmm4
-	jnz	L(Shl12Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave12)
-# endif
-	palignr	$12, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl12LoopStart)
-
-L(Shl12LoopExit):
-	movl	(%ecx), %esi
-	movl	%esi, (%edx)
-	mov	$4, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl13):
-	movaps	-13(%ecx), %xmm1
-	movaps	3(%ecx), %xmm2
-L(Shl13Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl13LoopExit)
-
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	19(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl13LoopExit)
-
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	19(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl13LoopExit)
-
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	19(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl13LoopExit)
-
-	palignr	$13, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	19(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-3(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-13(%ecx), %xmm1
-
-L(Shl13LoopStart):
-	movaps	3(%ecx), %xmm2
-	movaps	19(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	35(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	51(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$13, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$13, %xmm3, %xmm4
-	jnz	L(Shl13Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave13)
-# endif
-	palignr	$13, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl13LoopStart)
-
-L(Shl13LoopExit):
-	movl	-1(%ecx), %esi
-	movl	%esi, -1(%edx)
-	mov	$3, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl14):
-	movaps	-14(%ecx), %xmm1
-	movaps	2(%ecx), %xmm2
-L(Shl14Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl14LoopExit)
-
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	18(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl14LoopExit)
-
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	18(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl14LoopExit)
-
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	18(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl14LoopExit)
-
-	palignr	$14, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	18(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-2(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-14(%ecx), %xmm1
-
-L(Shl14LoopStart):
-	movaps	2(%ecx), %xmm2
-	movaps	18(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	34(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	50(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$14, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$14, %xmm3, %xmm4
-	jnz	L(Shl14Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave14)
-# endif
-	palignr	$14, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl14LoopStart)
-
-L(Shl14LoopExit):
-	movl	-2(%ecx), %esi
-	movl	%esi, -2(%edx)
-	mov	$2, %esi
-	jmp	L(CopyFrom1To16Bytes)
-
-	.p2align 4
-L(Shl15):
-	movaps	-15(%ecx), %xmm1
-	movaps	1(%ecx), %xmm2
-L(Shl15Start):
-	pcmpeqb	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl15LoopExit)
-
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm3, %xmm1
-	movaps	%xmm2, (%edx)
-	movaps	17(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl15LoopExit)
-
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	17(%ecx), %xmm2
-	movaps	%xmm3, %xmm1
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl15LoopExit)
-
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	17(%ecx), %xmm2
-
-	pcmpeqb	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15Case2OrCase3)
-# endif
-	test	%eax, %eax
-	jnz	L(Shl15LoopExit)
-
-	palignr	$15, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	17(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-1(%ecx), %ecx
-	sub	%eax, %edx
-# ifdef USE_AS_STRNCPY
-	add	%eax, %ebx
-# endif
-	movaps	-15(%ecx), %xmm1
-
-L(Shl15LoopStart):
-	movaps	1(%ecx), %xmm2
-	movaps	17(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	33(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	49(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqb	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$15, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$15, %xmm3, %xmm4
-	jnz	L(Shl15Start)
-# ifdef USE_AS_STRNCPY
-	sub	$64, %ebx
-	jbe	L(StrncpyLeave15)
-# endif
-	palignr	$15, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl15LoopStart)
-
-L(Shl15LoopExit):
-	movl	-3(%ecx), %esi
-	movl	%esi, -3(%edx)
-	mov	$1, %esi
-# ifdef USE_AS_STRCAT
-	jmp	L(CopyFrom1To16Bytes)
-# endif
-
-
-# ifndef USE_AS_STRCAT
-
-	.p2align 4
-L(CopyFrom1To16Bytes):
-#  ifdef USE_AS_STRNCPY
-	add	$16, %ebx
-#  endif
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-	test	%al, %al
-	jz	L(ExitHigh8)
-
-L(CopyFrom1To16BytesLess8):
-	mov	%al, %ah
-	and	$15, %ah
-	jz	L(ExitHigh4)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-
-	.p2align 4
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	SAVE_RESULT	(3)
-#  ifdef USE_AS_STRNCPY
-	sub	$4, %ebx
-	lea	4(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(ExitHigh4):
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-
-	.p2align 4
-L(Exit8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	SAVE_RESULT	(7)
-#  ifdef USE_AS_STRNCPY
-	sub	$8, %ebx
-	lea	8(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(ExitHigh8):
-	mov	%ah, %al
-	and	$15, %al
-	jz	L(ExitHigh12)
-
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-
-	.p2align 4
-L(Exit12):
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %eax
-	movlpd	%xmm0, (%edx)
-	movl	%eax, 8(%edx)
-	SAVE_RESULT	(11)
-#  ifdef USE_AS_STRNCPY
-	sub	$12, %ebx
-	lea	12(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(ExitHigh12):
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-
-	.p2align 4
-L(Exit16):
-	movdqu	(%ecx), %xmm0
-	movdqu	%xmm0, (%edx)
-	SAVE_RESULT	(15)
-#  ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	lea	16(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-#   ifdef USE_AS_STRNCPY
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %ebx
-	add	%esi, %ecx
-	add	%esi, %edx
-
-	POP	(%esi)
-
-	test	%al, %al
-	jz	L(ExitHighCase2)
-
-	cmp	$8, %ebx
-	ja	L(CopyFrom1To16BytesLess8)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	cmp	$1, %ebx
-	je	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	cmp	$2, %ebx
-	je	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	cmp	$3, %ebx
-	je	L(Exit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	cmp	$4, %ebx
-	je	L(Exit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	cmp	$5, %ebx
-	je	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	cmp	$6, %ebx
-	je	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	cmp	$7, %ebx
-	je	L(Exit7)
-	jmp	L(Exit8)
-
-	.p2align 4
-L(ExitHighCase2):
-	cmp	$8, %ebx
-	jbe	L(CopyFrom1To16BytesLess8Case3)
-
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	cmp	$9, %ebx
-	je	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	cmp	$10, %ebx
-	je	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	cmp	$11, %ebx
-	je	L(Exit11)
-	test	$0x8, %ah
-	jnz	L(Exit12)
-	cmp	$12, %ebx
-	je	L(Exit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	cmp	$13, %ebx
-	je	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	cmp	$14, %ebx
-	je	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	cmp	$15, %ebx
-	je	L(Exit15)
-	jmp	L(Exit16)
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase3):
-	add	$16, %ebx
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-
-	cmp	$8, %ebx
-	ja	L(ExitHigh8Case3)
-
-L(CopyFrom1To16BytesLess8Case3):
-	cmp	$4, %ebx
-	ja	L(ExitHigh4Case3)
-
-	cmp	$1, %ebx
-	je	L(Exit1)
-	cmp	$2, %ebx
-	je	L(Exit2)
-	cmp	$3, %ebx
-	je	L(Exit3)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	SAVE_RESULT	(4)
-	RETURN1
-
-	.p2align 4
-L(ExitHigh4Case3):
-	cmp	$5, %ebx
-	je	L(Exit5)
-	cmp	$6, %ebx
-	je	L(Exit6)
-	cmp	$7, %ebx
-	je	L(Exit7)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	SAVE_RESULT	(8)
-	RETURN1
-
-	.p2align 4
-L(ExitHigh8Case3):
-	cmp	$12, %ebx
-	ja	L(ExitHigh12Case3)
-
-	cmp	$9, %ebx
-	je	L(Exit9)
-	cmp	$10, %ebx
-	je	L(Exit10)
-	cmp	$11, %ebx
-	je	L(Exit11)
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %eax
-	movlpd	%xmm0, (%edx)
-	movl	%eax, 8(%edx)
-	SAVE_RESULT	(12)
-	RETURN1
-
-	.p2align 4
-L(ExitHigh12Case3):
-	cmp	$13, %ebx
-	je	L(Exit13)
-	cmp	$14, %ebx
-	je	L(Exit14)
-	cmp	$15, %ebx
-	je	L(Exit15)
-	movlpd	(%ecx), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-	SAVE_RESULT	(16)
-	RETURN1
-
-#  endif
-
-	.p2align 4
-L(Exit1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-	SAVE_RESULT	(0)
-#  ifdef USE_AS_STRNCPY
-	sub	$1, %ebx
-	lea	1(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	SAVE_RESULT	(1)
-#  ifdef USE_AS_STRNCPY
-	sub	$2, %ebx
-	lea	2(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-	SAVE_RESULT	(2)
-#  ifdef USE_AS_STRNCPY
-	sub	$3, %ebx
-	lea	3(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-	SAVE_RESULT	(4)
-#  ifdef USE_AS_STRNCPY
-	sub	$5, %ebx
-	lea	5(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-	SAVE_RESULT	(5)
-#  ifdef USE_AS_STRNCPY
-	sub	$6, %ebx
-	lea	6(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-	SAVE_RESULT	(6)
-#  ifdef USE_AS_STRNCPY
-	sub	$7, %ebx
-	lea	7(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit9):
-	movlpd	(%ecx), %xmm0
-	movb	8(%ecx), %al
-	movlpd	%xmm0, (%edx)
-	movb	%al, 8(%edx)
-	SAVE_RESULT	(8)
-#  ifdef USE_AS_STRNCPY
-	sub	$9, %ebx
-	lea	9(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit10):
-	movlpd	(%ecx), %xmm0
-	movw	8(%ecx), %ax
-	movlpd	%xmm0, (%edx)
-	movw	%ax, 8(%edx)
-	SAVE_RESULT	(9)
-#  ifdef USE_AS_STRNCPY
-	sub	$10, %ebx
-	lea	10(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit11):
-	movlpd	(%ecx), %xmm0
-	movl	7(%ecx), %eax
-	movlpd	%xmm0, (%edx)
-	movl	%eax, 7(%edx)
-	SAVE_RESULT	(10)
-#  ifdef USE_AS_STRNCPY
-	sub	$11, %ebx
-	lea	11(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit13):
-	movlpd	(%ecx), %xmm0
-	movlpd	5(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 5(%edx)
-	SAVE_RESULT	(12)
-#  ifdef USE_AS_STRNCPY
-	sub	$13, %ebx
-	lea	13(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit14):
-	movlpd	(%ecx), %xmm0
-	movlpd	6(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 6(%edx)
-	SAVE_RESULT	(13)
-#  ifdef USE_AS_STRNCPY
-	sub	$14, %ebx
-	lea	14(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-	.p2align 4
-L(Exit15):
-	movlpd	(%ecx), %xmm0
-	movlpd	7(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 7(%edx)
-	SAVE_RESULT	(14)
-#  ifdef USE_AS_STRNCPY
-	sub	$15, %ebx
-	lea	15(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero1)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN1
-
-CFI_POP	(%edi)
-
-#  ifdef USE_AS_STRNCPY
-	.p2align 4
-L(Fill0):
-	RETURN
-
-	.p2align 4
-L(Fill1):
-	movb	%dl, (%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill2):
-	movw	%dx, (%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill3):
-	movw	%dx, (%ecx)
-	movb	%dl, 2(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill4):
-	movl	%edx, (%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill5):
-	movl	%edx, (%ecx)
-	movb	%dl, 4(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill6):
-	movl	%edx, (%ecx)
-	movw	%dx, 4(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill7):
-	movl	%edx, (%ecx)
-	movl	%edx, 3(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill8):
-	movlpd	%xmm0, (%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill9):
-	movlpd	%xmm0, (%ecx)
-	movb	%dl, 8(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill10):
-	movlpd	%xmm0, (%ecx)
-	movw	%dx, 8(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill11):
-	movlpd	%xmm0, (%ecx)
-	movl	%edx, 7(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill12):
-	movlpd	%xmm0, (%ecx)
-	movl	%edx, 8(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill13):
-	movlpd	%xmm0, (%ecx)
-	movlpd	%xmm0, 5(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill14):
-	movlpd	%xmm0, (%ecx)
-	movlpd	%xmm0, 6(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill15):
-	movlpd	%xmm0, (%ecx)
-	movlpd	%xmm0, 7(%ecx)
-	RETURN
-
-	.p2align 4
-L(Fill16):
-	movlpd	%xmm0, (%ecx)
-	movlpd	%xmm0, 8(%ecx)
-	RETURN
-
-	.p2align 4
-L(StrncpyFillExit1):
-	lea	16(%ebx), %ebx
-L(FillFrom1To16Bytes):
-	test	%ebx, %ebx
-	jz	L(Fill0)
-	cmp	$16, %ebx
-	je	L(Fill16)
-	cmp	$8, %ebx
-	je	L(Fill8)
-	jg	L(FillMore8)
-	cmp	$4, %ebx
-	je	L(Fill4)
-	jg	L(FillMore4)
-	cmp	$2, %ebx
-	jl	L(Fill1)
-	je	L(Fill2)
-	jg	L(Fill3)
-L(FillMore8):	/* but less than 16 */
-	cmp	$12, %ebx
-	je	L(Fill12)
-	jl	L(FillLess12)
-	cmp	$14, %ebx
-	jl	L(Fill13)
-	je	L(Fill14)
-	jg	L(Fill15)
-L(FillMore4):	/* but less than 8 */
-	cmp	$6, %ebx
-	jl	L(Fill5)
-	je	L(Fill6)
-	jg	L(Fill7)
-L(FillLess12):	/* but more than 8 */
-	cmp	$10, %ebx
-	jl	L(Fill9)
-	je	L(Fill10)
-	jmp	L(Fill11)
-
-	CFI_PUSH(%edi)
-
-	.p2align 4
-L(StrncpyFillTailWithZero1):
-	POP	(%edi)
-L(StrncpyFillTailWithZero):
-	pxor	%xmm0, %xmm0
-	xor	%edx, %edx
-	sub	$16, %ebx
-	jbe	L(StrncpyFillExit1)
-
-	movlpd	%xmm0, (%ecx)
-	movlpd	%xmm0, 8(%ecx)
-
-	lea	16(%ecx), %ecx
-
-	mov	%ecx, %edx
-	and	$0xf, %edx
-	sub	%edx, %ecx
-	add	%edx, %ebx
-	xor	%edx, %edx
-	sub	$64, %ebx
-	jb	L(StrncpyFillLess64)
-
-L(StrncpyFillLoopMovdqa):
-	movdqa	%xmm0, (%ecx)
-	movdqa	%xmm0, 16(%ecx)
-	movdqa	%xmm0, 32(%ecx)
-	movdqa	%xmm0, 48(%ecx)
-	lea	64(%ecx), %ecx
-	sub	$64, %ebx
-	jae	L(StrncpyFillLoopMovdqa)
-
-L(StrncpyFillLess64):
-	add	$32, %ebx
-	jl	L(StrncpyFillLess32)
-	movdqa	%xmm0, (%ecx)
-	movdqa	%xmm0, 16(%ecx)
-	lea	32(%ecx), %ecx
-	sub	$16, %ebx
-	jl	L(StrncpyFillExit1)
-	movdqa	%xmm0, (%ecx)
-	lea	16(%ecx), %ecx
-	jmp	L(FillFrom1To16Bytes)
-
-L(StrncpyFillLess32):
-	add	$16, %ebx
-	jl	L(StrncpyFillExit1)
-	movdqa	%xmm0, (%ecx)
-	lea	16(%ecx), %ecx
-	jmp	L(FillFrom1To16Bytes)
-#  endif
-
-	.p2align 4
-L(ExitTail1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-	SAVE_RESULT_TAIL (0)
-#  ifdef USE_AS_STRNCPY
-	sub	$1, %ebx
-	lea	1(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	SAVE_RESULT_TAIL (1)
-#  ifdef USE_AS_STRNCPY
-	sub	$2, %ebx
-	lea	2(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-	SAVE_RESULT_TAIL (2)
-#  ifdef USE_AS_STRNCPY
-	sub	$3, %ebx
-	lea	3(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	SAVE_RESULT_TAIL (3)
-#  ifdef USE_AS_STRNCPY
-	sub	$4, %ebx
-	lea	4(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-	SAVE_RESULT_TAIL (4)
-#  ifdef USE_AS_STRNCPY
-	sub	$5, %ebx
-	lea	5(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-	SAVE_RESULT_TAIL (5)
-#  ifdef USE_AS_STRNCPY
-	sub	$6, %ebx
-	lea	6(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-	SAVE_RESULT_TAIL (6)
-#  ifdef USE_AS_STRNCPY
-	sub	$7, %ebx
-	lea	7(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	SAVE_RESULT_TAIL (7)
-#  ifdef USE_AS_STRNCPY
-	sub	$8, %ebx
-	lea	8(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail9):
-	movlpd	(%ecx), %xmm0
-	movb	8(%ecx), %al
-	movlpd	%xmm0, (%edx)
-	movb	%al, 8(%edx)
-	SAVE_RESULT_TAIL (8)
-#  ifdef USE_AS_STRNCPY
-	sub	$9, %ebx
-	lea	9(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail10):
-	movlpd	(%ecx), %xmm0
-	movw	8(%ecx), %ax
-	movlpd	%xmm0, (%edx)
-	movw	%ax, 8(%edx)
-	SAVE_RESULT_TAIL (9)
-#  ifdef USE_AS_STRNCPY
-	sub	$10, %ebx
-	lea	10(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail11):
-	movlpd	(%ecx), %xmm0
-	movl	7(%ecx), %eax
-	movlpd	%xmm0, (%edx)
-	movl	%eax, 7(%edx)
-	SAVE_RESULT_TAIL (10)
-#  ifdef USE_AS_STRNCPY
-	sub	$11, %ebx
-	lea	11(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail12):
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %eax
-	movlpd	%xmm0, (%edx)
-	movl	%eax, 8(%edx)
-	SAVE_RESULT_TAIL (11)
-#  ifdef USE_AS_STRNCPY
-	sub	$12, %ebx
-	lea	12(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail13):
-	movlpd	(%ecx), %xmm0
-	movlpd	5(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 5(%edx)
-	SAVE_RESULT_TAIL (12)
-#  ifdef USE_AS_STRNCPY
-	sub	$13, %ebx
-	lea	13(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail14):
-	movlpd	(%ecx), %xmm0
-	movlpd	6(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 6(%edx)
-	SAVE_RESULT_TAIL (13)
-#  ifdef USE_AS_STRNCPY
-	sub	$14, %ebx
-	lea	14(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#  ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail15):
-	movlpd	(%ecx), %xmm0
-	movlpd	7(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 7(%edx)
-	SAVE_RESULT_TAIL (14)
-#  ifdef USE_AS_STRNCPY
-	sub	$15, %ebx
-	lea	15(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#  endif
-	RETURN
-
-	.p2align 4
-L(ExitTail16):
-	movdqu	(%ecx), %xmm0
-	movdqu	%xmm0, (%edx)
-	SAVE_RESULT_TAIL (15)
-#  ifdef USE_AS_STRNCPY
-	sub	$16, %ebx
-	lea	16(%edx), %ecx
-	jnz	L(StrncpyFillTailWithZero)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-#  endif
-	RETURN
-# endif
-
-# ifdef USE_AS_STRNCPY
-#  ifndef USE_AS_STRCAT
-	CFI_PUSH (%esi)
-	CFI_PUSH (%edi)
-#  endif
-	.p2align 4
-L(StrncpyLeaveCase2OrCase3):
-	test	%eax, %eax
-	jnz	L(Aligned64LeaveCase2)
-
-L(Aligned64LeaveCase3):
-	add	$48, %ebx
-	jle	L(CopyFrom1To16BytesCase3)
-	movaps	%xmm4, -64(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase3)
-	movaps	%xmm5, -48(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase3)
-	movaps	%xmm6, -32(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(Aligned64LeaveCase2):
-	pcmpeqb	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	add	$48, %ebx
-	jle	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm5, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm4, -64(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm6, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm5, -48(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(CopyFrom1To16BytesCase2OrCase3)
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqb	%xmm7, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm6, -32(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-	jmp	L(CopyFrom1To16BytesCase2)
-
-/*--------------------------------------------------*/
-	.p2align 4
-L(StrncpyExit1Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movlpd	7(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 7(%edx)
-	mov	$15, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit2Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movlpd	6(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 6(%edx)
-	mov	$14, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit3Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movlpd	5(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 5(%edx)
-	mov	$13, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit4Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 8(%edx)
-	mov	$12, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit5Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movl	7(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 7(%edx)
-	mov	$11, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit6Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movl	6(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 6(%edx)
-	mov	$10, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit7Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movl	5(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 5(%edx)
-	mov	$9, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit8Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	mov	$8, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit9Case2OrCase3):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	mov	$7, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit10Case2OrCase3):
-	movlpd	-1(%ecx), %xmm0
-	movlpd	%xmm0, -1(%edx)
-	mov	$6, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit11Case2OrCase3):
-	movlpd	-2(%ecx), %xmm0
-	movlpd	%xmm0, -2(%edx)
-	mov	$5, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit12Case2OrCase3):
-	movl	(%ecx), %esi
-	movl	%esi, (%edx)
-	mov	$4, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit13Case2OrCase3):
-	movl	-1(%ecx), %esi
-	movl	%esi, -1(%edx)
-	mov	$3, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit14Case2OrCase3):
-	movl	-2(%ecx), %esi
-	movl	%esi, -2(%edx)
-	mov	$2, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-	.p2align 4
-L(StrncpyExit15Case2OrCase3):
-	movl	-3(%ecx), %esi
-	movl	%esi, -3(%edx)
-	mov	$1, %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave1):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit1)
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	31(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1)
-	palignr	$1, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit1)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit1):
-	lea	15(%edx, %esi), %edx
-	lea	15(%ecx, %esi), %ecx
-	movdqu	-16(%ecx), %xmm0
-	xor	%esi, %esi
-	movdqu	%xmm0, -16(%edx)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave2):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit2)
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	30(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2)
-	palignr	$2, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit2)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit2):
-	lea	14(%edx, %esi), %edx
-	lea	14(%ecx, %esi), %ecx
-	movdqu	-16(%ecx), %xmm0
-	xor	%esi, %esi
-	movdqu	%xmm0, -16(%edx)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave3):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit3)
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	29(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3)
-	palignr	$3, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit3)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit3):
-	lea	13(%edx, %esi), %edx
-	lea	13(%ecx, %esi), %ecx
-	movdqu	-16(%ecx), %xmm0
-	xor	%esi, %esi
-	movdqu	%xmm0, -16(%edx)
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave4):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit4)
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4)
-	palignr	$4, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit4)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit4):
-	lea	12(%edx, %esi), %edx
-	lea	12(%ecx, %esi), %ecx
-	movlpd	-12(%ecx), %xmm0
-	movl	-4(%ecx), %eax
-	movlpd	%xmm0, -12(%edx)
-	movl	%eax, -4(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave5):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit5)
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	27(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5)
-	palignr	$5, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit5)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit5):
-	lea	11(%edx, %esi), %edx
-	lea	11(%ecx, %esi), %ecx
-	movlpd	-11(%ecx), %xmm0
-	movl	-4(%ecx), %eax
-	movlpd	%xmm0, -11(%edx)
-	movl	%eax, -4(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave6):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit6)
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	26(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6)
-	palignr	$6, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit6)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit6):
-	lea	10(%edx, %esi), %edx
-	lea	10(%ecx, %esi), %ecx
-
-	movlpd	-10(%ecx), %xmm0
-	movw	-2(%ecx), %ax
-	movlpd	%xmm0, -10(%edx)
-	movw	%ax, -2(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave7):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit7)
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	25(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7)
-	palignr	$7, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit7)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit7):
-	lea	9(%edx, %esi), %edx
-	lea	9(%ecx, %esi), %ecx
-
-	movlpd	-9(%ecx), %xmm0
-	movb	-1(%ecx), %ah
-	movlpd	%xmm0, -9(%edx)
-	movb	%ah, -1(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave8):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit8)
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8)
-	palignr	$8, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit8)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit8):
-	lea	8(%edx, %esi), %edx
-	lea	8(%ecx, %esi), %ecx
-	movlpd	-8(%ecx), %xmm0
-	movlpd	%xmm0, -8(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave9):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit9)
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	23(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9)
-	palignr	$9, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit9)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit9):
-	lea	7(%edx, %esi), %edx
-	lea	7(%ecx, %esi), %ecx
-
-	movlpd	-8(%ecx), %xmm0
-	movlpd	%xmm0, -8(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave10):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit10)
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	22(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10)
-	palignr	$10, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit10)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit10):
-	lea	6(%edx, %esi), %edx
-	lea	6(%ecx, %esi), %ecx
-
-	movlpd	-8(%ecx), %xmm0
-	movlpd	%xmm0, -8(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave11):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit11)
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	21(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11)
-	palignr	$11, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit11)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit11):
-	lea	5(%edx, %esi), %edx
-	lea	5(%ecx, %esi), %ecx
-	movl	-5(%ecx), %esi
-	movb	-1(%ecx), %ah
-	movl	%esi, -5(%edx)
-	movb	%ah, -1(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave12):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit12)
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12)
-	palignr	$12, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit12)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit12):
-	lea	4(%edx, %esi), %edx
-	lea	4(%ecx, %esi), %ecx
-	movl	-4(%ecx), %eax
-	movl	%eax, -4(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave13):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit13)
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	19(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13)
-	palignr	$13, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit13)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit13):
-	lea	3(%edx, %esi), %edx
-	lea	3(%ecx, %esi), %ecx
-
-	movl	-4(%ecx), %eax
-	movl	%eax, -4(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave14):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit14)
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	18(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14)
-	palignr	$14, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit14)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit14):
-	lea	2(%edx, %esi), %edx
-	lea	2(%ecx, %esi), %ecx
-	movw	-2(%ecx), %ax
-	movw	%ax, -2(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave15):
-	movaps	%xmm2, %xmm3
-	add	$48, %ebx
-	jle	L(StrncpyExit15)
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	17(%ecx), %xmm2
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15)
-	palignr	$15, %xmm3, %xmm2
-	movaps	%xmm2, 16(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15)
-	movaps	%xmm4, 32(%edx)
-	lea	16(%esi), %esi
-	sub	$16, %ebx
-	jbe	L(StrncpyExit15)
-	movaps	%xmm5, 48(%edx)
-	lea	16(%esi), %esi
-	lea	-16(%ebx), %ebx
-L(StrncpyExit15):
-	lea	1(%edx, %esi), %edx
-	lea	1(%ecx, %esi), %ecx
-	movb	-1(%ecx), %ah
-	movb	%ah, -1(%edx)
-	xor	%esi, %esi
-	jmp	L(CopyFrom1To16BytesCase3)
-# endif
-
-# ifndef USE_AS_STRCAT
-#  ifdef USE_AS_STRNCPY
-	CFI_POP (%esi)
-	CFI_POP (%edi)
-
-	.p2align 4
-L(ExitTail0):
-	movl	%edx, %eax
-	RETURN
-
-	.p2align 4
-L(StrncpyExit15Bytes):
-	cmp	$12, %ebx
-	jbe	L(StrncpyExit12Bytes)
-	cmpb	$0, 8(%ecx)
-	jz	L(ExitTail9)
-	cmpb	$0, 9(%ecx)
-	jz	L(ExitTail10)
-	cmpb	$0, 10(%ecx)
-	jz	L(ExitTail11)
-	cmpb	$0, 11(%ecx)
-	jz	L(ExitTail12)
-	cmp	$13, %ebx
-	je	L(ExitTail13)
-	cmpb	$0, 12(%ecx)
-	jz	L(ExitTail13)
-	cmp	$14, %ebx
-	je	L(ExitTail14)
-	cmpb	$0, 13(%ecx)
-	jz	L(ExitTail14)
-	movlpd	(%ecx), %xmm0
-	movlpd	7(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 7(%edx)
-#   ifdef USE_AS_STPCPY
-	lea	14(%edx), %eax
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   else
-	movl	%edx, %eax
-#   endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit12Bytes):
-	cmp	$9, %ebx
-	je	L(ExitTail9)
-	cmpb	$0, 8(%ecx)
-	jz	L(ExitTail9)
-	cmp	$10, %ebx
-	je	L(ExitTail10)
-	cmpb	$0, 9(%ecx)
-	jz	L(ExitTail10)
-	cmp	$11, %ebx
-	je	L(ExitTail11)
-	cmpb	$0, 10(%ecx)
-	jz	L(ExitTail11)
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %eax
-	movlpd	%xmm0, (%edx)
-	movl	%eax, 8(%edx)
-	SAVE_RESULT_TAIL (11)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit8Bytes):
-	cmp	$4, %ebx
-	jbe	L(StrncpyExit4Bytes)
-	cmpb	$0, (%ecx)
-	jz	L(ExitTail1)
-	cmpb	$0, 1(%ecx)
-	jz	L(ExitTail2)
-	cmpb	$0, 2(%ecx)
-	jz	L(ExitTail3)
-	cmpb	$0, 3(%ecx)
-	jz	L(ExitTail4)
-
-	cmp	$5, %ebx
-	je	L(ExitTail5)
-	cmpb	$0, 4(%ecx)
-	jz	L(ExitTail5)
-	cmp	$6, %ebx
-	je	L(ExitTail6)
-	cmpb	$0, 5(%ecx)
-	jz	L(ExitTail6)
-	cmp	$7, %ebx
-	je	L(ExitTail7)
-	cmpb	$0, 6(%ecx)
-	jz	L(ExitTail7)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-#   ifdef USE_AS_STPCPY
-	lea	7(%edx), %eax
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   else
-	movl	%edx, %eax
-#   endif
-	RETURN
-
-	.p2align 4
-L(StrncpyExit4Bytes):
-	test	%ebx, %ebx
-	jz	L(ExitTail0)
-	cmp	$1, %ebx
-	je	L(ExitTail1)
-	cmpb	$0, (%ecx)
-	jz	L(ExitTail1)
-	cmp	$2, %ebx
-	je	L(ExitTail2)
-	cmpb	$0, 1(%ecx)
-	jz	L(ExitTail2)
-	cmp	$3, %ebx
-	je	L(ExitTail3)
-	cmpb	$0, 2(%ecx)
-	jz	L(ExitTail3)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	SAVE_RESULT_TAIL (3)
-#   ifdef USE_AS_STPCPY
-	cmpb	$1, (%eax)
-	sbb	$-1, %eax
-#   endif
-	RETURN
-#  endif
-
-END (STRCPY)
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcpy.S b/sysdeps/i386/i686/multiarch/strcpy.S
deleted file mode 100644
index ffbc03c6d5..0000000000
--- a/sysdeps/i386/i686/multiarch/strcpy.S
+++ /dev/null
@@ -1,116 +0,0 @@
-/* Multiple versions of strcpy
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if !defined (USE_AS_STPCPY) && !defined (USE_AS_STRNCPY)
-# ifndef STRCPY
-#  define STRCPY strcpy
-# endif
-#endif
-
-#ifdef USE_AS_STPCPY
-# ifdef USE_AS_STRNCPY
-#  define STRCPY_SSSE3	__stpncpy_ssse3
-#  define STRCPY_SSE2		__stpncpy_sse2
-#  define STRCPY_IA32		__stpncpy_ia32
-#  define __GI_STRCPY		__GI_stpncpy
-#  define __GI___STRCPY		__GI___stpncpy
-# else
-#  define STRCPY_SSSE3	__stpcpy_ssse3
-#  define STRCPY_SSE2		__stpcpy_sse2
-#  define STRCPY_IA32		__stpcpy_ia32
-#  define __GI_STRCPY		__GI_stpcpy
-#  define __GI___STRCPY		__GI___stpcpy
-# endif
-#else
-# ifdef USE_AS_STRNCPY
-#  define STRCPY_SSSE3	__strncpy_ssse3
-#  define STRCPY_SSE2		__strncpy_sse2
-#  define STRCPY_IA32		__strncpy_ia32
-#  define __GI_STRCPY		__GI_strncpy
-# else
-#  define STRCPY_SSSE3	__strcpy_ssse3
-#  define STRCPY_SSE2		__strcpy_sse2
-#  define STRCPY_IA32		__strcpy_ia32
-#  define __GI_STRCPY		__GI_strcpy
-# endif
-#endif
-
-
-/* Define multiple versions only for the definition in libc.  Don't
-   define multiple versions for strncpy in static library since we
-   need strncpy before the initialization happened.  */
-#if IS_IN (libc)
-
-	.text
-ENTRY(STRCPY)
-	.type	STRCPY, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (STRCPY_IA32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (STRCPY_SSE2)
-	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz	2f
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (STRCPY_SSSE3)
-2:	ret
-END(STRCPY)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type STRCPY_IA32, @function; \
-	.align 16; \
-	.globl STRCPY_IA32; \
-	.hidden STRCPY_IA32; \
-	STRCPY_IA32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size STRCPY_IA32, .-STRCPY_IA32
-
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcpy calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-#  define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCPY; __GI_STRCPY = STRCPY_IA32
-#  undef libc_hidden_def
-#  define libc_hidden_def(name) \
-	.globl __GI___STRCPY; __GI___STRCPY = STRCPY_IA32
-
-# endif
-#endif
-
-#ifdef USE_AS_STPCPY
-# ifdef USE_AS_STRNCPY
-#  include "../../stpncpy.S"
-# else
-#  include "../../i586/stpcpy.S"
-# endif
-#else
-# ifndef USE_AS_STRNCPY
-#  include "../../i586/strcpy.S"
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcspn-c.c b/sysdeps/i386/i686/multiarch/strcspn-c.c
deleted file mode 100644
index 6d61e190a8..0000000000
--- a/sysdeps/i386/i686/multiarch/strcspn-c.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define __strcspn_sse2 __strcspn_ia32
-#include <sysdeps/x86_64/multiarch/strcspn-c.c>
diff --git a/sysdeps/i386/i686/multiarch/strcspn.S b/sysdeps/i386/i686/multiarch/strcspn.S
deleted file mode 100644
index 21e5093924..0000000000
--- a/sysdeps/i386/i686/multiarch/strcspn.S
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Multiple versions of strcspn
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2009-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <config.h>
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifdef USE_AS_STRPBRK
-#define STRCSPN_SSE42	__strpbrk_sse42
-#define STRCSPN_IA32	__strpbrk_ia32
-#define __GI_STRCSPN	__GI_strpbrk
-#else
-#ifndef STRCSPN
-#define STRCSPN		strcspn
-#define STRCSPN_SSE42	__strcspn_sse42
-#define STRCSPN_IA32	__strcspn_ia32
-#define __GI_STRCSPN	__GI_strcspn
-#endif
-#endif
-
-/* Define multiple versions only for the definition in libc.  Don't
-   define multiple versions for strpbrk in static library since we
-   need strpbrk before the initialization happened.  */
-#if (defined SHARED || !defined USE_AS_STRPBRK) && IS_IN (libc)
-	.text
-ENTRY(STRCSPN)
-	.type	STRCSPN, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (STRCSPN_IA32)
-	HAS_CPU_FEATURE (SSE4_2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (STRCSPN_SSE42)
-2:	ret
-END(STRCSPN)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type STRCSPN_IA32, @function; \
-	.globl STRCSPN_IA32; \
-	.p2align 4; \
-	STRCSPN_IA32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size STRCSPN_IA32, .-STRCSPN_IA32
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCSPN; __GI_STRCSPN = STRCSPN_IA32
-#endif
-
-#ifdef USE_AS_STRPBRK
-#include "../../strpbrk.S"
-#else
-#include "../../strcspn.S"
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
deleted file mode 100644
index d3ea864bab..0000000000
--- a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
+++ /dev/null
@@ -1,125 +0,0 @@
-/* strlen with SSE2 and BSF
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if defined SHARED && IS_IN (libc)
-
-#include <sysdep.h>
-
-#define CFI_PUSH(REG)						\
-  cfi_adjust_cfa_offset (4);					\
-  cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)						\
-  cfi_adjust_cfa_offset (-4);					\
-  cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-#define PARMS		4 + 8	/* Preserve ESI and EDI.  */
-#define	STR		PARMS
-#define ENTRANCE	PUSH (%esi); PUSH (%edi); cfi_remember_state
-#define RETURN		POP (%edi); POP (%esi); ret; \
-			cfi_restore_state; cfi_remember_state
-
-	.text
-ENTRY ( __strlen_sse2_bsf)
-	ENTRANCE
-	mov	STR(%esp), %edi
-	xor	%eax, %eax
-	mov	%edi, %ecx
-	and	$0x3f, %ecx
-	pxor	%xmm0, %xmm0
-	cmp	$0x30, %ecx
-	ja	L(next)
-	movdqu	(%edi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit_less16)
-	mov	%edi, %eax
-	and	$-16, %eax
-	jmp	L(align16_start)
-L(next):
-
-	mov	%edi, %eax
-	and	$-16, %eax
-	pcmpeqb	(%eax), %xmm0
-	mov	$-1, %esi
-	sub	%eax, %ecx
-	shl	%cl, %esi
-	pmovmskb %xmm0, %edx
-	and	%esi, %edx
-	jnz	L(exit)
-L(align16_start):
-	pxor	%xmm0, %xmm0
-	pxor	%xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	pxor	%xmm3, %xmm3
-	.p2align 4
-L(align16_loop):
-	pcmpeqb	16(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	jnz	L(exit16)
-
-	pcmpeqb	32(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	jnz	L(exit32)
-
-	pcmpeqb	48(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	jnz	L(exit48)
-
-	pcmpeqb	64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	64(%eax), %eax
-	test	%edx, %edx
-	jz	L(align16_loop)
-L(exit):
-	sub	%edi, %eax
-L(exit_less16):
-	bsf	%edx, %edx
-	add	%edx, %eax
-	RETURN
-L(exit16):
-	sub	%edi, %eax
-	bsf	%edx, %edx
-	add	%edx, %eax
-	add	$16, %eax
-	RETURN
-L(exit32):
-	sub	%edi, %eax
-	bsf	%edx, %edx
-	add	%edx, %eax
-	add	$32, %eax
-	RETURN
-L(exit48):
-	sub	%edi, %eax
-	bsf	%edx, %edx
-	add	%edx, %eax
-	add	$48, %eax
-	POP (%edi)
-	POP (%esi)
-	ret
-
-END ( __strlen_sse2_bsf)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2.S b/sysdeps/i386/i686/multiarch/strlen-sse2.S
deleted file mode 100644
index 36fc1469d0..0000000000
--- a/sysdeps/i386/i686/multiarch/strlen-sse2.S
+++ /dev/null
@@ -1,695 +0,0 @@
-/* strlen with SSE2
-   Copyright (C) 2010-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* for strlen only SHARED version is optimized, for strcat, strncat, strnlen both STATIC and SHARED are optimized */
-
-#if (defined USE_AS_STRNLEN || defined USE_AS_STRCAT || defined SHARED) && IS_IN (libc)
-
-# ifndef USE_AS_STRCAT
-
-#  include <sysdep.h>
-#  define PARMS	4
-#  define STR	PARMS
-#  define RETURN	ret
-
-#  ifdef USE_AS_STRNLEN
-#   define LEN	PARMS + 8
-#   define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#   define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#   define PUSH(REG)	pushl	REG;	CFI_PUSH (REG)
-#   define POP(REG)	popl	REG;	CFI_POP (REG)
-#   undef RETURN
-#   define RETURN	POP (%edi); CFI_PUSH(%edi); ret
-#  endif
-
-#  ifndef STRLEN
-#   define STRLEN	__strlen_sse2
-#  endif
-
-	atom_text_section
-ENTRY (STRLEN)
-	mov	STR(%esp), %edx
-#  ifdef USE_AS_STRNLEN
-	PUSH	(%edi)
-	movl	LEN(%esp), %edi
-	sub	$4, %edi
-	jbe	L(len_less4_prolog)
-#  endif
-# endif
-	xor	%eax, %eax
-	cmpb	$0, (%edx)
-	jz	L(exit_tail0)
-	cmpb	$0, 1(%edx)
-	jz	L(exit_tail1)
-	cmpb	$0, 2(%edx)
-	jz	L(exit_tail2)
-	cmpb	$0, 3(%edx)
-	jz	L(exit_tail3)
-
-# ifdef USE_AS_STRNLEN
-	sub	$4, %edi
-	jbe	L(len_less8_prolog)
-# endif
-
-	cmpb	$0, 4(%edx)
-	jz	L(exit_tail4)
-	cmpb	$0, 5(%edx)
-	jz	L(exit_tail5)
-	cmpb	$0, 6(%edx)
-	jz	L(exit_tail6)
-	cmpb	$0, 7(%edx)
-	jz	L(exit_tail7)
-
-# ifdef USE_AS_STRNLEN
-	sub	$4, %edi
-	jbe	L(len_less12_prolog)
-# endif
-
-	cmpb	$0, 8(%edx)
-	jz	L(exit_tail8)
-	cmpb	$0, 9(%edx)
-	jz	L(exit_tail9)
-	cmpb	$0, 10(%edx)
-	jz	L(exit_tail10)
-	cmpb	$0, 11(%edx)
-	jz	L(exit_tail11)
-
-# ifdef USE_AS_STRNLEN
-	sub	$4, %edi
-	jbe	L(len_less16_prolog)
-# endif
-
-	cmpb	$0, 12(%edx)
-	jz	L(exit_tail12)
-	cmpb	$0, 13(%edx)
-	jz	L(exit_tail13)
-	cmpb	$0, 14(%edx)
-	jz	L(exit_tail14)
-	cmpb	$0, 15(%edx)
-	jz	L(exit_tail15)
-
-	pxor	%xmm0, %xmm0
-	lea	16(%edx), %eax
-	mov	%eax, %ecx
-	and	$-16, %eax
-
-# ifdef USE_AS_STRNLEN
-	and	$15, %edx
-	add	%edx, %edi
-	sub	$64, %edi
-	jbe	L(len_less64)
-# endif
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	pxor	%xmm1, %xmm1
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	pxor	%xmm2, %xmm2
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	pxor	%xmm3, %xmm3
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-# ifdef USE_AS_STRNLEN
-	sub	$64, %edi
-	jbe	L(len_less64)
-# endif
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-# ifdef USE_AS_STRNLEN
-	sub	$64, %edi
-	jbe	L(len_less64)
-# endif
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-# ifdef USE_AS_STRNLEN
-	sub	$64, %edi
-	jbe	L(len_less64)
-# endif
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-# ifdef USE_AS_STRNLEN
-	mov	%eax, %edx
-	and	$63, %edx
-	add	%edx, %edi
-# endif
-
-	and	$-0x40, %eax
-
-	.p2align 4
-L(aligned_64_loop):
-# ifdef USE_AS_STRNLEN
-	sub	$64, %edi
-	jbe	L(len_less64)
-# endif
-	movaps	(%eax), %xmm0
-	movaps	16(%eax), %xmm1
-	movaps	32(%eax), %xmm2
-	movaps	48(%eax), %xmm6
-	pminub	%xmm1, %xmm0
-	pminub	%xmm6, %xmm2
-	pminub	%xmm0, %xmm2
-	pcmpeqb	%xmm3, %xmm2
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	lea	64(%eax), %eax
-	jz	L(aligned_64_loop)
-
-	pcmpeqb	-64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	48(%ecx), %ecx
-	jnz	L(exit)
-
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	-16(%ecx), %ecx
-	jnz	L(exit)
-
-	pcmpeqb	-32(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	-16(%ecx), %ecx
-	jnz	L(exit)
-
-	pcmpeqb	%xmm6, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-L(exit):
-	sub	%ecx, %eax
-	test	%dl, %dl
-	jz	L(exit_high)
-
-	mov	%dl, %cl
-	and	$15, %cl
-	jz	L(exit_8)
-	test	$0x01, %dl
-	jnz	L(exit_tail0)
-	test	$0x02, %dl
-	jnz	L(exit_tail1)
-	test	$0x04, %dl
-	jnz	L(exit_tail2)
-	add	$3, %eax
-	RETURN
-
-	.p2align 4
-L(exit_8):
-	test	$0x10, %dl
-	jnz	L(exit_tail4)
-	test	$0x20, %dl
-	jnz	L(exit_tail5)
-	test	$0x40, %dl
-	jnz	L(exit_tail6)
-	add	$7, %eax
-	RETURN
-
-	.p2align 4
-L(exit_high):
-	mov	%dh, %ch
-	and	$15, %ch
-	jz	L(exit_high_8)
-	test	$0x01, %dh
-	jnz	L(exit_tail8)
-	test	$0x02, %dh
-	jnz	L(exit_tail9)
-	test	$0x04, %dh
-	jnz	L(exit_tail10)
-	add	$11, %eax
-	RETURN
-
-	.p2align 4
-L(exit_high_8):
-	test	$0x10, %dh
-	jnz	L(exit_tail12)
-	test	$0x20, %dh
-	jnz	L(exit_tail13)
-	test	$0x40, %dh
-	jnz	L(exit_tail14)
-	add	$15, %eax
-L(exit_tail0):
-	RETURN
-
-# ifdef USE_AS_STRNLEN
-
-	.p2align 4
-L(len_less64):
-	pxor	%xmm0, %xmm0
-	add	$64, %edi
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	pxor	%xmm1, %xmm1
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(strnlen_exit)
-
-	sub	$16, %edi
-	jbe	L(return_start_len)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(strnlen_exit)
-
-	sub	$16, %edi
-	jbe	L(return_start_len)
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(strnlen_exit)
-
-	sub	$16, %edi
-	jbe	L(return_start_len)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(strnlen_exit)
-
-	movl	LEN(%esp), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit):
-	sub	%ecx, %eax
-
-	test	%dl, %dl
-	jz	L(strnlen_exit_high)
-	mov	%dl, %cl
-	and	$15, %cl
-	jz	L(strnlen_exit_8)
-	test	$0x01, %dl
-	jnz	L(exit_tail0)
-	test	$0x02, %dl
-	jnz	L(strnlen_exit_tail1)
-	test	$0x04, %dl
-	jnz	L(strnlen_exit_tail2)
-	sub	$4, %edi
-	jb	L(return_start_len)
-	lea	3(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_8):
-	test	$0x10, %dl
-	jnz	L(strnlen_exit_tail4)
-	test	$0x20, %dl
-	jnz	L(strnlen_exit_tail5)
-	test	$0x40, %dl
-	jnz	L(strnlen_exit_tail6)
-	sub	$8, %edi
-	jb	L(return_start_len)
-	lea	7(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_high):
-	mov	%dh, %ch
-	and	$15, %ch
-	jz	L(strnlen_exit_high_8)
-	test	$0x01, %dh
-	jnz	L(strnlen_exit_tail8)
-	test	$0x02, %dh
-	jnz	L(strnlen_exit_tail9)
-	test	$0x04, %dh
-	jnz	L(strnlen_exit_tail10)
-	sub	$12, %edi
-	jb	L(return_start_len)
-	lea	11(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_high_8):
-	test	$0x10, %dh
-	jnz	L(strnlen_exit_tail12)
-	test	$0x20, %dh
-	jnz	L(strnlen_exit_tail13)
-	test	$0x40, %dh
-	jnz	L(strnlen_exit_tail14)
-	sub	$16, %edi
-	jb	L(return_start_len)
-	lea	15(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_tail1):
-	sub	$2, %edi
-	jb	L(return_start_len)
-	lea	1(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_tail2):
-	sub	$3, %edi
-	jb	L(return_start_len)
-	lea	2(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_tail4):
-	sub	$5, %edi
-	jb	L(return_start_len)
-	lea	4(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_tail5):
-	sub	$6, %edi
-	jb	L(return_start_len)
-	lea	5(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_tail6):
-	sub	$7, %edi
-	jb	L(return_start_len)
-	lea	6(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_tail8):
-	sub	$9, %edi
-	jb	L(return_start_len)
-	lea	8(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_tail9):
-	sub	$10, %edi
-	jb	L(return_start_len)
-	lea	9(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_tail10):
-	sub	$11, %edi
-	jb	L(return_start_len)
-	lea	10(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_tail12):
-	sub	$13, %edi
-	jb	L(return_start_len)
-	lea	12(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_tail13):
-	sub	$14, %edi
-	jb	L(return_start_len)
-	lea	13(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(strnlen_exit_tail14):
-	sub	$15, %edi
-	jb	L(return_start_len)
-	lea	14(%eax), %eax
-	RETURN
-
-	.p2align 4
-L(return_start_len):
-	movl	LEN(%esp), %eax
-	RETURN
-
-/* for prolog only */
-
-	.p2align 4
-L(len_less4_prolog):
-	xor	%eax, %eax
-
-	add	$4, %edi
-	jz	L(exit_tail0)
-
-	cmpb	$0, (%edx)
-	jz	L(exit_tail0)
-	cmp	$1, %edi
-	je	L(exit_tail1)
-
-	cmpb	$0, 1(%edx)
-	jz	L(exit_tail1)
-	cmp	$2, %edi
-	je	L(exit_tail2)
-
-	cmpb	$0, 2(%edx)
-	jz	L(exit_tail2)
-	cmp	$3, %edi
-	je	L(exit_tail3)
-
-	cmpb	$0, 3(%edx)
-	jz	L(exit_tail3)
-	mov	$4, %eax
-	RETURN
-
-	.p2align 4
-L(len_less8_prolog):
-	add	$4, %edi
-
-	cmpb	$0, 4(%edx)
-	jz	L(exit_tail4)
-	cmp	$1, %edi
-	je	L(exit_tail5)
-
-	cmpb	$0, 5(%edx)
-	jz	L(exit_tail5)
-	cmp	$2, %edi
-	je	L(exit_tail6)
-
-	cmpb	$0, 6(%edx)
-	jz	L(exit_tail6)
-	cmp	$3, %edi
-	je	L(exit_tail7)
-
-	cmpb	$0, 7(%edx)
-	jz	L(exit_tail7)
-	mov	$8, %eax
-	RETURN
-
-
-	.p2align 4
-L(len_less12_prolog):
-	add	$4, %edi
-
-	cmpb	$0, 8(%edx)
-	jz	L(exit_tail8)
-	cmp	$1, %edi
-	je	L(exit_tail9)
-
-	cmpb	$0, 9(%edx)
-	jz	L(exit_tail9)
-	cmp	$2, %edi
-	je	L(exit_tail10)
-
-	cmpb	$0, 10(%edx)
-	jz	L(exit_tail10)
-	cmp	$3, %edi
-	je	L(exit_tail11)
-
-	cmpb	$0, 11(%edx)
-	jz	L(exit_tail11)
-	mov	$12, %eax
-	RETURN
-
-	.p2align 4
-L(len_less16_prolog):
-	add	$4, %edi
-
-	cmpb	$0, 12(%edx)
-	jz	L(exit_tail12)
-	cmp	$1, %edi
-	je	L(exit_tail13)
-
-	cmpb	$0, 13(%edx)
-	jz	L(exit_tail13)
-	cmp	$2, %edi
-	je	L(exit_tail14)
-
-	cmpb	$0, 14(%edx)
-	jz	L(exit_tail14)
-	cmp	$3, %edi
-	je	L(exit_tail15)
-
-	cmpb	$0, 15(%edx)
-	jz	L(exit_tail15)
-	mov	$16, %eax
-	RETURN
-# endif
-
-	.p2align 4
-L(exit_tail1):
-	add	$1, %eax
-	RETURN
-
-L(exit_tail2):
-	add	$2, %eax
-	RETURN
-
-L(exit_tail3):
-	add	$3, %eax
-	RETURN
-
-L(exit_tail4):
-	add	$4, %eax
-	RETURN
-
-L(exit_tail5):
-	add	$5, %eax
-	RETURN
-
-L(exit_tail6):
-	add	$6, %eax
-	RETURN
-
-L(exit_tail7):
-	add	$7, %eax
-	RETURN
-
-L(exit_tail8):
-	add	$8, %eax
-	RETURN
-
-L(exit_tail9):
-	add	$9, %eax
-	RETURN
-
-L(exit_tail10):
-	add	$10, %eax
-	RETURN
-
-L(exit_tail11):
-	add	$11, %eax
-	RETURN
-
-L(exit_tail12):
-	add	$12, %eax
-	RETURN
-
-L(exit_tail13):
-	add	$13, %eax
-	RETURN
-
-L(exit_tail14):
-	add	$14, %eax
-	RETURN
-
-L(exit_tail15):
-	add	$15, %eax
-# ifndef USE_AS_STRCAT
-	RETURN
-END (STRLEN)
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strlen.S b/sysdeps/i386/i686/multiarch/strlen.S
deleted file mode 100644
index 77cf6bcdb0..0000000000
--- a/sysdeps/i386/i686/multiarch/strlen.S
+++ /dev/null
@@ -1,60 +0,0 @@
-/* Multiple versions of strlen
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2009-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc and for the
-   DSO.  In static binaries, we need strlen before the initialization
-   happened.  */
-#if defined SHARED && IS_IN (libc)
-	.text
-ENTRY(strlen)
-	.type	strlen, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__strlen_ia32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__strlen_sse2_bsf)
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__strlen_sse2)
-2:	ret
-END(strlen)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __strlen_ia32, @function; \
-	.globl __strlen_ia32; \
-	.p2align 4; \
-	__strlen_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __strlen_ia32, .-__strlen_ia32
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_strlen; __GI_strlen = __strlen_ia32
-#endif
-
-#include "../../i586/strlen.S"
diff --git a/sysdeps/i386/i686/multiarch/strncase-c.c b/sysdeps/i386/i686/multiarch/strncase-c.c
deleted file mode 100644
index 76581eb62b..0000000000
--- a/sysdeps/i386/i686/multiarch/strncase-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include <string.h>
-
-extern __typeof (strncasecmp) __strncasecmp_nonascii;
-
-#define __strncasecmp __strncasecmp_nonascii
-#include <string/strncase.c>
-
-strong_alias (__strncasecmp_nonascii, __strncasecmp_ia32)
diff --git a/sysdeps/i386/i686/multiarch/strncase.S b/sysdeps/i386/i686/multiarch/strncase.S
deleted file mode 100644
index a56e63a566..0000000000
--- a/sysdeps/i386/i686/multiarch/strncase.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Entry point for multi-version x86 strncasecmp.
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY(__strncasecmp)
-	.type	__strncasecmp, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__strncasecmp_ia32)
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__strncasecmp_ssse3)
-	HAS_CPU_FEATURE (SSE4_2)
-	jz	2f
-	HAS_ARCH_FEATURE (Slow_SSE4_2)
-	jnz	2f
-	LOAD_FUNC_GOT_EAX (__strncasecmp_sse4_2)
-2:	ret
-END(__strncasecmp)
-
-weak_alias (__strncasecmp, strncasecmp)
diff --git a/sysdeps/i386/i686/multiarch/strncase_l-c.c b/sysdeps/i386/i686/multiarch/strncase_l-c.c
deleted file mode 100644
index 7e601af271..0000000000
--- a/sysdeps/i386/i686/multiarch/strncase_l-c.c
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <string.h>
-
-extern __typeof (strncasecmp_l) __strncasecmp_l_nonascii;
-
-#define __strncasecmp_l __strncasecmp_l_nonascii
-#define USE_IN_EXTENDED_LOCALE_MODEL    1
-#include <string/strncase.c>
-
-strong_alias (__strncasecmp_l_nonascii, __strncasecmp_l_ia32)
-
-/* The needs of strcasecmp in libc are minimal, no need to go through
-   the IFUNC.  */
-strong_alias (__strncasecmp_l_nonascii, __GI___strncasecmp_l)
diff --git a/sysdeps/i386/i686/multiarch/strncase_l-sse4.S b/sysdeps/i386/i686/multiarch/strncase_l-sse4.S
deleted file mode 100644
index 557210832e..0000000000
--- a/sysdeps/i386/i686/multiarch/strncase_l-sse4.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define USE_AS_STRNCASECMP_L 1
-#include "strcmp-sse4.S"
diff --git a/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S b/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S
deleted file mode 100644
index d438a1ae35..0000000000
--- a/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define USE_AS_STRNCASECMP_L 1
-#include "strcmp-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/strncase_l.S b/sysdeps/i386/i686/multiarch/strncase_l.S
deleted file mode 100644
index 8a74ee8574..0000000000
--- a/sysdeps/i386/i686/multiarch/strncase_l.S
+++ /dev/null
@@ -1,7 +0,0 @@
-/* Multiple versions of strncasecmp_l
-   All versions must be listed in ifunc-impl-list.c.  */
-#define STRCMP __strncasecmp_l
-#define USE_AS_STRNCASECMP_L
-#include "strcmp.S"
-
-weak_alias (__strncasecmp_l, strncasecmp_l)
diff --git a/sysdeps/i386/i686/multiarch/strncat-c.c b/sysdeps/i386/i686/multiarch/strncat-c.c
deleted file mode 100644
index 132a000545..0000000000
--- a/sysdeps/i386/i686/multiarch/strncat-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#define STRNCAT __strncat_ia32
-#ifdef SHARED
-#undef libc_hidden_def
-#define libc_hidden_def(name) \
-  __hidden_ver1 (__strncat_ia32, __GI___strncat, __strncat_ia32);
-#endif
-
-#include "string/strncat.c"
diff --git a/sysdeps/i386/i686/multiarch/strncat-sse2.S b/sysdeps/i386/i686/multiarch/strncat-sse2.S
deleted file mode 100644
index f1045b72b8..0000000000
--- a/sysdeps/i386/i686/multiarch/strncat-sse2.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define STRCAT  __strncat_sse2
-#define USE_AS_STRNCAT
-
-#include "strcat-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/strncat-ssse3.S b/sysdeps/i386/i686/multiarch/strncat-ssse3.S
deleted file mode 100644
index 625b90a978..0000000000
--- a/sysdeps/i386/i686/multiarch/strncat-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define STRCAT  __strncat_ssse3
-#define USE_AS_STRNCAT
-
-#include "strcat-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/strncat.S b/sysdeps/i386/i686/multiarch/strncat.S
deleted file mode 100644
index 5c1bf41453..0000000000
--- a/sysdeps/i386/i686/multiarch/strncat.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strncat
-   All versions must be listed in ifunc-impl-list.c.  */
-#define STRCAT strncat
-#define USE_AS_STRNCAT
-#include "strcat.S"
diff --git a/sysdeps/i386/i686/multiarch/strncmp-c.c b/sysdeps/i386/i686/multiarch/strncmp-c.c
deleted file mode 100644
index cc059da494..0000000000
--- a/sysdeps/i386/i686/multiarch/strncmp-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifdef SHARED
-# define STRNCMP __strncmp_ia32
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name)  \
-    __hidden_ver1 (__strncmp_ia32, __GI_strncmp, __strncmp_ia32);
-#endif
-
-#include "string/strncmp.c"
diff --git a/sysdeps/i386/i686/multiarch/strncmp-sse4.S b/sysdeps/i386/i686/multiarch/strncmp-sse4.S
deleted file mode 100644
index cf14dfaf6c..0000000000
--- a/sysdeps/i386/i686/multiarch/strncmp-sse4.S
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifdef SHARED
-# define USE_AS_STRNCMP
-# define STRCMP	__strncmp_sse4_2
-# include "strcmp-sse4.S"
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strncmp-ssse3.S b/sysdeps/i386/i686/multiarch/strncmp-ssse3.S
deleted file mode 100644
index 536c8685f2..0000000000
--- a/sysdeps/i386/i686/multiarch/strncmp-ssse3.S
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifdef SHARED
-# define USE_AS_STRNCMP
-# define STRCMP	__strncmp_ssse3
-# include "strcmp-ssse3.S"
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strncmp.S b/sysdeps/i386/i686/multiarch/strncmp.S
deleted file mode 100644
index 150d4786d2..0000000000
--- a/sysdeps/i386/i686/multiarch/strncmp.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strncmp
-   All versions must be listed in ifunc-impl-list.c.  */
-#define USE_AS_STRNCMP
-#define STRCMP	strncmp
-#include "strcmp.S"
diff --git a/sysdeps/i386/i686/multiarch/strncpy-c.c b/sysdeps/i386/i686/multiarch/strncpy-c.c
deleted file mode 100644
index 201e3f98b3..0000000000
--- a/sysdeps/i386/i686/multiarch/strncpy-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#define STRNCPY __strncpy_ia32
-#ifdef SHARED
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name)  \
-    __hidden_ver1 (__strncpy_ia32, __GI_strncpy, __strncpy_ia32);
-#endif
-
-#include "string/strncpy.c"
diff --git a/sysdeps/i386/i686/multiarch/strncpy-sse2.S b/sysdeps/i386/i686/multiarch/strncpy-sse2.S
deleted file mode 100644
index bdd99239a4..0000000000
--- a/sysdeps/i386/i686/multiarch/strncpy-sse2.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNCPY
-#define STRCPY __strncpy_sse2
-#include "strcpy-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/strncpy-ssse3.S b/sysdeps/i386/i686/multiarch/strncpy-ssse3.S
deleted file mode 100644
index bf82ee447d..0000000000
--- a/sysdeps/i386/i686/multiarch/strncpy-ssse3.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNCPY
-#define STRCPY __strncpy_ssse3
-#include "strcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/strncpy.S b/sysdeps/i386/i686/multiarch/strncpy.S
deleted file mode 100644
index 9c257efc6e..0000000000
--- a/sysdeps/i386/i686/multiarch/strncpy.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strncpy
-   All versions must be listed in ifunc-impl-list.c.  */
-#define USE_AS_STRNCPY
-#define STRCPY strncpy
-#include "strcpy.S"
diff --git a/sysdeps/i386/i686/multiarch/strnlen-c.c b/sysdeps/i386/i686/multiarch/strnlen-c.c
deleted file mode 100644
index 351e939a93..0000000000
--- a/sysdeps/i386/i686/multiarch/strnlen-c.c
+++ /dev/null
@@ -1,10 +0,0 @@
-#define STRNLEN  __strnlen_ia32
-#ifdef SHARED
-# undef libc_hidden_def
-# define libc_hidden_def(name)  \
-    __hidden_ver1 (__strnlen_ia32, __GI_strnlen, __strnlen_ia32); \
-    strong_alias (__strnlen_ia32, __strnlen_ia32_1); \
-    __hidden_ver1 (__strnlen_ia32_1, __GI___strnlen, __strnlen_ia32_1);
-#endif
-
-#include "string/strnlen.c"
diff --git a/sysdeps/i386/i686/multiarch/strnlen-sse2.S b/sysdeps/i386/i686/multiarch/strnlen-sse2.S
deleted file mode 100644
index 56b6ae2a5c..0000000000
--- a/sysdeps/i386/i686/multiarch/strnlen-sse2.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNLEN
-#define STRLEN __strnlen_sse2
-#include "strlen-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/strnlen.S b/sysdeps/i386/i686/multiarch/strnlen.S
deleted file mode 100644
index d241522c70..0000000000
--- a/sysdeps/i386/i686/multiarch/strnlen.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of strnlen
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
-	.text
-ENTRY(__strnlen)
-	.type	__strnlen, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__strnlen_ia32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__strnlen_sse2)
-2:	ret
-END(__strnlen)
-
-weak_alias(__strnlen, strnlen)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strpbrk-c.c b/sysdeps/i386/i686/multiarch/strpbrk-c.c
deleted file mode 100644
index 5db62053b3..0000000000
--- a/sysdeps/i386/i686/multiarch/strpbrk-c.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define __strpbrk_sse2 __strpbrk_ia32
-#include <sysdeps/x86_64/multiarch/strpbrk-c.c>
diff --git a/sysdeps/i386/i686/multiarch/strpbrk.S b/sysdeps/i386/i686/multiarch/strpbrk.S
deleted file mode 100644
index 7201d6376f..0000000000
--- a/sysdeps/i386/i686/multiarch/strpbrk.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strpbrk
-   All versions must be listed in ifunc-impl-list.c.  */
-#define STRCSPN strpbrk
-#define USE_AS_STRPBRK
-#include "strcspn.S"
diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
deleted file mode 100644
index 39a7c8825b..0000000000
--- a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
+++ /dev/null
@@ -1,282 +0,0 @@
-/* strrchr with SSE2 with bsf and bsr
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS  4
-# define STR1  PARMS
-# define STR2  STR1+4
-
-	.text
-ENTRY (__strrchr_sse2_bsf)
-
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-
-	PUSH	(%edi)
-	pxor	%xmm2, %xmm2
-	mov	%ecx, %edi
-	punpcklbw %xmm1, %xmm1
-	punpcklbw %xmm1, %xmm1
-	/* ECX has OFFSET. */
-	and	$63, %ecx
-	cmp	$48, %ecx
-	pshufd	$0, %xmm1, %xmm1
-	ja	L(crosscashe)
-
-/* unaligned string. */
-	movdqu	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	/* Find where NULL is.  */
-	pmovmskb %xmm2, %edx
-	/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-
-	test	%eax, %eax
-	jnz	L(unaligned_match1)
-
-	test	%edx, %edx
-	jnz	L(return_null)
-
-	and	$-16, %edi
-	add	$16, %edi
-
-	PUSH	(%esi)
-	PUSH	(%ebx)
-
-	xor	%ebx, %ebx
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-	CFI_POP	(%ebx)
-
-	.p2align 4
-L(unaligned_return_value1):
-	bsf	%edx, %ecx
-	mov	$2, %edx
-	shl	%cl, %edx
-	sub	$1, %edx
-	and	%edx, %eax
-	jz	L(return_null)
-	bsr	%eax, %eax
-	add	%edi, %eax
-	POP	(%edi)
-	ret
-	CFI_PUSH	(%edi)
-
-	.p2align 4
-L(unaligned_match1):
-	test	%edx, %edx
-	jnz	L(unaligned_return_value1)
-
-	PUSH	(%esi)
-	PUSH	(%ebx)
-
-	mov	%eax, %ebx
-	lea	16(%edi), %esi
-	and	$-16, %edi
-	add	$16, %edi
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-	CFI_POP	(%ebx)
-
-	.p2align 4
-	L(crosscashe):
-/* Hancle unaligned string.  */
-	and	$15, %ecx
-	and	$-16, %edi
-	pxor	%xmm3, %xmm3
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm3
-	pcmpeqb	%xmm1, %xmm0
-	/* Find where NULL is.  */
-	pmovmskb %xmm3, %edx
-	/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-	/* Remove the leading bytes.  */
-	shr	%cl, %edx
-	shr	%cl, %eax
-
-	test	%eax, %eax
-	jnz	L(unaligned_match)
-
-	test	%edx, %edx
-	jnz	L(return_null)
-
-	add	$16, %edi
-
-	PUSH	(%esi)
-	PUSH	(%ebx)
-
-	xor	%ebx, %ebx
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-	CFI_POP	(%ebx)
-
-	.p2align 4
-L(unaligned_return_value):
-	add	%ecx, %edi
-	bsf	%edx, %ecx
-	mov	$2, %edx
-	shl	%cl, %edx
-	sub	$1, %edx
-	and	%edx, %eax
-	jz	L(return_null)
-	bsr	%eax, %eax
-	add	%edi, %eax
-	POP	(%edi)
-	ret
-	CFI_PUSH	(%edi)
-
-	.p2align 4
-L(unaligned_match):
-	test	%edx, %edx
-	jnz	L(unaligned_return_value)
-
-	PUSH	(%esi)
-	PUSH	(%ebx)
-
-	mov	%eax, %ebx
-	add	$16, %edi
-	lea	(%edi, %ecx), %esi
-
-/* Loop start on aligned string.  */
-	.p2align 4
-L(loop):
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jz	L(loop)
-
-L(matches):
-	test	%eax, %eax
-	jnz	L(match)
-L(return_value):
-	test	%ebx, %ebx
-	jz	L(return_null_1)
-	bsr	%ebx, %eax
-	add	%esi, %eax
-
-	POP	(%ebx)
-	POP	(%esi)
-
-	sub	$16, %eax
-	POP	(%edi)
-	ret
-
-	CFI_PUSH	(%edi)
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(match):
-	pmovmskb %xmm2, %ecx
-	test	%ecx, %ecx
-	jnz	L(return_value_1)
-	mov	%eax, %ebx
-	mov	%edi, %esi
-	jmp	L(loop)
-
-	.p2align 4
-L(return_value_1):
-	bsf	%ecx, %ecx
-	mov	$2, %edx
-	shl	%cl, %edx
-	sub	$1, %edx
-	and	%edx, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-
-	bsr	%eax, %eax
-	add	%edi, %eax
-	sub	$16, %eax
-	POP	(%edi)
-	ret
-
-	CFI_PUSH	(%edi)
-/* Return NULL.  */
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	POP	(%edi)
-	ret
-
-	CFI_PUSH	(%edi)
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-/* Return NULL.  */
-	.p2align 4
-L(return_null_1):
-	POP	(%ebx)
-	POP	(%esi)
-	POP	(%edi)
-	xor	%eax, %eax
-	ret
-
-END (__strrchr_sse2_bsf)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2.S b/sysdeps/i386/i686/multiarch/strrchr-sse2.S
deleted file mode 100644
index 20934288be..0000000000
--- a/sysdeps/i386/i686/multiarch/strrchr-sse2.S
+++ /dev/null
@@ -1,708 +0,0 @@
-/* strrchr SSE2 without bsf and bsr
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS  8
-# define ENTRANCE PUSH(%edi);
-# define RETURN  POP(%edi); ret; CFI_PUSH(%edi);
-
-# define STR1  PARMS
-# define STR2  STR1+4
-
-	atom_text_section
-ENTRY (__strrchr_sse2)
-
-	ENTRANCE
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-
-	pxor	%xmm2, %xmm2
-	mov	%ecx, %edi
-	punpcklbw %xmm1, %xmm1
-	punpcklbw %xmm1, %xmm1
-	/* ECX has OFFSET. */
-	and	$63, %ecx
-	cmp	$48, %ecx
-	pshufd	$0, %xmm1, %xmm1
-	ja	L(crosscache)
-
-/* unaligned string. */
-	movdqu	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	/* Find where NULL is.  */
-	pmovmskb %xmm2, %ecx
-	/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-	add	$16, %edi
-
-	test	%eax, %eax
-	jnz	L(unaligned_match1)
-
-	test	%ecx, %ecx
-	jnz	L(return_null)
-
-	and	$-16, %edi
-
-	PUSH	(%esi)
-	PUSH	(%ebx)
-
-	xor	%ebx, %ebx
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-	CFI_POP	(%ebx)
-
-	.p2align 4
-L(unaligned_match1):
-	test	%ecx, %ecx
-	jnz	L(prolog_find_zero_1)
-
-	PUSH	(%esi)
-	PUSH	(%ebx)
-
-	mov	%eax, %ebx
-	mov	%edi, %esi
-	and	$-16, %edi
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-	CFI_POP	(%ebx)
-
-	.p2align 4
-L(crosscache):
-/* Hancle unaligned string.  */
-	and	$15, %ecx
-	and	$-16, %edi
-	pxor	%xmm3, %xmm3
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm3
-	pcmpeqb	%xmm1, %xmm0
-	/* Find where NULL is.  */
-	pmovmskb %xmm3, %edx
-	/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-	/* Remove the leading bytes.  */
-	shr	%cl, %edx
-	shr	%cl, %eax
-	add	$16, %edi
-
-	test	%eax, %eax
-	jnz	L(unaligned_match)
-
-	test	%edx, %edx
-	jnz	L(return_null)
-
-	PUSH	(%esi)
-	PUSH	(%ebx)
-
-	xor	%ebx, %ebx
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-	CFI_POP	(%ebx)
-
-	.p2align 4
-L(unaligned_match):
-	test	%edx, %edx
-	jnz	L(prolog_find_zero)
-
-	PUSH	(%esi)
-	PUSH	(%ebx)
-
-	mov	%eax, %ebx
-	lea	(%edi, %ecx), %esi
-
-/* Loop start on aligned string.  */
-	.p2align 4
-L(loop):
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jz	L(loop)
-
-L(matches):
-	test	%eax, %eax
-	jnz	L(match)
-L(return_value):
-	test	%ebx, %ebx
-	jz	L(return_null_1)
-	mov	%ebx, %eax
-	mov	%esi, %edi
-
-	POP	(%ebx)
-	POP	(%esi)
-
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(return_null_1):
-	POP	(%ebx)
-	POP	(%esi)
-
-	xor	%eax, %eax
-	RETURN
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(match):
-	pmovmskb %xmm2, %ecx
-	test	%ecx, %ecx
-	jnz	L(find_zero)
-	mov	%eax, %ebx
-	mov	%edi, %esi
-	jmp	L(loop)
-
-	.p2align 4
-L(find_zero):
-	test	%cl, %cl
-	jz	L(find_zero_high)
-	mov	%cl, %dl
-	and	$15, %dl
-	jz	L(find_zero_8)
-	test	$0x01, %cl
-	jnz	L(FindZeroExit1)
-	test	$0x02, %cl
-	jnz	L(FindZeroExit2)
-	test	$0x04, %cl
-	jnz	L(FindZeroExit3)
-	and	$1 << 4 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_8):
-	test	$0x10, %cl
-	jnz	L(FindZeroExit5)
-	test	$0x20, %cl
-	jnz	L(FindZeroExit6)
-	test	$0x40, %cl
-	jnz	L(FindZeroExit7)
-	and	$1 << 8 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_high):
-	mov	%ch, %dh
-	and	$15, %dh
-	jz	L(find_zero_high_8)
-	test	$0x01, %ch
-	jnz	L(FindZeroExit9)
-	test	$0x02, %ch
-	jnz	L(FindZeroExit10)
-	test	$0x04, %ch
-	jnz	L(FindZeroExit11)
-	and	$1 << 12 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_high_8):
-	test	$0x10, %ch
-	jnz	L(FindZeroExit13)
-	test	$0x20, %ch
-	jnz	L(FindZeroExit14)
-	test	$0x40, %ch
-	jnz	L(FindZeroExit15)
-	and	$1 << 16 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit1):
-	and	$1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit2):
-	and	$1 << 2 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit3):
-	and	$1 << 3 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit5):
-	and	$1 << 5 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit6):
-	and	$1 << 6 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit7):
-	and	$1 << 7 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit9):
-	and	$1 << 9 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit10):
-	and	$1 << 10 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit11):
-	and	$1 << 11 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit13):
-	and	$1 << 13 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit14):
-	and	$1 << 14 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp	L(match_exit)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit15):
-	and	$1 << 15 - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-
-	.p2align 4
-L(match_exit):
-	test	%ah, %ah
-	jnz	L(match_exit_high)
-	mov	%al, %dl
-	and	$15 << 4, %dl
-	jnz	L(match_exit_8)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	lea	-16(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_exit_8):
-	test	$0x80, %al
-	jnz	L(Exit8)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	lea	-12(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_exit_high):
-	mov	%ah, %dh
-	and	$15 << 4, %dh
-	jnz	L(match_exit_high_8)
-	test	$0x08, %ah
-	jnz	L(Exit12)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	lea	-8(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_exit_high_8):
-	test	$0x80, %ah
-	jnz	L(Exit16)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	lea	-4(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit2):
-	lea	-15(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit3):
-	lea	-14(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit4):
-	lea	-13(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit6):
-	lea	-11(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit7):
-	lea	-10(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit8):
-	lea	-9(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit10):
-	lea	-7(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit11):
-	lea	-6(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit12):
-	lea	-5(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit14):
-	lea	-3(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit15):
-	lea	-2(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit16):
-	lea	-1(%edi), %eax
-	RETURN
-
-/* Return NULL.  */
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero):
-	add	%ecx, %edi
-	mov     %edx, %ecx
-L(prolog_find_zero_1):
-	test	%cl, %cl
-	jz	L(prolog_find_zero_high)
-	mov	%cl, %dl
-	and	$15, %dl
-	jz	L(prolog_find_zero_8)
-	test	$0x01, %cl
-	jnz	L(PrologFindZeroExit1)
-	test	$0x02, %cl
-	jnz	L(PrologFindZeroExit2)
-	test	$0x04, %cl
-	jnz	L(PrologFindZeroExit3)
-	and	$1 << 4 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_8):
-	test	$0x10, %cl
-	jnz	L(PrologFindZeroExit5)
-	test	$0x20, %cl
-	jnz	L(PrologFindZeroExit6)
-	test	$0x40, %cl
-	jnz	L(PrologFindZeroExit7)
-	and	$1 << 8 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_high):
-	mov	%ch, %dh
-	and	$15, %dh
-	jz	L(prolog_find_zero_high_8)
-	test	$0x01, %ch
-	jnz	L(PrologFindZeroExit9)
-	test	$0x02, %ch
-	jnz	L(PrologFindZeroExit10)
-	test	$0x04, %ch
-	jnz	L(PrologFindZeroExit11)
-	and	$1 << 12 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_high_8):
-	test	$0x10, %ch
-	jnz	L(PrologFindZeroExit13)
-	test	$0x20, %ch
-	jnz	L(PrologFindZeroExit14)
-	test	$0x40, %ch
-	jnz	L(PrologFindZeroExit15)
-	and	$1 << 16 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit1):
-	and	$1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit2):
-	and	$1 << 2 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit3):
-	and	$1 << 3 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit5):
-	and	$1 << 5 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit6):
-	and	$1 << 6 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit7):
-	and	$1 << 7 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit9):
-	and	$1 << 9 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit10):
-	and	$1 << 10 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit11):
-	and	$1 << 11 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit13):
-	and	$1 << 13 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit14):
-	and	$1 << 14 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit15):
-	and	$1 << 15 - 1, %eax
-	jnz	L(match_exit)
-	xor	%eax, %eax
-	RETURN
-
-END (__strrchr_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strrchr.S b/sysdeps/i386/i686/multiarch/strrchr.S
deleted file mode 100644
index d9281eaeae..0000000000
--- a/sysdeps/i386/i686/multiarch/strrchr.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Multiple versions of strrchr
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
-	.text
-ENTRY(strrchr)
-	.type	strrchr, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__strrchr_ia32)
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__strrchr_sse2_bsf)
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jz	2f
-	LOAD_FUNC_GOT_EAX (__strrchr_sse2)
-2:	ret
-END(strrchr)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __strrchr_ia32, @function; \
-	.globl __strrchr_ia32; \
-	.p2align 4; \
-	__strrchr_ia32: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __strrchr_ia32, .-__strrchr_ia32
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_strrchr; __GI_strrchr = __strrchr_ia32
-#endif
-
-#include "../../strrchr.S"
diff --git a/sysdeps/i386/i686/multiarch/strspn-c.c b/sysdeps/i386/i686/multiarch/strspn-c.c
deleted file mode 100644
index bea09dea71..0000000000
--- a/sysdeps/i386/i686/multiarch/strspn-c.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define __strspn_sse2 __strspn_ia32 /* every __strspn_sse2 token in the file included below becomes __strspn_ia32 */
-#include <sysdeps/x86_64/multiarch/strspn-c.c> /* reuse the x86_64 source under that renaming */
diff --git a/sysdeps/i386/i686/multiarch/strspn.S b/sysdeps/i386/i686/multiarch/strspn.S
deleted file mode 100644
index 1269062381..0000000000
--- a/sysdeps/i386/i686/multiarch/strspn.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Multiple versions of strspn
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2009-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <config.h>
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc.  */
-#if IS_IN (libc)
-	.text
-ENTRY(strspn)		/* IFUNC resolver for strspn: returns with the chosen implementation in EAX.  */
-	.type	strspn, @gnu_indirect_function	/* mark the symbol as an indirect function */
-	LOAD_GOT_AND_RTLD_GLOBAL_RO	/* NOTE(review): presumably sets up GOT / CPU-feature data access; macro defined elsewhere */
-	LOAD_FUNC_GOT_EAX (__strspn_ia32)	/* initial choice: __strspn_ia32 */
-	HAS_CPU_FEATURE (SSE4_2)	/* presumably a flag-setting test of the SSE4_2 feature bit -- confirm in init-arch.h */
-	jz	2f			/* ZF set: keep __strspn_ia32 */
-	LOAD_FUNC_GOT_EAX (__strspn_sse42)	/* otherwise: __strspn_sse42 */
-2:	ret
-END(strspn)
-
-# undef ENTRY	/* From here on ENTRY/END ignore their argument and always emit __strspn_ia32.  */
-# define ENTRY(name) \
-	.type __strspn_ia32, @function; \
-	.globl __strspn_ia32; \
-	.p2align 4; \
-__strspn_ia32: cfi_startproc; \
-	CALL_MCOUNT	/* global, 16-byte aligned function label with CFI opened */
-# undef END
-# define END(name) \
-	cfi_endproc; .size __strspn_ia32, .-__strspn_ia32	/* close CFI and record the symbol size */
-# undef libc_hidden_builtin_def
-/* IFUNC cannot be used for hidden functions in a shared library: they
-   are called without first setting up EBX, which the PLT used by IFUNC
-   needs.  Bind the hidden alias directly to the ia32 version instead.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_strspn; __GI_strspn = __strspn_ia32	/* __GI_strspn becomes a plain alias of __strspn_ia32 */
-#endif
-
-#include "../../strspn.S"
diff --git a/sysdeps/i386/i686/multiarch/test-multiarch.c b/sysdeps/i386/i686/multiarch/test-multiarch.c
deleted file mode 100644
index 593cfec273..0000000000
--- a/sysdeps/i386/i686/multiarch/test-multiarch.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/x86_64/multiarch/test-multiarch.c>
diff --git a/sysdeps/i386/i686/multiarch/varshift.c b/sysdeps/i386/i686/multiarch/varshift.c
deleted file mode 100644
index 7760b966e2..0000000000
--- a/sysdeps/i386/i686/multiarch/varshift.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/x86_64/multiarch/varshift.c>
diff --git a/sysdeps/i386/i686/multiarch/varshift.h b/sysdeps/i386/i686/multiarch/varshift.h
deleted file mode 100644
index 7c72c70d67..0000000000
--- a/sysdeps/i386/i686/multiarch/varshift.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/x86_64/multiarch/varshift.h>
diff --git a/sysdeps/i386/i686/multiarch/wcschr-c.c b/sysdeps/i386/i686/multiarch/wcschr-c.c
deleted file mode 100644
index 38d41d04de..0000000000
--- a/sysdeps/i386/i686/multiarch/wcschr-c.c
+++ /dev/null
@@ -1,22 +0,0 @@
-#include <wchar.h> /* declares wcschr, whose type is reused for __wcschr_ia32 below */
-
-#if IS_IN (libc)
-# undef libc_hidden_weak
-# define libc_hidden_weak(name) /* expands to nothing in the file included below */
-
-# undef weak_alias
-# define weak_alias(name,alias) /* expands to nothing: the included file creates no weak aliases */
-
-# ifdef SHARED
-#  undef libc_hidden_def
-#  define libc_hidden_def(name) \
-   __hidden_ver1 (__wcschr_ia32, __GI_wcschr, __wcschr_ia32); \
-   strong_alias (__wcschr_ia32, __wcschr_ia32_1); \
-   __hidden_ver1 (__wcschr_ia32_1, __GI___wcschr, __wcschr_ia32_1); /* ties __GI_wcschr and __GI___wcschr to __wcschr_ia32; the _1 alias gives the second __hidden_ver1 its own local name */
-# endif
-#endif
-
-extern __typeof (wcschr) __wcschr_ia32; /* same type as wcschr */
-
-#define WCSCHR  __wcschr_ia32 /* presumably the name wcsmbs/wcschr.c defines its function under -- confirm there */
-#include <wcsmbs/wcschr.c> /* compile the generic C implementation with the overrides above in effect */
diff --git a/sysdeps/i386/i686/multiarch/wcschr-sse2.S b/sysdeps/i386/i686/multiarch/wcschr-sse2.S
deleted file mode 100644
index 9ff6c3b8d6..0000000000
--- a/sysdeps/i386/i686/multiarch/wcschr-sse2.S
+++ /dev/null
@@ -1,219 +0,0 @@
-/* wcschr with SSE2, without using bsf instructions
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-# include <sysdep.h>
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)		/* unwind info only: CFA moved by 4, REG saved at offset 0 */
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)		/* unwind info only: CFA moved back by 4, REG restored */
-
-# define PUSH(REG)	pushl REG; CFI_PUSH (REG)	/* real push plus matching unwind info */
-# define POP(REG)	popl REG; CFI_POP (REG)	/* real pop plus matching unwind info */
-
-# define PARMS	4	/* %esp offset of the first stack argument at entry (past the return address) */
-# define STR1	PARMS	/* first argument */
-# define STR2	STR1+4	/* second argument */
-
-	atom_text_section
-ENTRY (__wcschr_sse2)	/* Scan the zero-terminated dword string at the first argument for a dword equal to the second argument; eax = address of the first match (a match on the terminating zero counts), or 0 if a zero dword comes first.  */
-
-	mov	STR1(%esp), %ecx		/* ecx = string pointer */
-	movd	STR2(%esp), %xmm1		/* low dword of xmm1 = value searched for */
-
-	mov	%ecx, %eax
-	punpckldq %xmm1, %xmm1
-	pxor	%xmm2, %xmm2		/* xmm2 = 0, used for the terminator compare */
-	punpckldq %xmm1, %xmm1		/* two unpacks: the value now fills all four dwords of xmm1 */
-
-	and	$63, %eax		/* offset of the pointer within a 64-byte block */
-	cmp	$48, %eax
-	ja	L(cross_cache)		/* a 16-byte load from ecx would run past that block */
-
-	movdqu	(%ecx), %xmm0		/* first 16 bytes, unaligned load */
-	pcmpeqd	%xmm0, %xmm2		/* xmm2 = per-dword "is zero" mask */
-	pcmpeqd	%xmm1, %xmm0		/* xmm0 = per-dword "equals value" mask */
-	pmovmskb %xmm2, %edx		/* edx = zero mask, 4 bits per dword */
-	pmovmskb %xmm0, %eax		/* eax = match mask, 4 bits per dword */
-	or	%eax, %edx
-	jnz	L(matches)
-	and	$-16, %ecx		/* round down; L(loop) adds 16 before loading */
-	jmp	L(loop)
-
-	.p2align 4
-L(cross_cache):
-	PUSH	(%edi)
-	mov	%ecx, %edi
-	mov	%eax, %ecx
-	and	$-16, %edi		/* edi = pointer rounded down to 16 bytes */
-	and	$15, %ecx		/* ecx = byte offset of the string inside that chunk */
-	movdqa	(%edi), %xmm0		/* aligned load of the chunk containing the start */
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-
-	sarl	%cl, %edx		/* drop mask bits for bytes before the string start */
-	sarl	%cl, %eax
-	test	%eax, %eax
-	jz	L(unaligned_no_match)
-
-	add	%edi, %ecx		/* ecx = original pointer again; masks are now relative to it */
-	POP	(%edi)
-
-	test	%edx, %edx
-	jz	L(match_case1)		/* match but no zero in this chunk */
-	test	%al, %al
-	jz	L(match_higth_case2)	/* match is in dword 2 or 3 */
-	test	$15, %al
-	jnz	L(match_case2_4)	/* match in dword 0 */
-	test	$15, %dl
-	jnz	L(return_null)		/* zero in dword 0 precedes the match in dword 1 */
-	lea	4(%ecx), %eax		/* match in dword 1 */
-	ret
-
-	CFI_PUSH (%edi)			/* unwind info only: the code below is entered with edi still pushed */
-
-	.p2align 4
-L(unaligned_no_match):
-	mov	%edi, %ecx		/* continue from the aligned chunk address */
-	POP	(%edi)
-
-	test	%edx, %edx
-	jnz	L(return_null)		/* zero found and no match */
-
-	pxor	%xmm2, %xmm2		/* re-zero: xmm2 may still flag zeros located before the string start */
-
-/* Loop start on aligned string.  */
-	.p2align 4
-L(loop):
-	add	$16, %ecx		/* step 1 of 4: advance, then test 16 aligned bytes */
-	movdqa	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-	add	$16, %ecx		/* step 2 of 4 */
-
-	movdqa	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-	add	$16, %ecx		/* step 3 of 4 */
-
-	movdqa	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-	add	$16, %ecx		/* step 4 of 4 */
-
-	movdqa	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jz	L(loop)			/* nothing found: next round; otherwise fall into L(matches) */
-
-	.p2align 4
-L(matches):
-	pmovmskb %xmm2, %edx		/* recompute the zero mask: edx was overwritten by the OR with eax */
-	test	%eax, %eax
-	jz	L(return_null)		/* only a zero dword was found */
-	test	%edx, %edx
-	jz	L(match_case1)		/* only a match was found */
-
-	.p2align 4
-L(match_case2):				/* chunk holds both a match and a zero dword */
-	test	%al, %al
-	jz	L(match_higth_case2)	/* match is in dword 2 or 3 */
-	test	$15, %al
-	jnz	L(match_case2_4)	/* match in dword 0 */
-	test	$15, %dl
-	jnz	L(return_null)		/* zero in dword 0 precedes the match in dword 1 */
-	lea	4(%ecx), %eax		/* match in dword 1 */
-	ret
-
-	.p2align 4
-L(match_case2_4):
-	mov	%ecx, %eax		/* match in dword 0 */
-	ret
-
-	.p2align 4
-L(match_higth_case2):
-	test	%dl, %dl
-	jnz	L(return_null)		/* zero in dword 0 or 1 precedes the match */
-	test	$15, %ah
-	jnz	L(match_case2_12)	/* match in dword 2 */
-	test	$15, %dh
-	jnz	L(return_null)		/* zero in dword 2 precedes the match in dword 3 */
-	lea	12(%ecx), %eax		/* match in dword 3 */
-	ret
-
-	.p2align 4
-L(match_case2_12):
-	lea	8(%ecx), %eax		/* match in dword 2 */
-	ret
-
-	.p2align 4
-L(match_case1):				/* match with no zero dword in the chunk */
-	test	%al, %al
-	jz	L(match_higth_case1)	/* match is in dword 2 or 3 */
-
-	test	$0x01, %al
-	jnz	L(exit0)		/* match in dword 0 */
-	lea	4(%ecx), %eax		/* match in dword 1 */
-	ret
-
-	.p2align 4
-L(match_higth_case1):
-	test	$0x01, %ah
-	jnz	L(exit3)		/* match in dword 2 */
-	lea	12(%ecx), %eax		/* match in dword 3 */
-	ret
-
-	.p2align 4
-L(exit0):
-	mov	%ecx, %eax		/* return the chunk address itself */
-	ret
-
-	.p2align 4
-L(exit3):
-	lea	8(%ecx), %eax		/* return the address of dword 2 */
-	ret
-
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax		/* not found: return 0 */
-	ret
-
-END (__wcschr_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcschr.S b/sysdeps/i386/i686/multiarch/wcschr.S
deleted file mode 100644
index d3c65a6436..0000000000
--- a/sysdeps/i386/i686/multiarch/wcschr.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of wcschr
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
-	.text
-ENTRY(__wcschr)		/* IFUNC resolver for wcschr: returns with the chosen implementation in EAX.  */
-	.type	wcschr, @gnu_indirect_function	/* NOTE(review): names wcschr although the entry label is __wcschr -- confirm this is intended */
-	LOAD_GOT_AND_RTLD_GLOBAL_RO	/* NOTE(review): presumably sets up GOT / CPU-feature data access; macro defined elsewhere */
-	LOAD_FUNC_GOT_EAX (__wcschr_ia32)	/* initial choice: __wcschr_ia32 */
-	HAS_CPU_FEATURE (SSE2)		/* presumably a flag-setting test of the SSE2 feature bit -- confirm in init-arch.h */
-	jz	2f			/* ZF set: keep __wcschr_ia32 */
-	LOAD_FUNC_GOT_EAX (__wcschr_sse2)	/* otherwise: __wcschr_sse2 */
-2:	ret
-END(__wcschr)
-weak_alias (__wcschr, wcschr)	/* wcschr is a weak alias of __wcschr */
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcscmp-c.c b/sysdeps/i386/i686/multiarch/wcscmp-c.c
deleted file mode 100644
index e3337d77e2..0000000000
--- a/sysdeps/i386/i686/multiarch/wcscmp-c.c
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <wchar.h> /* declares wcscmp, whose type is reused for __wcscmp_ia32 below */
-
-#define WCSCMP __wcscmp_ia32 /* presumably the name wcsmbs/wcscmp.c defines its function under -- confirm there */
-#ifdef SHARED
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
-  __hidden_ver1 (__wcscmp_ia32, __GI___wcscmp, __wcscmp_ia32); /* ties __GI___wcscmp to __wcscmp_ia32 */
-#endif
-#undef weak_alias /* the included file must not create weak aliases */
-#define weak_alias(name, alias)
-
-extern __typeof (wcscmp) __wcscmp_ia32; /* same type as wcscmp */
-
-#include "wcsmbs/wcscmp.c" /* compile the generic C implementation with the overrides above in effect */
diff --git a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
deleted file mode 100644
index a464b58204..0000000000
--- a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
+++ /dev/null
@@ -1,1018 +0,0 @@
-/* wcscmp with SSE2
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)		/* unwind info only: CFA moved by 4, REG saved at offset 0 */
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)		/* unwind info only: CFA moved back by 4, REG restored */
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG) /* real push plus matching unwind info */
-# define POP(REG) popl REG; CFI_POP (REG) /* real pop plus matching unwind info */
-
-# define ENTRANCE PUSH(%esi); PUSH(%edi) /* save esi, then edi */
-# define RETURN  POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi); /* restore and return; the trailing CFI_PUSHes re-describe the saved registers for the code that follows the ret */
-# define PARMS  4 /* %esp offset of the first stack argument at entry (past the return address) */
-# define STR1  PARMS /* first argument */
-# define STR2  STR1+4 /* second argument */
-
-/* Note: wcscmp uses signed comparison, not unsigned as in strcmp function. */
-
-	.text
-ENTRY (__wcscmp_sse2)
-/*
-	* This implementation uses SSE to compare up to 16 bytes at a time.
-*/
-	mov	STR1(%esp), %edx
-	mov	STR2(%esp), %eax
-
-	mov	(%eax), %ecx
-	cmp	%ecx, (%edx)
-	jne	L(neq)
-	test	%ecx, %ecx
-	jz	L(eq)
-
-	mov	4(%eax), %ecx
-	cmp	%ecx, 4(%edx)
-	jne	L(neq)
-	test	%ecx, %ecx
-	jz	L(eq)
-
-	mov	8(%eax), %ecx
-	cmp	%ecx, 8(%edx)
-	jne	L(neq)
-	test	%ecx, %ecx
-	jz	L(eq)
-
-	mov	12(%eax), %ecx
-	cmp	%ecx, 12(%edx)
-	jne	L(neq)
-	test	%ecx, %ecx
-	jz	L(eq)
-
-	ENTRANCE
-	add	$16, %eax
-	add	$16, %edx
-
-	mov	%eax, %esi
-	mov	%edx, %edi
-	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
-	mov	%al, %ch
-	mov	%dl, %cl
-	and	$63, %eax		/* esi alignment in cache line */
-	and	$63, %edx		/* edi alignment in cache line */
-	and	$15, %cl
-	jz	L(continue_00)
-	cmp	$16, %edx
-	jb	L(continue_0)
-	cmp	$32, %edx
-	jb	L(continue_16)
-	cmp	$48, %edx
-	jb	L(continue_32)
-
-L(continue_48):
-	and	$15, %ch
-	jz	L(continue_48_00)
-	cmp	$16, %eax
-	jb	L(continue_0_48)
-	cmp	$32, %eax
-	jb	L(continue_16_48)
-	cmp	$48, %eax
-	jb	L(continue_32_48)
-
-	.p2align 4
-L(continue_48_48):
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	16(%edi), %xmm1
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqu	48(%edi), %xmm1
-	movdqu	48(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_48_48)
-
-L(continue_0):
-	and	$15, %ch
-	jz	L(continue_0_00)
-	cmp	$16, %eax
-	jb	L(continue_0_0)
-	cmp	$32, %eax
-	jb	L(continue_0_16)
-	cmp	$48, %eax
-	jb	L(continue_0_32)
-
-	.p2align 4
-L(continue_0_48):
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	16(%edi), %xmm1
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	mov	48(%esi), %ecx
-	cmp	%ecx, 48(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	52(%esi), %ecx
-	cmp	%ecx, 52(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	56(%esi), %ecx
-	cmp	%ecx, 56(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	60(%esi), %ecx
-	cmp	%ecx, 60(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_0_48)
-
-	.p2align 4
-L(continue_00):
-	and	$15, %ch
-	jz	L(continue_00_00)
-	cmp	$16, %eax
-	jb	L(continue_00_0)
-	cmp	$32, %eax
-	jb	L(continue_00_16)
-	cmp	$48, %eax
-	jb	L(continue_00_32)
-
-	.p2align 4
-L(continue_00_48):
-	pcmpeqd	(%edi), %xmm0
-	mov	(%edi), %eax
-	pmovmskb %xmm0, %ecx
-	test	%ecx, %ecx
-	jnz	L(less4_double_words1)
-
-	cmp	(%esi), %eax
-	jne	L(nequal)
-
-	mov	4(%edi), %eax
-	cmp	4(%esi), %eax
-	jne	L(nequal)
-
-	mov	8(%edi), %eax
-	cmp	8(%esi), %eax
-	jne	L(nequal)
-
-	mov	12(%edi), %eax
-	cmp	12(%esi), %eax
-	jne	L(nequal)
-
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqu	48(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	48(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_00_48)
-
-	.p2align 4
-L(continue_32):
-	and	$15, %ch
-	jz	L(continue_32_00)
-	cmp	$16, %eax
-	jb	L(continue_0_32)
-	cmp	$32, %eax
-	jb	L(continue_16_32)
-	cmp	$48, %eax
-	jb	L(continue_32_32)
-
-	.p2align 4
-L(continue_32_48):
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	16(%esi), %ecx
-	cmp	%ecx, 16(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	20(%esi), %ecx
-	cmp	%ecx, 20(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	24(%esi), %ecx
-	cmp	%ecx, 24(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	28(%esi), %ecx
-	cmp	%ecx, 28(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	32(%edi), %xmm1
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqu	48(%edi), %xmm1
-	movdqu	48(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results */
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_32_48)
-
-	.p2align 4
-L(continue_16):
-	and	$15, %ch
-	jz	L(continue_16_00)
-	cmp	$16, %eax
-	jb	L(continue_0_16)
-	cmp	$32, %eax
-	jb	L(continue_16_16)
-	cmp	$48, %eax
-	jb	L(continue_16_32)
-
-	.p2align 4
-L(continue_16_48):
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	16(%edi), %xmm1
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	mov	32(%esi), %ecx
-	cmp	%ecx, 32(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	36(%esi), %ecx
-	cmp	%ecx, 36(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	40(%esi), %ecx
-	cmp	%ecx, 40(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	44(%esi), %ecx
-	cmp	%ecx, 44(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	48(%edi), %xmm1
-	movdqu	48(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_16_48)
-
-	.p2align 4
-L(continue_00_00):
-	movdqa	(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqa	16(%edi), %xmm3
-	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%esi), %xmm3		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm3		/* packed sub of comparison results*/
-	pmovmskb %xmm3, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqa	32(%edi), %xmm5
-	pcmpeqd	%xmm5, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%esi), %xmm5		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm5		/* packed sub of comparison results*/
-	pmovmskb %xmm5, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqa	48(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	48(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_00_00)
-
-	.p2align 4
-L(continue_00_32):
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_00_48)
-
-	.p2align 4
-L(continue_00_16):
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	add	$32, %esi
-	add	$32, %edi
-	jmp	L(continue_00_48)
-
-	.p2align 4
-L(continue_00_0):
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	add	$48, %esi
-	add	$48, %edi
-	jmp	L(continue_00_48)
-
-	.p2align 4
-L(continue_48_00):
-	pcmpeqd	(%esi), %xmm0
-	mov	(%edi), %eax
-	pmovmskb %xmm0, %ecx
-	test	%ecx, %ecx
-	jnz	L(less4_double_words1)
-
-	cmp	(%esi), %eax
-	jne	L(nequal)
-
-	mov	4(%edi), %eax
-	cmp	4(%esi), %eax
-	jne	L(nequal)
-
-	mov	8(%edi), %eax
-	cmp	8(%esi), %eax
-	jne	L(nequal)
-
-	mov	12(%edi), %eax
-	cmp	12(%esi), %eax
-	jne	L(nequal)
-
-	movdqu	16(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqu	48(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	48(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_48_00)
-
-	.p2align 4
-L(continue_32_00):
-	movdqu	(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_48_00)
-
-	.p2align 4
-L(continue_16_00):
-	movdqu	(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	add	$32, %esi
-	add	$32, %edi
-	jmp	L(continue_48_00)
-
-	.p2align 4
-L(continue_0_00):
-	movdqu	(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	add	$48, %esi
-	add	$48, %edi
-	jmp	L(continue_48_00)
-
-	.p2align 4
-L(continue_32_32):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_48_48)
-
-	.p2align 4
-L(continue_16_16):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm3
-	movdqu	16(%esi), %xmm4
-	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm4, %xmm3		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm3		/* packed sub of comparison results*/
-	pmovmskb %xmm3, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	add	$32, %esi
-	add	$32, %edi
-	jmp	L(continue_48_48)
-
-	.p2align 4
-L(continue_0_0):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm3
-	movdqu	16(%esi), %xmm4
-	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm4, %xmm3		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm3		/* packed sub of comparison results*/
-	pmovmskb %xmm3, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	add	$48, %esi
-	add	$48, %edi
-	jmp	L(continue_48_48)
-
-	.p2align 4
-L(continue_0_16):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm1
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	add	$32, %esi
-	add	$32, %edi
-	jmp	L(continue_32_48)
-
-	.p2align 4
-L(continue_0_32):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_16_48)
-
-	.p2align 4
-L(continue_16_32):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_32_48)
-
-	.p2align 4
-L(less4_double_words1):
-	cmp	(%esi), %eax
-	jne	L(nequal)
-	test	%eax, %eax
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(less4_double_words):
-	xor	%eax, %eax
-	test	%dl, %dl
-	jz	L(next_two_double_words)
-	and	$15, %dl
-	jz	L(second_double_word)
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(second_double_word):
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(next_two_double_words):
-	and	$15, %dh
-	jz	L(fourth_double_word)
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(fourth_double_word):
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(less4_double_words_16):
-	xor	%eax, %eax
-	test	%dl, %dl
-	jz	L(next_two_double_words_16)
-	and	$15, %dl
-	jz	L(second_double_word_16)
-	mov	16(%esi), %ecx
-	cmp	%ecx, 16(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(second_double_word_16):
-	mov	20(%esi), %ecx
-	cmp	%ecx, 20(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(next_two_double_words_16):
-	and	$15, %dh
-	jz	L(fourth_double_word_16)
-	mov	24(%esi), %ecx
-	cmp	%ecx, 24(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(fourth_double_word_16):
-	mov	28(%esi), %ecx
-	cmp	%ecx, 28(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(less4_double_words_32):
-	xor	%eax, %eax
-	test	%dl, %dl
-	jz	L(next_two_double_words_32)
-	and	$15, %dl
-	jz	L(second_double_word_32)
-	mov	32(%esi), %ecx
-	cmp	%ecx, 32(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(second_double_word_32):
-	mov	36(%esi), %ecx
-	cmp	%ecx, 36(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(next_two_double_words_32):
-	and	$15, %dh
-	jz	L(fourth_double_word_32)
-	mov	40(%esi), %ecx
-	cmp	%ecx, 40(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(fourth_double_word_32):
-	mov	44(%esi), %ecx
-	cmp	%ecx, 44(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(less4_double_words_48):
-	xor	%eax, %eax
-	test	%dl, %dl
-	jz	L(next_two_double_words_48)
-	and	$15, %dl
-	jz	L(second_double_word_48)
-	mov	48(%esi), %ecx
-	cmp	%ecx, 48(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(second_double_word_48):
-	mov	52(%esi), %ecx
-	cmp	%ecx, 52(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(next_two_double_words_48):
-	and	$15, %dh
-	jz	L(fourth_double_word_48)
-	mov	56(%esi), %ecx
-	cmp	%ecx, 56(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(fourth_double_word_48):
-	mov	60(%esi), %ecx
-	cmp	%ecx, 60(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(nequal):
-	mov	$1, %eax
-	jg	L(return)
-	neg	%eax
-	RETURN
-
-	.p2align 4
-L(return):
-	RETURN
-
-	.p2align 4
-L(equal):
-	xorl	%eax, %eax
-	RETURN
-
-	CFI_POP (%edi)
-	CFI_POP (%esi)
-
-	.p2align 4
-L(neq):
-	mov	$1, %eax
-	jg	L(neq_bigger)
-	neg	%eax
-
-L(neq_bigger):
-	ret
-
-	.p2align 4
-L(eq):
-	xorl	%eax, %eax
-	ret
-
-END (__wcscmp_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcscmp.S b/sysdeps/i386/i686/multiarch/wcscmp.S
deleted file mode 100644
index 7118bdd4db..0000000000
--- a/sysdeps/i386/i686/multiarch/wcscmp.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Multiple versions of wcscmp
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc and for the
-   DSO.  In static binaries, we need wcscmp before the initialization
-   happened.  */
-#if IS_IN (libc)
-	.text
-ENTRY(__wcscmp)	/* Indirect-function resolver; see the .type directive below.  */
-	.type	__wcscmp, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__wcscmp_ia32)	/* Default choice; presumably leaves the address in %eax (macro defined elsewhere).  */
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f	/* ZF set by the feature test: keep the ia32 default.  */
-	LOAD_FUNC_GOT_EAX (__wcscmp_sse2)	/* Otherwise replace it with the SSE2 version.  */
-2:	ret
-END(__wcscmp)
-weak_alias (__wcscmp, wcscmp)	/* wcscmp is a weak alias of the resolver symbol.  */
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcscpy-c.c b/sysdeps/i386/i686/multiarch/wcscpy-c.c
deleted file mode 100644
index fb3000392b..0000000000
--- a/sysdeps/i386/i686/multiarch/wcscpy-c.c
+++ /dev/null
@@ -1,5 +0,0 @@
-#if IS_IN (libc)
-# define wcscpy  __wcscpy_ia32	/* Inside libc, the file included below is compiled under this name.  */
-#endif
-
-#include "wcsmbs/wcscpy.c"	/* Shared C source; sees the rename above when IS_IN (libc).  */
diff --git a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
deleted file mode 100644
index 6280ba92ab..0000000000
--- a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
+++ /dev/null
@@ -1,600 +0,0 @@
-/* wcscpy with SSSE3
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-# include <sysdep.h>
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-# define POP(REG)	popl REG; CFI_POP (REG)
-
-# define PARMS	4	/* Offset of the first argument from %esp at entry, before any push.  */
-# define RETURN	POP (%edi); ret; CFI_PUSH (%edi)	/* The CFI_PUSH after ret restores unwind state for the code that follows.  */
-# define STR1	PARMS	/* Loaded into %edx and written through: the destination.  */
-# define STR2	STR1+4	/* Loaded into %ecx and read from: the source.  */
-# define LEN	STR2+4	/* Not referenced anywhere in this file.  */
-
-	atom_text_section
-ENTRY (__wcscpy_ssse3)
-	mov	STR1(%esp), %edx	/* %edx = destination.  */
-	mov	STR2(%esp), %ecx	/* %ecx = source.  */
-
-	cmp	$0, (%ecx)	/* Terminator within the first four wide characters?  */
-	jz	L(ExitTail4)
-	cmp	$0, 4(%ecx)
-	jz	L(ExitTail8)
-	cmp	$0, 8(%ecx)
-	jz	L(ExitTail12)
-	cmp	$0, 12(%ecx)
-	jz	L(ExitTail16)
-
-	PUSH	(%edi)
-	mov	%edx, %edi	/* %edi keeps the original destination; it becomes the return value.  */
-	PUSH	(%esi)
-	lea	16(%ecx), %esi
-
-	and	$-16, %esi	/* %esi = first 16-byte boundary above the source start.  */
-
-	pxor	%xmm0, %xmm0
-	pcmpeqd	(%esi), %xmm0	/* Zero wide character in that aligned block?  */
-	movdqu	(%ecx), %xmm1
-	movdqu	%xmm1, (%edx)	/* Copy the first 16 bytes; the checks above found no terminator there.  */
-
-	pmovmskb %xmm0, %eax
-	sub	%ecx, %esi	/* %esi = byte offset of the aligned block from the source.  */
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	mov	%edx, %eax
-	lea	16(%edx), %edx
-	and	$-16, %edx	/* Round the destination up to the next 16-byte boundary ...  */
-	sub	%edx, %eax
-
-	sub	%eax, %ecx	/* ... and advance the source by the same number of bytes.  */
-	mov	%ecx, %eax
-	and	$0xf, %eax	/* Source misalignment relative to the aligned destination.  */
-	mov	$0, %esi	/* mov does not touch the flags, so jz below still tests the and.  */
-
-	jz	L(Align16Both)
-	cmp	$4, %eax
-	je	L(Shl4)
-	cmp	$8, %eax
-	je	L(Shl8)
-	jmp	L(Shl12)
-
-L(Align16Both):	/* Source and destination are both 16-byte aligned; %esi is the running offset.  */
-	movaps	(%ecx), %xmm1
-	movaps	16(%ecx), %xmm2
-	movaps	%xmm1, (%edx)
-	pcmpeqd	%xmm2, %xmm0	/* %xmm0 is all-zero whenever the previous test found nothing.  */
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm3
-	movaps	%xmm2, (%edx, %esi)	/* Store the block already known to be terminator-free.  */
-	pcmpeqd	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm4
-	movaps	%xmm3, (%edx, %esi)
-	pcmpeqd	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm1
-	movaps	%xmm4, (%edx, %esi)
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm2
-	movaps	%xmm1, (%edx, %esi)
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm3
-	movaps	%xmm2, (%edx, %esi)
-	pcmpeqd	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	%xmm3, (%edx, %esi)
-	mov	%ecx, %eax
-	lea	16(%ecx, %esi), %ecx
-	and	$-0x40, %ecx	/* Round the source position down to a 64-byte boundary ...  */
-	sub	%ecx, %eax
-	sub	%eax, %edx	/* ... and move the destination by the same delta.  */
-
-	mov	$-0x40, %esi	/* In the loop the pointers are bumped by 64 before the exit test.  */
-
-L(Aligned64Loop):
-	movaps	(%ecx), %xmm2
-	movaps	32(%ecx), %xmm3
-	movaps	%xmm2, %xmm4
-	movaps	16(%ecx), %xmm5
-	movaps	%xmm3, %xmm6
-	movaps	48(%ecx), %xmm7
-	pminub	%xmm5, %xmm2	/* Byte-wise minimum across the four 16-byte blocks.  */
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	lea	64(%edx), %edx
-	pcmpeqd	%xmm0, %xmm3	/* Any all-zero dword in the combined minimum?  */
-	lea	64(%ecx), %ecx
-	pmovmskb %xmm3, %eax
-
-	test	%eax, %eax
-	jnz	L(Aligned64Leave)
-	movaps	%xmm4, -64(%edx)
-	movaps	%xmm5, -48(%edx)
-	movaps	%xmm6, -32(%edx)
-	movaps	%xmm7, -16(%edx)
-	jmp	L(Aligned64Loop)
-
-L(Aligned64Leave):	/* Re-test each block on its own; %esi steps -64, -48, -32, -16.  */
-	pcmpeqd	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqd	%xmm5, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm4, -64(%edx)
-	test	%eax, %eax
-	lea	16(%esi), %esi	/* lea leaves the flags from test intact.  */
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqd	%xmm6, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm5, -48(%edx)
-	test	%eax, %eax
-	lea	16(%esi), %esi
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	%xmm6, -32(%edx)
-	pcmpeqd	%xmm7, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	lea	16(%esi), %esi
-	jnz	L(CopyFrom1To16Bytes)
-
-	mov	$-0x40, %esi	/* No single block held a zero dword after all: store the last one and resume.  */
-	movaps	%xmm7, -16(%edx)
-	jmp	L(Aligned64Loop)
-
-	.p2align 4
-L(Shl4):	/* Reached when (source & 15) == 4, so -4(%ecx) and 12(%ecx) are aligned.  */
-	movaps	-4(%ecx), %xmm1
-	movaps	12(%ecx), %xmm2
-L(Shl4Start):
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2	/* Stitch 16 source bytes together from two aligned loads.  */
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm1
-
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	28(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx	/* Back up to a 64-byte boundary for the unrolled loop ...  */
-	sub	%ecx, %eax
-	lea	-12(%ecx), %ecx
-	sub	%eax, %edx	/* ... moving the destination back by the same amount.  */
-
-	movaps	-4(%ecx), %xmm1
-
-L(Shl4LoopStart):
-	movaps	12(%ecx), %xmm2
-	movaps	28(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	44(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	60(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqd	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$4, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$4, %xmm3, %xmm4
-	jnz	L(Shl4Start)	/* Possible terminator in these 64 bytes: redo them 16 at a time.  */
-
-	palignr	$4, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl4LoopStart)
-
-L(Shl4LoopExit):	/* Copy 12 bytes, then finish according to the mask in %eax.  */
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %esi	/* %esi is only scratch here; POP below restores the saved value.  */
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 8(%edx)
-	POP	(%esi)
-	add	$12, %edx
-	add	$12, %ecx
-	test	%al, %al
-	jz	L(ExitHigh)
-	test	$0x01, %al
-	jnz	L(Exit4)
-	movlpd	(%ecx), %xmm0	/* Same 8-byte tail as L(Exit8).  */
-	movlpd	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(Shl8):	/* Reached when (source & 15) == 8.  */
-	movaps	-8(%ecx), %xmm1
-	movaps	8(%ecx), %xmm2
-L(Shl8Start):
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm1
-
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	24(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-8(%ecx), %ecx
-	sub	%eax, %edx
-
-	movaps	-8(%ecx), %xmm1
-
-L(Shl8LoopStart):
-	movaps	8(%ecx), %xmm2
-	movaps	24(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	40(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	56(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqd	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$8, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$8, %xmm3, %xmm4
-	jnz	L(Shl8Start)
-
-	palignr	$8, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl8LoopStart)
-
-L(Shl8LoopExit):	/* Copy 8 bytes, then finish according to the mask in %eax.  */
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	POP	(%esi)
-	add	$8, %edx
-	add	$8, %ecx
-	test	%al, %al
-	jz	L(ExitHigh)
-	test	$0x01, %al
-	jnz	L(Exit4)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(Shl12):	/* Reached for the remaining misalignment; the aligned loads below imply (source & 15) == 12.  */
-	movaps	-12(%ecx), %xmm1
-	movaps	4(%ecx), %xmm2
-L(Shl12Start):
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm1
-
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	20(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-4(%ecx), %ecx
-	sub	%eax, %edx
-
-	movaps	-12(%ecx), %xmm1
-
-L(Shl12LoopStart):
-	movaps	4(%ecx), %xmm2
-	movaps	20(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	36(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	52(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqd	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$12, %xmm4, %xmm5
-	test	%eax, %eax
-	palignr	$12, %xmm3, %xmm4
-	jnz	L(Shl12Start)
-
-	palignr	$12, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl12LoopStart)
-
-L(Shl12LoopExit):	/* Copy 4 bytes and fall through with %esi = 4 as the pointer bump.  */
-	movl	(%ecx), %esi
-	movl	%esi, (%edx)
-	mov	$4, %esi
-
-	.p2align 4
-L(CopyFrom1To16Bytes):	/* %esi = offset of the final block; %eax = its zero-dword byte mask.  */
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-	test	%al, %al	/* Mask bits 0-7 cover the first two wide characters.  */
-	jz	L(ExitHigh)
-	test	$0x01, %al	/* Bit 0 set: the first wide character is the terminator.  */
-	jnz	L(Exit4)
-L(Exit8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edi, %eax	/* Return the original destination.  */
-	RETURN
-
-	.p2align 4
-L(ExitHigh):
-	test	$0x01, %ah	/* Mask bit 8 set: the third wide character is the terminator.  */
-	jnz	L(Exit12)
-L(Exit16):
-	movdqu	(%ecx), %xmm0
-	movdqu	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	.p2align 4
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	.p2align 4
-L(Exit12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-	movl	%edi, %eax
-	RETURN
-
-CFI_POP	(%edi)
-
-	.p2align 4
-L(ExitTail4):	/* ExitTail*: reached before any push; %edx is still the original destination.  */
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edx, %eax
-	ret
-
-	.p2align 4
-L(ExitTail8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edx, %eax
-	ret
-
-	.p2align 4
-L(ExitTail12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-	movl	%edx, %eax
-	ret
-
-	.p2align 4
-L(ExitTail16):
-	movdqu	(%ecx), %xmm0
-	movdqu	%xmm0, (%edx)
-	movl	%edx, %eax
-	ret
-
-END (__wcscpy_ssse3)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcscpy.S b/sysdeps/i386/i686/multiarch/wcscpy.S
deleted file mode 100644
index cfc97dd87c..0000000000
--- a/sysdeps/i386/i686/multiarch/wcscpy.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of wcscpy
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
-	.text
-ENTRY(wcscpy)	/* Indirect-function resolver; see the .type directive below.  */
-	.type	wcscpy, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__wcscpy_ia32)	/* Default choice; presumably leaves the address in %eax (macro defined elsewhere).  */
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f	/* ZF set by the feature test: keep the ia32 default.  */
-	LOAD_FUNC_GOT_EAX (__wcscpy_ssse3)	/* Otherwise replace it with the SSSE3 version.  */
-2:	ret
-END(wcscpy)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcslen-c.c b/sysdeps/i386/i686/multiarch/wcslen-c.c
deleted file mode 100644
index a335dc0f7e..0000000000
--- a/sysdeps/i386/i686/multiarch/wcslen-c.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <wchar.h>
-
-#if IS_IN (libc)
-# define WCSLEN  __wcslen_ia32	/* Presumably the name wcsmbs/wcslen.c gives its function; confirm in that file.  */
-#endif
-
-extern __typeof (wcslen) __wcslen_ia32;	/* Declare it with the same type as wcslen.  */
-
-#include "wcsmbs/wcslen.c"	/* Shared C source, included with WCSLEN defined as above inside libc.  */
diff --git a/sysdeps/i386/i686/multiarch/wcslen-sse2.S b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
deleted file mode 100644
index bd3fc4c79b..0000000000
--- a/sysdeps/i386/i686/multiarch/wcslen-sse2.S
+++ /dev/null
@@ -1,193 +0,0 @@
-/* wcslen with SSE2
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-# include <sysdep.h>
-# define STR	4	/* Offset of the string argument from %esp at entry.  */
-
-	.text
-ENTRY (__wcslen_sse2)
-	mov	STR(%esp), %edx	/* %edx = start of the string.  */
-
-	cmp	$0, (%edx)	/* Test the first eight wide characters one at a time.  */
-	jz	L(exit_tail0)
-	cmp	$0, 4(%edx)
-	jz	L(exit_tail1)
-	cmp	$0, 8(%edx)
-	jz	L(exit_tail2)
-	cmp	$0, 12(%edx)
-	jz	L(exit_tail3)
-	cmp	$0, 16(%edx)
-	jz	L(exit_tail4)
-	cmp	$0, 20(%edx)
-	jz	L(exit_tail5)
-	cmp	$0, 24(%edx)
-	jz	L(exit_tail6)
-	cmp	$0, 28(%edx)
-	jz	L(exit_tail7)
-
-	pxor	%xmm0, %xmm0
-
-	lea	32(%edx), %eax
-	lea	16(%edx), %ecx	/* %ecx = start + 16: the bias L(exit) subtracts.  */
-	and	$-16, %eax	/* %eax = start + 32 rounded down to a 16-byte boundary.  */
-
-	pcmpeqd	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	pxor	%xmm1, %xmm1
-	test	%edx, %edx
-	lea	16(%eax), %eax	/* lea keeps the flags from test; %eax now points 16 past the tested block.  */
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	pxor	%xmm2, %xmm2
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	pxor	%xmm3, %xmm3
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	16(%eax), %eax
-	jnz	L(exit)
-
-	and	$-0x40, %eax	/* Round down to a 64-byte boundary for the main loop.  */
-
-	.p2align 4
-L(aligned_64_loop):
-	movaps	(%eax), %xmm0
-	movaps	16(%eax), %xmm1
-	movaps	32(%eax), %xmm2
-	movaps	48(%eax), %xmm6
-
-	pminub	%xmm1, %xmm0	/* Byte-wise minimum across the four 16-byte blocks.  */
-	pminub	%xmm6, %xmm2
-	pminub	%xmm0, %xmm2
-	pcmpeqd	%xmm3, %xmm2	/* %xmm3 is all-zero here: every earlier compare into it found nothing.  */
-	pmovmskb %xmm2, %edx
-	test	%edx, %edx
-	lea	64(%eax), %eax
-	jz	L(aligned_64_loop)
-
-	pcmpeqd	-64(%eax), %xmm3	/* Re-test each block on its own.  */
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	48(%ecx), %ecx	/* %eax is 64 past this block, so raise the bias to start + 64.  */
-	jnz	L(exit)
-
-	pcmpeqd	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	-16(%ecx), %ecx	/* Each later block lowers the bias by 16.  */
-	jnz	L(exit)
-
-	pcmpeqd	-32(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	-16(%ecx), %ecx
-	jnz	L(exit)
-
-	pcmpeqd	%xmm6, %xmm3
-	pmovmskb %xmm3, %edx
-	test	%edx, %edx
-	lea	-16(%ecx), %ecx	/* Bias is back to start + 16 if the loop has to resume.  */
-	jnz	L(exit)
-
-	jmp	L(aligned_64_loop)
-
-	.p2align 4
-L(exit):	/* %edx = zero-dword byte mask of the block, four mask bits per wide character.  */
-	sub	%ecx, %eax	/* Byte offset of that block from the start of the string ...  */
-	shr	$2, %eax	/* ... converted to a wide-character index.  */
-	test	%dl, %dl
-	jz	L(exit_high)
-
-	mov	%dl, %cl
-	and	$15, %cl
-	jz	L(exit_1)
-	ret	/* Terminator is the first wide character of the block.  */
-
-	.p2align 4
-L(exit_high):
-	mov	%dh, %ch
-	and	$15, %ch
-	jz	L(exit_3)
-	add	$2, %eax
-	ret
-
-	.p2align 4
-L(exit_1):
-	add	$1, %eax
-	ret
-
-	.p2align 4
-L(exit_3):
-	add	$3, %eax
-	ret
-
-	.p2align 4
-L(exit_tail0):	/* exit_tailN: the terminator is wide character N, so the length is N.  */
-	xor	%eax, %eax
-	ret
-
-	.p2align 4
-L(exit_tail1):
-	mov	$1, %eax
-	ret
-
-	.p2align 4
-L(exit_tail2):
-	mov	$2, %eax
-	ret
-
-	.p2align 4
-L(exit_tail3):
-	mov	$3, %eax
-	ret
-
-	.p2align 4
-L(exit_tail4):
-	mov	$4, %eax
-	ret
-
-	.p2align 4
-L(exit_tail5):
-	mov	$5, %eax
-	ret
-
-	.p2align 4
-L(exit_tail6):
-	mov	$6, %eax
-	ret
-
-	.p2align 4
-L(exit_tail7):
-	mov	$7, %eax
-	ret
-
-END (__wcslen_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcslen.S b/sysdeps/i386/i686/multiarch/wcslen.S
deleted file mode 100644
index 6ef9b6e7b5..0000000000
--- a/sysdeps/i386/i686/multiarch/wcslen.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of wcslen
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
-	.text
-ENTRY(__wcslen)	/* Indirect-function resolver; see the .type directive below.  */
-	.type	__wcslen, @gnu_indirect_function
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__wcslen_ia32)	/* Default choice; presumably leaves the address in %eax (macro defined elsewhere).  */
-	HAS_CPU_FEATURE (SSE2)
-	jz	2f	/* ZF set by the feature test: keep the ia32 default.  */
-	LOAD_FUNC_GOT_EAX (__wcslen_sse2)	/* Otherwise replace it with the SSE2 version.  */
-2:	ret
-END(__wcslen)
-
-weak_alias(__wcslen, wcslen)	/* wcslen is a weak alias of the resolver symbol.  */
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-c.c b/sysdeps/i386/i686/multiarch/wcsrchr-c.c
deleted file mode 100644
index 8d8a335b5b..0000000000
--- a/sysdeps/i386/i686/multiarch/wcsrchr-c.c
+++ /dev/null
@@ -1,5 +0,0 @@
-#if IS_IN (libc)
-# define wcsrchr  __wcsrchr_ia32	/* Inside libc, the file included below is compiled under this name.  */
-#endif
-
-#include "wcsmbs/wcsrchr.c"	/* Shared C source; sees the rename above when IS_IN (libc).  */
diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S b/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
deleted file mode 100644
index 1a9b60e55e..0000000000
--- a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
+++ /dev/null
@@ -1,354 +0,0 @@
-/* wcsrchr with SSE2, without using bsf instructions.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-# include <sysdep.h>
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-# define POP(REG)	popl REG; CFI_POP (REG)
-
-# define PARMS	8	/* Argument offset from %esp after ENTRANCE has pushed %edi.  */
-# define ENTRANCE	PUSH (%edi);
-# define RETURN	POP (%edi); ret; CFI_PUSH (%edi);
-# define STR1	PARMS	/* The string to search.  */
-# define STR2	STR1+4	/* The wide character to look for.  */
-
-	atom_text_section
-ENTRY (__wcsrchr_sse2)
-
-	ENTRANCE
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-
-	mov	%ecx, %edi	/* %edi walks the string.  */
-	punpckldq %xmm1, %xmm1
-	pxor	%xmm2, %xmm2	/* %xmm2 = 0, compared against to find the terminator.  */
-	punpckldq %xmm1, %xmm1	/* %xmm1 = the wanted character in all four dwords.  */
-
-/* ECX has OFFSET. */
-	and	$63, %ecx
-	cmp	$48, %ecx	/* Offset within 64 bytes above 48: take the aligned-load path instead.  */
-	ja	L(crosscache)
-
-/* unaligned string. */
-	movdqu	(%edi), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-/* Find where NULL is.  */
-	pmovmskb %xmm2, %ecx
-/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-	add	$16, %edi
-
-	test	%eax, %eax
-	jnz	L(unaligned_match1)
-
-	test	%ecx, %ecx
-	jnz	L(return_null)	/* Terminator found and no match at all.  */
-
-	and	$-16, %edi	/* Continue with aligned 16-byte loads.  */
-
-	PUSH	(%esi)
-
-	xor	%edx, %edx	/* %edx = 0: no match recorded so far.  */
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-
-	.p2align 4
-L(unaligned_match1):
-	test	%ecx, %ecx
-	jnz	L(prolog_find_zero_1)	/* Match and terminator in the same first block.  */
-
-	PUSH	(%esi)
-
-/* Save current match */
-	mov	%eax, %edx	/* %edx = match mask of the recorded block.  */
-	mov	%edi, %esi	/* %esi = address 16 bytes past that block.  */
-	and	$-16, %edi
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-
-	.p2align 4
-L(crosscache):
-/* Handle unaligned string.  */
-	and	$15, %ecx	/* %ecx = offset of the string inside its 16-byte block.  */
-	and	$-16, %edi
-	pxor	%xmm3, %xmm3
-	movdqa	(%edi), %xmm0
-	pcmpeqd	%xmm0, %xmm3
-	pcmpeqd	%xmm1, %xmm0
-/* Find where NULL is.  */
-	pmovmskb %xmm3, %edx
-/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-/* Remove the leading bytes.  */
-	shr	%cl, %edx
-	shr	%cl, %eax
-	add	$16, %edi
-
-	test	%eax, %eax
-	jnz	L(unaligned_match)
-
-	test	%edx, %edx
-	jnz	L(return_null)
-
-	PUSH	(%esi)
-
-	xor	%edx, %edx
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-
-	.p2align 4
-L(unaligned_match):
-	test	%edx, %edx
-	jnz	L(prolog_find_zero)
-
-	PUSH	(%esi)
-
-	mov	%eax, %edx
-	lea	(%edi, %ecx), %esi	/* Add the offset back because the saved mask was shifted right by %cl.  */
-
-/* Loop start on aligned string.  */
-	.p2align 4
-L(loop):	/* Four aligned 16-byte blocks per iteration.  */
-	movdqa	(%edi), %xmm0
-	pcmpeqd	%xmm0, %xmm2	/* %xmm2 stays all-zero for as long as no terminator is found.  */
-	add	$16, %edi
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx	/* Nonzero if the block holds a match or a terminator.  */
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm3
-	pcmpeqd	%xmm3, %xmm2
-	add	$16, %edi
-	pcmpeqd	%xmm1, %xmm3
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm3, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm4
-	pcmpeqd	%xmm4, %xmm2
-	add	$16, %edi
-	pcmpeqd	%xmm1, %xmm4
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm4, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm5
-	pcmpeqd	%xmm5, %xmm2
-	add	$16, %edi
-	pcmpeqd	%xmm1, %xmm5
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm5, %eax
-	or	%eax, %ecx
-	jz	L(loop)
-
-	.p2align 4
-L(matches):
-	test	%eax, %eax
-	jnz	L(match)
-L(return_value):	/* Terminator seen: answer from the last recorded match, if any.  */
-	test	%edx, %edx
-	jz	L(return_null_1)
-	mov	%edx, %eax
-	mov	%esi, %edi
-
-	POP	(%esi)
-
-	test	%ah, %ah	/* Mask bits 8-15: third or fourth wide character.  */
-	jnz	L(match_third_or_fourth_wchar)
-	test	$15 << 4, %al	/* Mask bits 4-7: second wide character.  */
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax	/* Otherwise the first one; %edi is 16 past the block.  */
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(return_null_1):
-	POP	(%esi)
-
-	xor	%eax, %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(match):
-	pmovmskb %xmm2, %ecx	/* Reload the terminator mask; %ecx was merged with %eax above.  */
-	test	%ecx, %ecx
-	jnz	L(find_zero)
-/* save match info */
-	mov	%eax, %edx
-	mov	%edi, %esi
-	jmp	L(loop)
-
-	.p2align 4
-L(find_zero):	/* Block holds both a match and the terminator: drop matches past the terminator.  */
-	test	%cl, %cl
-	jz	L(find_zero_in_third_or_fourth_wchar)
-	test	$15, %cl
-	jz	L(find_zero_in_second_wchar)
-	and	$1, %eax	/* Terminator is first: only a match on that same character counts.  */
-	jz	L(return_value)
-
-	POP	(%esi)
-
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_in_second_wchar):
-	and	$1 << 5 - 1, %eax	/* gas gives << higher precedence than -, so this is (1 << 5) - 1 = 0x1f.  */
-	jz	L(return_value)
-
-	POP	(%esi)
-
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_in_third_or_fourth_wchar):
-	test	$15, %ch
-	jz	L(find_zero_in_fourth_wchar)
-	and	$1 << 9 - 1, %eax	/* Likewise (1 << 9) - 1 = 0x1ff under gas precedence.  */
-	jz	L(return_value)
-
-	POP	(%esi)
-
-	test	%ah, %ah
-	jnz	L(match_third_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_in_fourth_wchar):	/* Terminator is the last wide character: every match in the block counts.  */
-
-	POP	(%esi)
-
-	test	%ah, %ah
-	jnz	L(match_third_or_fourth_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(match_second_wchar):
-	lea	-12(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_third_or_fourth_wchar):
-	test	$15 << 4, %ah	/* Mask bits 12-15: fourth wide character.  */
-	jnz	L(match_fourth_wchar)
-	lea	-8(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_third_wchar):
-	lea	-8(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_fourth_wchar):
-	lea	-4(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(return_null):	/* Reached only from paths that have not pushed %esi.  */
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero):	/* First-block versions of the find_zero paths; %esi has not been pushed.  */
-	add	%ecx, %edi
-	mov     %edx, %ecx
-L(prolog_find_zero_1):
-	test	%cl, %cl
-	jz	L(prolog_find_zero_in_third_or_fourth_wchar)
-	test	$15, %cl
-	jz	L(prolog_find_zero_in_second_wchar)
-	and	$1, %eax
-	jz	L(return_null)
-
-	lea	-16(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_in_second_wchar):
-	and	$1 << 5 - 1, %eax	/* (1 << 5) - 1 = 0x1f under gas precedence.  */
-	jz	L(return_null)
-
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_in_third_or_fourth_wchar):
-	test	$15, %ch
-	jz	L(prolog_find_zero_in_fourth_wchar)
-	and	$1 << 9 - 1, %eax	/* (1 << 9) - 1 = 0x1ff under gas precedence.  */
-	jz	L(return_null)
-
-	test	%ah, %ah
-	jnz	L(match_third_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_in_fourth_wchar):
-	test	%ah, %ah
-	jnz	L(match_third_or_fourth_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-END (__wcsrchr_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcsrchr.S b/sysdeps/i386/i686/multiarch/wcsrchr.S
deleted file mode 100644
index cf67333995..0000000000
--- a/sysdeps/i386/i686/multiarch/wcsrchr.S
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Multiple versions of wcsrchr
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
-	.text
-ENTRY(wcsrchr)	/* Resolver stub; the macros used below come from the included headers.  */
-	.type	wcsrchr, @gnu_indirect_function	/* Mark the symbol as a GNU indirect function (IFUNC).  */
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__wcsrchr_ia32)	/* Default choice; presumably leaves its address in %eax.  */
-	HAS_CPU_FEATURE (SSE2)	/* Presumably sets ZF when SSE2 is unavailable -- confirm in init-arch.h.  */
-	jz	2f	/* ZF set: keep the ia32 selection.  */
-	LOAD_FUNC_GOT_EAX (__wcsrchr_sse2)	/* Otherwise select the SSE2 version.  */
-2:	ret
-END(wcsrchr)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wmemcmp-c.c b/sysdeps/i386/i686/multiarch/wmemcmp-c.c
deleted file mode 100644
index 75ab4b94c1..0000000000
--- a/sysdeps/i386/i686/multiarch/wmemcmp-c.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <wchar.h>
-
-#if IS_IN (libc)
-# define WMEMCMP  __wmemcmp_ia32	/* Presumably the name under which the file included below defines its function -- confirm there.  */
-#endif
-
-extern __typeof (wmemcmp) __wmemcmp_ia32;	/* Declare __wmemcmp_ia32 with exactly the type of wmemcmp.  */
-
-#include "wcsmbs/wmemcmp.c"	/* Textually include the implementation from wcsmbs/.  */
diff --git a/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S b/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S
deleted file mode 100644
index 1a857c7e21..0000000000
--- a/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_WMEMCMP 1	/* Presumably switches the included memcmp source to its wmemcmp variant -- confirm in memcmp-sse4.S.  */
-#define MEMCMP __wmemcmp_sse4_2	/* Presumably the symbol name the included source defines.  */
-
-#include "memcmp-sse4.S"	/* All code comes from this file; only the two macros above differ.  */
diff --git a/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S b/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S
deleted file mode 100644
index a41ef95fc1..0000000000
--- a/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_WMEMCMP 1	/* Presumably switches the included memcmp source to its wmemcmp variant -- confirm in memcmp-ssse3.S.  */
-#define MEMCMP __wmemcmp_ssse3	/* Presumably the symbol name the included source defines.  */
-
-#include "memcmp-ssse3.S"	/* All code comes from this file; only the two macros above differ.  */
diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.S b/sysdeps/i386/i686/multiarch/wmemcmp.S
deleted file mode 100644
index 1b9a54a413..0000000000
--- a/sysdeps/i386/i686/multiarch/wmemcmp.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Multiple versions of wmemcmp
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc. */
-
-#if IS_IN (libc)
-	.text
-ENTRY(wmemcmp)	/* Resolver stub; the macros used below come from the included headers.  */
-	.type	wmemcmp, @gnu_indirect_function	/* Mark the symbol as a GNU indirect function (IFUNC).  */
-	LOAD_GOT_AND_RTLD_GLOBAL_RO
-	LOAD_FUNC_GOT_EAX (__wmemcmp_ia32)	/* Default choice; presumably leaves its address in %eax.  */
-	HAS_CPU_FEATURE (SSSE3)	/* Presumably sets ZF when SSSE3 is unavailable -- confirm in init-arch.h.  */
-	jz	2f	/* ZF set: keep the ia32 selection.  */
-	LOAD_FUNC_GOT_EAX (__wmemcmp_ssse3)	/* SSSE3 present: tentatively select the SSSE3 version.  */
-	HAS_CPU_FEATURE (SSE4_2)	/* SSE4.2 is tested only after the SSSE3 test has passed.  */
-	jz	2f	/* ZF set: keep the SSSE3 selection.  */
-	LOAD_FUNC_GOT_EAX (__wmemcmp_sse4_2)	/* Both tests passed: select the SSE4.2 version.  */
-2:	ret
-END(wmemcmp)
-END(wmemcmp)
-#endif
diff --git a/sysdeps/i386/i686/nptl/tls.h b/sysdeps/i386/i686/nptl/tls.h
deleted file mode 100644
index 5b527af9d3..0000000000
--- a/sysdeps/i386/i686/nptl/tls.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _TLS_H
-
-/* Additional definitions for <tls.h> on i686 and up.  */
-
-
-/* Macros to load from and store into segment registers.  We can use
-   the 32-bit instructions.  */
-#define TLS_GET_GS() \
-  ({ int __seg; __asm ("movl %%gs, %0" : "=q" (__seg)); __seg; })	/* Statement expression: yields the value moved out of %gs, as an int.  */
-#define TLS_SET_GS(val) \
-  __asm ("movl %0, %%gs" :: "q" (val))	/* Moves VAL into %gs; the "q" constraint places VAL in one of the a/b/c/d registers on i386.  */
-
-
-/* Get the full set of definitions.  */
-#include_next <tls.h>
-
-#endif	/* tls.h */
diff --git a/sysdeps/i386/i686/pthread_spin_trylock.S b/sysdeps/i386/i686/pthread_spin_trylock.S
deleted file mode 100644
index ce9c94d41a..0000000000
--- a/sysdeps/i386/i686/pthread_spin_trylock.S
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define HAVE_CMOV	1	/* Presumably tells the included i386 source that cmov may be used -- confirm there.  */
-#include <sysdeps/i386/pthread_spin_trylock.S>	/* The implementation itself lives in the generic i386 file.  */
diff --git a/sysdeps/i386/i686/stack-aliasing.h b/sysdeps/i386/i686/stack-aliasing.h
deleted file mode 100644
index 9b5a1b0d47..0000000000
--- a/sysdeps/i386/i686/stack-aliasing.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Define macros for stack address aliasing issues for NPTL.  i686 version.
-   Copyright (C) 2014-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* What is useful is to avoid the 64k aliasing problem which reliably
-   happens if all stacks use sizes which are a multiple of 64k.  Tell
-   the stack allocator to disturb this by allocating one more page if
-   necessary.  */
-#define MULTI_PAGE_ALIASING     65536	/* 64k, the aliasing distance described above, in bytes.  */
diff --git a/sysdeps/i386/i686/strcmp.S b/sysdeps/i386/i686/strcmp.S
deleted file mode 100644
index 1ae305912e..0000000000
--- a/sysdeps/i386/i686/strcmp.S
+++ /dev/null
@@ -1,52 +0,0 @@
-/* Highly optimized version for ix86, x>=6.
-   Copyright (C) 1999-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS	4		/* no space for saved regs */
-#define STR1	PARMS
-#define STR2	STR1+4
-
-        .text
-ENTRY (strcmp)
-
-	movl	STR1(%esp), %ecx	/* %ecx = first string pointer (stack offset 4).  */
-	movl	STR2(%esp), %edx	/* %edx = second string pointer (stack offset 8).  */
-
-L(oop):	movb	(%ecx), %al	/* Load the next byte of the first string.  */
-	cmpb	(%edx), %al	/* Compare it with the corresponding byte of the second string.  */
-	jne	L(neq)	/* Bytes differ: go compute the sign of the result.  */
-	incl	%ecx
-	incl	%edx
-	testb	%al, %al	/* The bytes were equal; was it the NUL terminator?  */
-	jnz	L(oop)	/* Not NUL: keep comparing.  */
-
-	xorl	%eax, %eax	/* Reached NUL with every byte equal: return 0.  */
-	/* when strings are equal, pointers rest one beyond
-	   the end of the NUL terminators.  */
-	ret
-
-L(neq):	movl	$1, %eax	/* Assume the first string is greater; movl leaves the cmpb flags intact.  */
-	movl	$-1, %ecx
-	cmovbl	%ecx, %eax	/* CF set by cmpb (first byte below second, unsigned): return -1 instead.  */
-
-	ret
-END (strcmp)
-libc_hidden_builtin_def (strcmp)
diff --git a/sysdeps/i386/i686/tst-stack-align.h b/sysdeps/i386/i686/tst-stack-align.h
deleted file mode 100644
index 51f03fe77b..0000000000
--- a/sysdeps/i386/i686/tst-stack-align.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* Copyright (C) 2003-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <stdio.h>
-#include <stdint.h>
-#ifndef __SSE__
-#include_next <tst-stack-align.h>
-#else
-#include <xmmintrin.h>
-
-#define TEST_STACK_ALIGN() \
-  ({ /* Statement expression: 1 if any local below is misaligned, else 0.  */ \
-    __m128 _m;								     \
-    double _d = 12.0;							     \
-    long double _ld = 15.0;						     \
-    int _ret = 0;							     \
-    printf ("__m128:  %p %zu\n", &_m, __alignof (__m128));		     \
-    if ((((uintptr_t) &_m) & (__alignof (__m128) - 1)) != 0)		     \
-      _ret = 1;								     \
-    /* Same address-mask check for the double local.  */		     \
-    printf ("double:  %g %p %zu\n", _d, &_d, __alignof (double));	     \
-    if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0)		     \
-      _ret = 1;								     \
-    /* Same address-mask check for the long double local.  */		     \
-    printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double));    \
-    if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0)	     \
-      _ret = 1;								     \
-    _ret; /* Value of the whole expression.  */				     \
-    })
-#endif