diff options
Diffstat (limited to 'REORG.TODO/sysdeps/i386/i586')
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/add_n.S     | 143
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/addmul_1.S  |  94
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/bzero.S     |   4
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/init-arch.h |  19
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/lshift.S    | 255
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/memcopy.h   |  95
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/memcpy.S    | 124
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/mempcpy.S   |   8
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/memset.S    | 121
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/memusage.h  |   1
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/mul_1.S     |  90
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/rshift.S    | 255
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/stpcpy.S    |   8
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/strchr.S    | 348
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/strcpy.S    | 169
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/strlen.S    | 182
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/sub_n.S     | 143
-rw-r--r--   REORG.TODO/sysdeps/i386/i586/submul_1.S  |  94
18 files changed, 2153 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/i386/i586/add_n.S b/REORG.TODO/sysdeps/i386/i586/add_n.S new file mode 100644 index 0000000000..f73df092f0 --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/add_n.S @@ -0,0 +1,143 @@ +/* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store + sum in a third limb vector. + Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, + see <http://www.gnu.org/licenses/>. 
*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define PARMS 4+16 /* space for 4 saved regs */ +#define RES PARMS +#define S1 RES+4 +#define S2 S1+4 +#define SIZE S2+4 + + .text +ENTRY (__mpn_add_n) + + pushl %edi + cfi_adjust_cfa_offset (4) + pushl %esi + cfi_adjust_cfa_offset (4) + pushl %ebp + cfi_adjust_cfa_offset (4) + pushl %ebx + cfi_adjust_cfa_offset (4) + + movl RES(%esp),%edi + cfi_rel_offset (edi, 12) + movl S1(%esp),%esi + cfi_rel_offset (esi, 8) + movl S2(%esp),%ebx + cfi_rel_offset (ebx, 0) + movl SIZE(%esp),%ecx + movl (%ebx),%ebp + cfi_rel_offset (ebp, 4) + + decl %ecx + movl %ecx,%edx + shrl $3,%ecx + andl $7,%edx + testl %ecx,%ecx /* zero carry flag */ + jz L(end) + pushl %edx + cfi_adjust_cfa_offset (4) + + ALIGN (3) +L(oop): movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L(1): movl (%esi),%eax + movl 4(%esi),%edx + adcl %ebp,%eax + movl 4(%ebx),%ebp + adcl %ebp,%edx + movl 8(%ebx),%ebp + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L(2): movl 8(%esi),%eax + movl 12(%esi),%edx + adcl %ebp,%eax + movl 12(%ebx),%ebp + adcl %ebp,%edx + movl 16(%ebx),%ebp + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L(3): movl 16(%esi),%eax + movl 20(%esi),%edx + adcl %ebp,%eax + movl 20(%ebx),%ebp + adcl %ebp,%edx + movl 24(%ebx),%ebp + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L(4): movl 24(%esi),%eax + movl 28(%esi),%edx + adcl %ebp,%eax + movl 28(%ebx),%ebp + adcl %ebp,%edx + movl 32(%ebx),%ebp + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebx),%ebx + decl %ecx + jnz L(oop) + + popl %edx + cfi_adjust_cfa_offset (-4) +L(end): + decl %edx /* test %edx w/o clobbering carry */ + js L(end2) + incl %edx +L(oop2): + leal 4(%edi),%edi + movl (%esi),%eax + adcl %ebp,%eax + movl 4(%ebx),%ebp + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebx),%ebx + decl %edx + jnz L(oop2) +L(end2): + movl (%esi),%eax + adcl %ebp,%eax + movl %eax,(%edi) + + sbbl %eax,%eax + negl %eax + + popl %ebx + 
cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + popl %ebp + cfi_adjust_cfa_offset (-4) + cfi_restore (ebp) + popl %esi + cfi_adjust_cfa_offset (-4) + cfi_restore (esi) + popl %edi + cfi_adjust_cfa_offset (-4) + cfi_restore (edi) + + ret +END (__mpn_add_n) diff --git a/REORG.TODO/sysdeps/i386/i586/addmul_1.S b/REORG.TODO/sysdeps/i386/i586/addmul_1.S new file mode 100644 index 0000000000..a713192982 --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/addmul_1.S @@ -0,0 +1,94 @@ +/* Pentium __mpn_addmul_1 -- Multiply a limb vector with a limb and add + the result to a second limb vector. + Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, + see <http://www.gnu.org/licenses/>. 
*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define PARMS 4+16 /* space for 4 saved regs */ +#define RES PARMS +#define S1 RES+4 +#define SIZE S1+4 +#define S2LIMB SIZE+4 + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebx + + .text +ENTRY (__mpn_addmul_1) + + pushl %res_ptr + cfi_adjust_cfa_offset (4) + pushl %s1_ptr + cfi_adjust_cfa_offset (4) + pushl %ebp + cfi_adjust_cfa_offset (4) + pushl %s2_limb + cfi_adjust_cfa_offset (4) + + movl RES(%esp), %res_ptr + cfi_rel_offset (res_ptr, 12) + movl S1(%esp), %s1_ptr + cfi_rel_offset (s1_ptr, 8) + movl SIZE(%esp), %size + movl S2LIMB(%esp), %s2_limb + cfi_rel_offset (s2_limb, 0) + leal (%res_ptr,%size,4), %res_ptr + leal (%s1_ptr,%size,4), %s1_ptr + negl %size + xorl %ebp, %ebp + cfi_rel_offset (ebp, 4) + ALIGN (3) + +L(oop): adcl $0, %ebp + movl (%s1_ptr,%size,4), %eax + + mull %s2_limb + + addl %ebp, %eax + movl (%res_ptr,%size,4), %ebp + + adcl $0, %edx + addl %eax, %ebp + + movl %ebp, (%res_ptr,%size,4) + incl %size + + movl %edx, %ebp + jnz L(oop) + + adcl $0, %ebp + movl %ebp, %eax + popl %s2_limb + cfi_adjust_cfa_offset (-4) + cfi_restore (s2_limb) + popl %ebp + cfi_adjust_cfa_offset (-4) + cfi_restore (ebp) + popl %s1_ptr + cfi_adjust_cfa_offset (-4) + cfi_restore (s1_ptr) + popl %res_ptr + cfi_adjust_cfa_offset (-4) + cfi_restore (res_ptr) + + ret +#undef size +END (__mpn_addmul_1) diff --git a/REORG.TODO/sysdeps/i386/i586/bzero.S b/REORG.TODO/sysdeps/i386/i586/bzero.S new file mode 100644 index 0000000000..2a106719a4 --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/bzero.S @@ -0,0 +1,4 @@ +#define USE_AS_BZERO +#define memset __bzero +#include <sysdeps/i386/i586/memset.S> +weak_alias (__bzero, bzero) diff --git a/REORG.TODO/sysdeps/i386/i586/init-arch.h b/REORG.TODO/sysdeps/i386/i586/init-arch.h new file mode 100644 index 0000000000..4711212e6f --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/init-arch.h @@ -0,0 +1,19 @@ +/* Copyright (C) 2015-2017 Free Software 
Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define MINIMUM_ISA 586 +#include <sysdeps/x86/init-arch.h> diff --git a/REORG.TODO/sysdeps/i386/i586/lshift.S b/REORG.TODO/sysdeps/i386/i586/lshift.S new file mode 100644 index 0000000000..7941c28d9d --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/lshift.S @@ -0,0 +1,255 @@ +/* Pentium optimized __mpn_lshift -- + Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define PARMS 4+16 /* space for 4 saved regs */ +#define RES PARMS +#define S RES+4 +#define SIZE S+4 +#define CNT SIZE+4 + + .text +ENTRY (__mpn_lshift) + + pushl %edi + cfi_adjust_cfa_offset (4) + pushl %esi + cfi_adjust_cfa_offset (4) + pushl %ebp + cfi_adjust_cfa_offset (4) + cfi_rel_offset (ebp, 0) + pushl %ebx + cfi_adjust_cfa_offset (4) + + movl RES(%esp),%edi + cfi_rel_offset (edi, 12) + movl S(%esp),%esi + cfi_rel_offset (esi, 8) + movl SIZE(%esp),%ebx + cfi_rel_offset (ebx, 0) + movl CNT(%esp),%ecx + +/* We can use faster code for shift-by-1 under certain conditions. */ + cmp $1,%ecx + jne L(normal) + leal 4(%esi),%eax + cmpl %edi,%eax + jnc L(special) /* jump if s_ptr + 1 >= res_ptr */ + leal (%esi,%ebx,4),%eax + cmpl %eax,%edi + jnc L(special) /* jump if res_ptr >= s_ptr + size */ + +L(normal): + leal -4(%edi,%ebx,4),%edi + leal -4(%esi,%ebx,4),%esi + + movl (%esi),%edx + subl $4,%esi + xorl %eax,%eax + shldl %cl,%edx,%eax /* compute carry limb */ + pushl %eax /* push carry limb onto stack */ + cfi_adjust_cfa_offset (4) + + decl %ebx + pushl %ebx + cfi_adjust_cfa_offset (4) + shrl $3,%ebx + jz L(end) + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +L(oop): movl -28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebp + + movl (%esi),%eax + movl -4(%esi),%edx + shldl %cl,%eax,%ebp + shldl %cl,%edx,%eax + movl %ebp,(%edi) + movl %eax,-4(%edi) + + movl -8(%esi),%ebp + movl -12(%esi),%eax + shldl %cl,%ebp,%edx + shldl %cl,%eax,%ebp + movl %edx,-8(%edi) + movl %ebp,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebp + shldl %cl,%edx,%eax + shldl %cl,%ebp,%edx + movl %eax,-16(%edi) + movl %edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + shldl %cl,%eax,%ebp + shldl %cl,%edx,%eax + movl %ebp,-24(%edi) + movl %eax,-28(%edi) + + subl $32,%esi + subl $32,%edi + decl %ebx + jnz L(oop) + +L(end): popl %ebx + cfi_adjust_cfa_offset (-4) + andl $7,%ebx + jz L(end2) 
+L(oop2): + movl (%esi),%eax + shldl %cl,%eax,%edx + movl %edx,(%edi) + movl %eax,%edx + subl $4,%esi + subl $4,%edi + decl %ebx + jnz L(oop2) + +L(end2): + shll %cl,%edx /* compute least significant limb */ + movl %edx,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + cfi_adjust_cfa_offset (-4) + + popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + popl %ebp + cfi_adjust_cfa_offset (-4) + cfi_restore (ebp) + popl %esi + cfi_adjust_cfa_offset (-4) + cfi_restore (esi) + popl %edi + cfi_adjust_cfa_offset (-4) + cfi_restore (edi) + + ret + +/* We loop from least significant end of the arrays, which is only + permissible if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. +*/ + + cfi_adjust_cfa_offset (16) + cfi_rel_offset (edi, 12) + cfi_rel_offset (esi, 8) + cfi_rel_offset (ebp, 4) + cfi_rel_offset (ebx, 0) +L(special): + movl (%esi),%edx + addl $4,%esi + + decl %ebx + pushl %ebx + cfi_adjust_cfa_offset (4) + shrl $3,%ebx + + addl %edx,%edx + incl %ebx + decl %ebx + jz L(Lend) + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +L(Loop): + movl 28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebp + + movl (%esi),%eax + movl 4(%esi),%edx + adcl %eax,%eax + movl %ebp,(%edi) + adcl %edx,%edx + movl %eax,4(%edi) + + movl 8(%esi),%ebp + movl 12(%esi),%eax + adcl %ebp,%ebp + movl %edx,8(%edi) + adcl %eax,%eax + movl %ebp,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebp + adcl %edx,%edx + movl %eax,16(%edi) + adcl %ebp,%ebp + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + adcl %eax,%eax + movl %ebp,24(%edi) + adcl %edx,%edx + movl %eax,28(%edi) + + leal 32(%esi),%esi /* use leal not to clobber carry */ + leal 32(%edi),%edi + decl %ebx + jnz L(Loop) + +L(Lend): + popl %ebx + cfi_adjust_cfa_offset (-4) + sbbl %eax,%eax /* save carry in %eax */ + andl $7,%ebx + jz L(Lend2) + addl %eax,%eax /* restore carry from eax */ +L(Loop2): + 
movl %edx,%ebp + movl (%esi),%edx + adcl %edx,%edx + movl %ebp,(%edi) + + leal 4(%esi),%esi /* use leal not to clobber carry */ + leal 4(%edi),%edi + decl %ebx + jnz L(Loop2) + + jmp L(L1) +L(Lend2): + addl %eax,%eax /* restore carry from eax */ +L(L1): movl %edx,(%edi) /* store last limb */ + + sbbl %eax,%eax + negl %eax + + popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + popl %ebp + cfi_adjust_cfa_offset (-4) + cfi_restore (ebp) + popl %esi + cfi_adjust_cfa_offset (-4) + cfi_restore (esi) + popl %edi + cfi_adjust_cfa_offset (-4) + cfi_restore (edi) + + ret +END (__mpn_lshift) diff --git a/REORG.TODO/sysdeps/i386/i586/memcopy.h b/REORG.TODO/sysdeps/i386/i586/memcopy.h new file mode 100644 index 0000000000..39f020a746 --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/memcopy.h @@ -0,0 +1,95 @@ +/* memcopy.h -- definitions for memory copy functions. Pentium version. + Copyright (C) 1994-2017 Free Software Foundation, Inc. + Contributed by Torbjorn Granlund (tege@sics.se). + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Get the i386 definitions. We will override some of them below. */ +#include <sysdeps/i386/memcopy.h> + +/* Written like this, the Pentium pipeline can execute the loop at a + sustained rate of 2 instructions/clock, or asymptotically 480 + Mbytes/second at 60Mhz. 
*/ + +#undef WORD_COPY_FWD +#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \ + do \ + { \ + asm volatile ("subl $32,%2\n" \ + "js 2f\n" \ + "movl 0(%0),%%edx\n" /* alloc dest line */ \ + "1:\n" \ + "movl 28(%0),%%eax\n" /* alloc dest line */ \ + "subl $32,%2\n" /* decr loop count */ \ + "movl 0(%1),%%eax\n" /* U pipe */ \ + "movl 4(%1),%%edx\n" /* V pipe */ \ + "movl %%eax,0(%0)\n" /* U pipe */ \ + "movl %%edx,4(%0)\n" /* V pipe */ \ + "movl 8(%1),%%eax\n" \ + "movl 12(%1),%%edx\n" \ + "movl %%eax,8(%0)\n" \ + "movl %%edx,12(%0)\n" \ + "movl 16(%1),%%eax\n" \ + "movl 20(%1),%%edx\n" \ + "movl %%eax,16(%0)\n" \ + "movl %%edx,20(%0)\n" \ + "movl 24(%1),%%eax\n" \ + "movl 28(%1),%%edx\n" \ + "movl %%eax,24(%0)\n" \ + "movl %%edx,28(%0)\n" \ + "leal 32(%1),%1\n" /* update src ptr */ \ + "leal 32(%0),%0\n" /* update dst ptr */ \ + "jns 1b\n" \ + "2: addl $32,%2" : \ + "=r" (dst_bp), "=r" (src_bp), "=r" (nbytes_left) : \ + "0" (dst_bp), "1" (src_bp), "2" (nbytes) : \ + "ax", "dx"); \ + } while (0) + +#undef WORD_COPY_BWD +#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \ + do \ + { \ + asm volatile ("subl $32,%2\n" \ + "js 2f\n" \ + "movl -4(%0),%%edx\n" \ + "1:\n" \ + "movl -32(%0),%%eax\n" \ + "subl $32,%2\n" \ + "movl -4(%1),%%eax\n" \ + "movl -8(%1),%%edx\n" \ + "movl %%eax,-4(%0)\n" \ + "movl %%edx,-8(%0)\n" \ + "movl -12(%1),%%eax\n" \ + "movl -16(%1),%%edx\n" \ + "movl %%eax,-12(%0)\n" \ + "movl %%edx,-16(%0)\n" \ + "movl -20(%1),%%eax\n" \ + "movl -24(%1),%%edx\n" \ + "movl %%eax,-20(%0)\n" \ + "movl %%edx,-24(%0)\n" \ + "movl -28(%1),%%eax\n" \ + "movl -32(%1),%%edx\n" \ + "movl %%eax,-28(%0)\n" \ + "movl %%edx,-32(%0)\n" \ + "leal -32(%1),%1\n" \ + "leal -32(%0),%0\n" \ + "jns 1b\n" \ + "2: addl $32,%2" : \ + "=r" (dst_ep), "=r" (src_ep), "=r" (nbytes_left) : \ + "0" (dst_ep), "1" (src_ep), "2" (nbytes) : \ + "ax", "dx"); \ + } while (0) diff --git a/REORG.TODO/sysdeps/i386/i586/memcpy.S b/REORG.TODO/sysdeps/i386/i586/memcpy.S new file 
mode 100644 index 0000000000..6474a3f653 --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/memcpy.S @@ -0,0 +1,124 @@ +/* Highly optimized version for i586. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include "asm-syntax.h" + +#define PARMS 4+8 /* space for 2 saved regs */ +#define RTN PARMS +#define DEST RTN +#define SRC DEST+4 +#define LEN SRC+4 + + .text +#if defined PIC && IS_IN (libc) +ENTRY (__memcpy_chk) + movl 12(%esp), %eax + cmpl %eax, 16(%esp) + jb HIDDEN_JUMPTARGET (__chk_fail) +END (__memcpy_chk) +#endif +ENTRY (memcpy) + + pushl %edi + cfi_adjust_cfa_offset (4) + pushl %esi + cfi_adjust_cfa_offset (4) + + movl DEST(%esp), %edi + cfi_rel_offset (edi, 4) + movl SRC(%esp), %esi + cfi_rel_offset (esi, 0) + movl LEN(%esp), %ecx + movl %edi, %eax + + /* We need this in any case. */ + cld + + /* Cutoff for the big loop is a size of 32 bytes since otherwise + the loop will never be entered. 
*/ + cmpl $32, %ecx + jbe L(1) + + negl %eax + andl $3, %eax + subl %eax, %ecx + xchgl %eax, %ecx + + rep; movsb + + movl %eax, %ecx + subl $32, %ecx + js L(2) + + /* Read ahead to make sure we write in the cache since the stupid + i586 designers haven't implemented read-on-write-miss. */ + movl (%edi), %eax +L(3): movl 28(%edi), %edx + + /* Now correct the loop counter. Please note that in the following + code the flags are not changed anymore. */ + subl $32, %ecx + + movl (%esi), %eax + movl 4(%esi), %edx + movl %eax, (%edi) + movl %edx, 4(%edi) + movl 8(%esi), %eax + movl 12(%esi), %edx + movl %eax, 8(%edi) + movl %edx, 12(%edi) + movl 16(%esi), %eax + movl 20(%esi), %edx + movl %eax, 16(%edi) + movl %edx, 20(%edi) + movl 24(%esi), %eax + movl 28(%esi), %edx + movl %eax, 24(%edi) + movl %edx, 28(%edi) + + leal 32(%esi), %esi + leal 32(%edi), %edi + + jns L(3) + + /* Correct extra loop counter modification. */ +L(2): addl $32, %ecx +#ifndef USE_AS_MEMPCPY + movl DEST(%esp), %eax +#endif + +L(1): rep; movsb + +#ifdef USE_AS_MEMPCPY + movl %edi, %eax +#endif + + popl %esi + cfi_adjust_cfa_offset (-4) + cfi_restore (esi) + popl %edi + cfi_adjust_cfa_offset (-4) + cfi_restore (edi) + + ret +END (memcpy) +#ifndef USE_AS_MEMPCPY +libc_hidden_builtin_def (memcpy) +#endif diff --git a/REORG.TODO/sysdeps/i386/i586/mempcpy.S b/REORG.TODO/sysdeps/i386/i586/mempcpy.S new file mode 100644 index 0000000000..720a4c0923 --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/mempcpy.S @@ -0,0 +1,8 @@ +#define USE_AS_MEMPCPY +#define memcpy __mempcpy +#define __memcpy_chk __mempcpy_chk +#include <sysdeps/i386/i586/memcpy.S> + +libc_hidden_def (__mempcpy) +weak_alias (__mempcpy, mempcpy) +libc_hidden_builtin_def (mempcpy) diff --git a/REORG.TODO/sysdeps/i386/i586/memset.S b/REORG.TODO/sysdeps/i386/i586/memset.S new file mode 100644 index 0000000000..4f8f1bcf94 --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/memset.S @@ -0,0 +1,121 @@ +/* memset/bzero -- set memory area to CH/0 + Highly 
optimized version for ix86, x>=5. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Torbjorn Granlund, <tege@matematik.su.se> + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include "asm-syntax.h" + +#define PARMS 4+4 /* space for 1 saved reg */ +#define RTN PARMS +#define DEST RTN +#ifdef USE_AS_BZERO +# define LEN DEST+4 +#else +# define CHR DEST+4 +# define LEN CHR+4 +#endif + + .text +#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO +ENTRY (__memset_chk) + movl 12(%esp), %eax + cmpl %eax, 16(%esp) + jb HIDDEN_JUMPTARGET (__chk_fail) +END (__memset_chk) +#endif +ENTRY (memset) + + pushl %edi + cfi_adjust_cfa_offset (4) + + movl DEST(%esp), %edi + cfi_rel_offset (edi, 0) + movl LEN(%esp), %edx +#ifdef USE_AS_BZERO + xorl %eax, %eax /* we fill with 0 */ +#else + movb CHR(%esp), %al + movb %al, %ah + movl %eax, %ecx + shll $16, %eax + movw %cx, %ax +#endif + cld + +/* If less than 36 bytes to write, skip tricky code (it wouldn't work). */ + cmpl $36, %edx + movl %edx, %ecx /* needed when branch is taken! */ + jl L(2) + +/* First write 0-3 bytes to make the pointer 32-bit aligned. */ + movl %edi, %ecx /* Copy ptr to ecx... */ + negl %ecx /* ...and negate that and... */ + andl $3, %ecx /* ...mask to get byte count. 
*/ + subl %ecx, %edx /* adjust global byte count */ + rep + stosb + + subl $32, %edx /* offset count for unrolled loop */ + movl (%edi), %ecx /* Fetch destination cache line */ + + .align 2, 0x90 /* supply 0x90 for broken assemblers */ +L(1): movl 28(%edi), %ecx /* allocate cache line for destination */ + subl $32, %edx /* decr loop count */ + movl %eax, 0(%edi) /* store words pairwise */ + movl %eax, 4(%edi) + movl %eax, 8(%edi) + movl %eax, 12(%edi) + movl %eax, 16(%edi) + movl %eax, 20(%edi) + movl %eax, 24(%edi) + movl %eax, 28(%edi) + leal 32(%edi), %edi /* update destination pointer */ + jge L(1) + + leal 32(%edx), %ecx /* reset offset count */ + +/* Write last 0-7 full 32-bit words (up to 8 words if loop was skipped). */ +L(2): shrl $2, %ecx /* convert byte count to longword count */ + rep + stosl + +/* Finally write the last 0-3 bytes. */ + movl %edx, %ecx + andl $3, %ecx + rep + stosb + +#ifndef USE_AS_BZERO + /* Load result (only if used as memset). */ + movl DEST(%esp), %eax /* start address of destination is result */ +#endif + popl %edi + cfi_adjust_cfa_offset (-4) + cfi_restore (edi) + + ret +END (memset) +libc_hidden_builtin_def (memset) + +#if defined SHARED && IS_IN (libc) && !defined __memset_chk \ + && !defined USE_AS_BZERO +strong_alias (__memset_chk, __memset_zero_constant_len_parameter) + .section .gnu.warning.__memset_zero_constant_len_parameter + .string "memset used with constant zero length parameter; this could be due to transposed parameters" +#endif diff --git a/REORG.TODO/sysdeps/i386/i586/memusage.h b/REORG.TODO/sysdeps/i386/i586/memusage.h new file mode 100644 index 0000000000..c8170874d0 --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/memusage.h @@ -0,0 +1 @@ +#include "../i686/memusage.h" diff --git a/REORG.TODO/sysdeps/i386/i586/mul_1.S b/REORG.TODO/sysdeps/i386/i586/mul_1.S new file mode 100644 index 0000000000..bd3a07de90 --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/mul_1.S @@ -0,0 +1,90 @@ +/* Pentium __mpn_mul_1 -- 
Multiply a limb vector with a limb and store + the result in a second limb vector. + Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, + see <http://www.gnu.org/licenses/>. */ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define PARMS 4+16 /* space for 4 saved regs */ +#define RES PARMS +#define S1 RES+4 +#define SIZE S1+4 +#define S2LIMB SIZE+4 + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebx + + .text +ENTRY (__mpn_mul_1) + + pushl %res_ptr + cfi_adjust_cfa_offset (4) + pushl %s1_ptr + cfi_adjust_cfa_offset (4) + pushl %ebp + cfi_adjust_cfa_offset (4) + pushl %s2_limb + cfi_adjust_cfa_offset (4) + + movl RES(%esp), %res_ptr + cfi_rel_offset (res_ptr, 12) + movl S1(%esp), %s1_ptr + cfi_rel_offset (s1_ptr, 8) + movl SIZE(%esp), %size + movl S2LIMB(%esp), %s2_limb + cfi_rel_offset (s2_limb, 0) + leal (%res_ptr,%size,4), %res_ptr + leal (%s1_ptr,%size,4), %s1_ptr + negl %size + xorl %ebp, %ebp + cfi_rel_offset (ebp, 4) + ALIGN (3) + +L(oop): adcl $0, %ebp + movl (%s1_ptr,%size,4), %eax + + mull %s2_limb + + addl %eax, %ebp + + movl %ebp, (%res_ptr,%size,4) + incl %size + + movl %edx, %ebp + jnz L(oop) + + adcl $0, %ebp + movl %ebp, %eax + popl %s2_limb + cfi_adjust_cfa_offset (-4) + cfi_restore (s2_limb) + popl %ebp + 
cfi_adjust_cfa_offset (-4) + cfi_restore (ebp) + popl %s1_ptr + cfi_adjust_cfa_offset (-4) + cfi_restore (s1_ptr) + popl %res_ptr + cfi_adjust_cfa_offset (-4) + cfi_restore (res_ptr) + + ret +#undef size +END (__mpn_mul_1) diff --git a/REORG.TODO/sysdeps/i386/i586/rshift.S b/REORG.TODO/sysdeps/i386/i586/rshift.S new file mode 100644 index 0000000000..24c76ee0bb --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/rshift.S @@ -0,0 +1,255 @@ +/* Pentium optimized __mpn_rshift -- + Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, + see <http://www.gnu.org/licenses/>. */ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define PARMS 4+16 /* space for 4 saved regs */ +#define RES PARMS +#define S RES+4 +#define SIZE S+4 +#define CNT SIZE+4 + + .text +ENTRY (__mpn_rshift) + + pushl %edi + cfi_adjust_cfa_offset (4) + pushl %esi + cfi_adjust_cfa_offset (4) + pushl %ebp + cfi_adjust_cfa_offset (4) + cfi_rel_offset (ebp, 0) + pushl %ebx + cfi_adjust_cfa_offset (4) + + movl RES(%esp),%edi + cfi_rel_offset (edi, 12) + movl S(%esp),%esi + cfi_rel_offset (esi, 8) + movl SIZE(%esp),%ebx + cfi_rel_offset (ebx, 0) + movl CNT(%esp),%ecx + +/* We can use faster code for shift-by-1 under certain conditions. 
*/ + cmp $1,%ecx + jne L(normal) + leal 4(%edi),%eax + cmpl %esi,%eax + jnc L(special) /* jump if res_ptr + 1 >= s_ptr */ + leal (%edi,%ebx,4),%eax + cmpl %eax,%esi + jnc L(special) /* jump if s_ptr >= res_ptr + size */ + +L(normal): + movl (%esi),%edx + addl $4,%esi + xorl %eax,%eax + shrdl %cl,%edx,%eax /* compute carry limb */ + pushl %eax /* push carry limb onto stack */ + cfi_adjust_cfa_offset (4) + + decl %ebx + pushl %ebx + cfi_adjust_cfa_offset (4) + shrl $3,%ebx + jz L(end) + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +L(oop): movl 28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebp + + movl (%esi),%eax + movl 4(%esi),%edx + shrdl %cl,%eax,%ebp + shrdl %cl,%edx,%eax + movl %ebp,(%edi) + movl %eax,4(%edi) + + movl 8(%esi),%ebp + movl 12(%esi),%eax + shrdl %cl,%ebp,%edx + shrdl %cl,%eax,%ebp + movl %edx,8(%edi) + movl %ebp,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebp + shrdl %cl,%edx,%eax + shrdl %cl,%ebp,%edx + movl %eax,16(%edi) + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + shrdl %cl,%eax,%ebp + shrdl %cl,%edx,%eax + movl %ebp,24(%edi) + movl %eax,28(%edi) + + addl $32,%esi + addl $32,%edi + decl %ebx + jnz L(oop) + +L(end): popl %ebx + cfi_adjust_cfa_offset (-4) + andl $7,%ebx + jz L(end2) +L(oop2): + movl (%esi),%eax + shrdl %cl,%eax,%edx /* compute result limb */ + movl %edx,(%edi) + movl %eax,%edx + addl $4,%esi + addl $4,%edi + decl %ebx + jnz L(oop2) + +L(end2): + shrl %cl,%edx /* compute most significant limb */ + movl %edx,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + cfi_adjust_cfa_offset (-4) + + popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + popl %ebp + cfi_adjust_cfa_offset (-4) + cfi_restore (ebp) + popl %esi + cfi_adjust_cfa_offset (-4) + cfi_restore (esi) + popl %edi + cfi_adjust_cfa_offset (-4) + cfi_restore (edi) + + ret + +/* We loop from least significant end of the arrays, which is only + permissible if the source and destination don't 
overlap, since the + function is documented to work for overlapping source and destination. +*/ + + cfi_adjust_cfa_offset (16) + cfi_rel_offset (edi, 12) + cfi_rel_offset (esi, 8) + cfi_rel_offset (ebp, 4) + cfi_rel_offset (ebx, 0) +L(special): + leal -4(%edi,%ebx,4),%edi + leal -4(%esi,%ebx,4),%esi + + movl (%esi),%edx + subl $4,%esi + + decl %ebx + pushl %ebx + cfi_adjust_cfa_offset (4) + shrl $3,%ebx + + shrl $1,%edx + incl %ebx + decl %ebx + jz L(Lend) + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +L(Loop): + movl -28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebp + + movl (%esi),%eax + movl -4(%esi),%edx + rcrl $1,%eax + movl %ebp,(%edi) + rcrl $1,%edx + movl %eax,-4(%edi) + + movl -8(%esi),%ebp + movl -12(%esi),%eax + rcrl $1,%ebp + movl %edx,-8(%edi) + rcrl $1,%eax + movl %ebp,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebp + rcrl $1,%edx + movl %eax,-16(%edi) + rcrl $1,%ebp + movl %edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + rcrl $1,%eax + movl %ebp,-24(%edi) + rcrl $1,%edx + movl %eax,-28(%edi) + + leal -32(%esi),%esi /* use leal not to clobber carry */ + leal -32(%edi),%edi + decl %ebx + jnz L(Loop) + +L(Lend): + popl %ebx + cfi_adjust_cfa_offset (-4) + sbbl %eax,%eax /* save carry in %eax */ + andl $7,%ebx + jz L(Lend2) + addl %eax,%eax /* restore carry from eax */ +L(Loop2): + movl %edx,%ebp + movl (%esi),%edx + rcrl $1,%edx + movl %ebp,(%edi) + + leal -4(%esi),%esi /* use leal not to clobber carry */ + leal -4(%edi),%edi + decl %ebx + jnz L(Loop2) + + jmp L(L1) +L(Lend2): + addl %eax,%eax /* restore carry from eax */ +L(L1): movl %edx,(%edi) /* store last limb */ + + movl $0,%eax + rcrl $1,%eax + + popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + popl %ebp + cfi_adjust_cfa_offset (-4) + cfi_restore (ebp) + popl %esi + cfi_adjust_cfa_offset (-4) + cfi_restore (esi) + popl %edi + cfi_adjust_cfa_offset (-4) + cfi_restore (edi) + + ret +END (__mpn_rshift) diff --git 
a/REORG.TODO/sysdeps/i386/i586/stpcpy.S b/REORG.TODO/sysdeps/i386/i586/stpcpy.S new file mode 100644 index 0000000000..8691efd01c --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/stpcpy.S @@ -0,0 +1,8 @@ +#define USE_AS_STPCPY +#define STRCPY __stpcpy + +#include <sysdeps/i386/i586/strcpy.S> + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) diff --git a/REORG.TODO/sysdeps/i386/i586/strchr.S b/REORG.TODO/sysdeps/i386/i586/strchr.S new file mode 100644 index 0000000000..02f66b8f72 --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/strchr.S @@ -0,0 +1,348 @@ +/* Find character CH in a NUL terminated string. + Highly optimized version for ix85, x>=5. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include "asm-syntax.h" + +/* This version is especially optimized for the i586 (and following?) + processors. This is mainly done by using the two pipelines. The + version optimized for i486 is weak in this aspect because to get + as much parallelism we have to execute some *more* instructions. + + The code below is structured to reflect the pairing of the instructions + as *I think* it is. I have no processor data book to verify this. 
+ If you find something you think is incorrect let me know. */ + + +/* The magic value which is used throughout in the whole code. */ +#define magic 0xfefefeff + +#define PARMS 4+16 /* space for 4 saved regs */ +#define RTN PARMS +#define STR RTN +#define CHR STR+4 + + .text +ENTRY (strchr) + + pushl %edi /* Save callee-safe registers. */ + cfi_adjust_cfa_offset (-4) + pushl %esi + cfi_adjust_cfa_offset (-4) + + pushl %ebx + cfi_adjust_cfa_offset (-4) + pushl %ebp + cfi_adjust_cfa_offset (-4) + + movl STR(%esp), %eax + movl CHR(%esp), %edx + + movl %eax, %edi /* duplicate string pointer for later */ + cfi_rel_offset (edi, 12) + xorl %ecx, %ecx /* clear %ecx */ + + /* At the moment %edx contains C. What we need for the + algorithm is C in all bytes of the dword. Avoid + operations on 16 bit words because these require an + prefix byte (and one more cycle). */ + movb %dl, %dh /* now it is 0|0|c|c */ + movb %dl, %cl /* we construct the lower half in %ecx */ + + shll $16, %edx /* now %edx is c|c|0|0 */ + movb %cl, %ch /* now %ecx is 0|0|c|c */ + + orl %ecx, %edx /* and finally c|c|c|c */ + andl $3, %edi /* mask alignment bits */ + + jz L(11) /* alignment is 0 => start loop */ + + movb %dl, %cl /* 0 is needed below */ + jp L(0) /* exactly two bits set */ + + xorb (%eax), %cl /* is byte the one we are looking for? */ + jz L(out) /* yes => return pointer */ + + xorb %dl, %cl /* load single byte and test for NUL */ + je L(3) /* yes => return NULL */ + + movb 1(%eax), %cl /* load single byte */ + incl %eax + + cmpb %cl, %dl /* is byte == C? */ + je L(out) /* aligned => return pointer */ + + cmpb $0, %cl /* is byte NUL? */ + je L(3) /* yes => return NULL */ + + incl %eax + decl %edi + + jne L(11) + +L(0): movb (%eax), %cl /* load single byte */ + + cmpb %cl, %dl /* is byte == C? */ + je L(out) /* aligned => return pointer */ + + cmpb $0, %cl /* is byte NUL? 
*/ + je L(3) /* yes => return NULL */ + + incl %eax /* increment pointer */ + + cfi_rel_offset (esi, 8) + cfi_rel_offset (ebx, 4) + cfi_rel_offset (ebp, 0) + + /* The following code is the preparation for the loop. The + four instruction up to `L1' will not be executed in the loop + because the same code is found at the end of the loop, but + there it is executed in parallel with other instructions. */ +L(11): movl (%eax), %ecx + movl $magic, %ebp + + movl $magic, %edi + addl %ecx, %ebp + + /* The main loop: it looks complex and indeed it is. I would + love to say `it was hard to write, so it should he hard to + read' but I will give some more hints. To fully understand + this code you should first take a look at the i486 version. + The basic algorithm is the same, but here the code organized + in a way which permits to use both pipelines all the time. + + I tried to make it a bit more understandable by indenting + the code according to stage in the algorithm. It goes as + follows: + check for 0 in 1st word + check for C in 1st word + check for 0 in 2nd word + check for C in 2nd word + check for 0 in 3rd word + check for C in 3rd word + check for 0 in 4th word + check for C in 4th word + + Please note that doing the test for NUL before the test for + C allows us to overlap the test for 0 in the next word with + the test for C. */ + +L(1): xorl %ecx, %ebp /* (word^magic) */ + addl %ecx, %edi /* add magic word */ + + leal 4(%eax), %eax /* increment pointer */ + jnc L(4) /* previous addl caused overflow? */ + + movl %ecx, %ebx /* duplicate original word */ + orl $magic, %ebp /* (word^magic)|magic */ + + addl $1, %ebp /* (word^magic)|magic == 0xffffffff? */ + jne L(4) /* yes => we found word with NUL */ + + movl $magic, %esi /* load magic value */ + xorl %edx, %ebx /* clear words which are C */ + + movl (%eax), %ecx + addl %ebx, %esi /* (word+magic) */ + + movl $magic, %edi + jnc L(5) /* previous addl caused overflow? 
*/ + + movl %edi, %ebp + xorl %ebx, %esi /* (word+magic)^word */ + + addl %ecx, %ebp + orl $magic, %esi /* ((word+magic)^word)|magic */ + + addl $1, %esi /* ((word+magic)^word)|magic==0xf..f?*/ + jne L(5) /* yes => we found word with C */ + + xorl %ecx, %ebp + addl %ecx, %edi + + leal 4(%eax), %eax + jnc L(4) + + movl %ecx, %ebx + orl $magic, %ebp + + addl $1, %ebp + jne L(4) + + movl $magic, %esi + xorl %edx, %ebx + + movl (%eax), %ecx + addl %ebx, %esi + + movl $magic, %edi + jnc L(5) + + movl %edi, %ebp + xorl %ebx, %esi + + addl %ecx, %ebp + orl $magic, %esi + + addl $1, %esi + jne L(5) + + xorl %ecx, %ebp + addl %ecx, %edi + + leal 4(%eax), %eax + jnc L(4) + + movl %ecx, %ebx + orl $magic, %ebp + + addl $1, %ebp + jne L(4) + + movl $magic, %esi + xorl %edx, %ebx + + movl (%eax), %ecx + addl %ebx, %esi + + movl $magic, %edi + jnc L(5) + + movl %edi, %ebp + xorl %ebx, %esi + + addl %ecx, %ebp + orl $magic, %esi + + addl $1, %esi + jne L(5) + + xorl %ecx, %ebp + addl %ecx, %edi + + leal 4(%eax), %eax + jnc L(4) + + movl %ecx, %ebx + orl $magic, %ebp + + addl $1, %ebp + jne L(4) + + movl $magic, %esi + xorl %edx, %ebx + + movl (%eax), %ecx + addl %ebx, %esi + + movl $magic, %edi + jnc L(5) + + movl %edi, %ebp + xorl %ebx, %esi + + addl %ecx, %ebp + orl $magic, %esi + + addl $1, %esi + + je L(1) + + /* We know there is no NUL byte but a C byte in the word. + %ebx contains NUL in this particular byte. */ +L(5): subl $4, %eax /* adjust pointer */ + testb %bl, %bl /* first byte == C? */ + + jz L(out) /* yes => return pointer */ + + incl %eax /* increment pointer */ + testb %bh, %bh /* second byte == C? 
*/ + + jz L(out) /* yes => return pointer */ + + shrl $16, %ebx /* make upper bytes accessible */ + incl %eax /* increment pointer */ + + cmp $0, %bl /* third byte == C */ + je L(out) /* yes => return pointer */ + + incl %eax /* increment pointer */ + +L(out): popl %ebp /* restore saved registers */ + cfi_adjust_cfa_offset (-4) + cfi_restore (ebp) + popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + + popl %esi + cfi_adjust_cfa_offset (-4) + cfi_restore (esi) + popl %edi + cfi_adjust_cfa_offset (-4) + cfi_restore (edi) + + ret + + cfi_adjust_cfa_offset (16) + cfi_rel_offset (edi, 12) + cfi_rel_offset (esi, 8) + cfi_rel_offset (ebx, 4) + cfi_rel_offset (ebp, 0) + /* We know there is a NUL byte in the word. But we have to test + whether there is an C byte before it in the word. */ +L(4): subl $4, %eax /* adjust pointer */ + cmpb %dl, %cl /* first byte == C? */ + + je L(out) /* yes => return pointer */ + + cmpb $0, %cl /* first byte == NUL? */ + je L(3) /* yes => return NULL */ + + incl %eax /* increment pointer */ + + cmpb %dl, %ch /* second byte == C? */ + je L(out) /* yes => return pointer */ + + cmpb $0, %ch /* second byte == NUL? */ + je L(3) /* yes => return NULL */ + + shrl $16, %ecx /* make upper bytes accessible */ + incl %eax /* increment pointer */ + + cmpb %dl, %cl /* third byte == C? */ + je L(out) /* yes => return pointer */ + + cmpb $0, %cl /* third byte == NUL? */ + je L(3) /* yes => return NULL */ + + incl %eax /* increment pointer */ + + /* The test four the fourth byte is necessary! */ + cmpb %dl, %ch /* fourth byte == C? 
*/ + je L(out) /* yes => return pointer */ + +L(3): xorl %eax, %eax + jmp L(out) +END (strchr) + +#undef index +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/i386/i586/strcpy.S b/REORG.TODO/sysdeps/i386/i586/strcpy.S new file mode 100644 index 0000000000..a444604f4f --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/strcpy.S @@ -0,0 +1,169 @@ +/* strcpy/stpcpy implementation for i586. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include "asm-syntax.h" + +#define PARMS 4+12 /* space for 3 saved regs */ +#define RTN PARMS +#define DEST RTN +#define SRC DEST+4 + +#ifndef USE_AS_STPCPY +# define STRCPY strcpy +#endif + +#define magic 0xfefefeff + + .text +ENTRY (STRCPY) + + pushl %edi + cfi_adjust_cfa_offset (4) + pushl %esi + cfi_adjust_cfa_offset (4) + pushl %ebx + cfi_adjust_cfa_offset (4) + + movl DEST(%esp), %edi + cfi_rel_offset (edi, 8) + movl SRC(%esp), %esi + cfi_rel_offset (esi, 4) + + xorl %eax, %eax + leal -1(%esi), %ecx + + movl $magic, %ebx + cfi_rel_offset (ebx, 0) + andl $3, %ecx + +#ifdef PIC + call 2f + cfi_adjust_cfa_offset (4) +2: popl %edx + cfi_adjust_cfa_offset (-4) + /* 0xb is the distance between 2: and 1: but we avoid writing + 1f-2b because the assembler generates worse code. */ + leal 0xb(%edx,%ecx,8), %ecx +#else + leal 1f(,%ecx,8), %ecx +#endif + + jmp *%ecx + + .align 8 +1: + orb (%esi), %al + jz L(end) + stosb + xorl %eax, %eax + incl %esi + + orb (%esi), %al + jz L(end) + stosb + xorl %eax, %eax + incl %esi + + orb (%esi), %al + jz L(end) + stosb + xorl %eax, %eax + incl %esi + +L(1): movl (%esi), %ecx + leal 4(%esi),%esi + + subl %ecx, %eax + addl %ebx, %ecx + + decl %eax + jnc L(3) + + movl %ecx, %edx + xorl %ecx, %eax + + subl %ebx, %edx + andl $~magic, %eax + + jne L(4) + + movl %edx, (%edi) + leal 4(%edi),%edi + + jmp L(1) + +L(3): movl %ecx, %edx + + subl %ebx, %edx + +L(4): movb %dl, (%edi) + testb %dl, %dl + + movl %edx, %eax + jz L(end2) + + shrl $16, %eax + movb %dh, 1(%edi) +#ifdef USE_AS_STPCPY + addl $1, %edi +#endif + + cmpb $0, %dh + jz L(end2) + +#ifdef USE_AS_STPCPY + movb %al, 1(%edi) + addl $1, %edi + + cmpb $0, %al + jz L(end2) + + addl $1, %edi +#else + movb %al, 2(%edi) + testb %al, %al + + leal 3(%edi), %edi + jz L(end2) +#endif + +L(end): movb %ah, (%edi) + +L(end2): +#ifdef USE_AS_STPCPY + movl %edi, %eax +#else + movl DEST(%esp), %eax +#endif + popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + 
popl %esi + cfi_adjust_cfa_offset (-4) + cfi_restore (esi) + popl %edi + cfi_adjust_cfa_offset (-4) + cfi_restore (edi) + + ret +END (STRCPY) +#ifndef USE_AS_STPCPY +libc_hidden_builtin_def (strcpy) +#endif diff --git a/REORG.TODO/sysdeps/i386/i586/strlen.S b/REORG.TODO/sysdeps/i386/i586/strlen.S new file mode 100644 index 0000000000..cfea2e020f --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/strlen.S @@ -0,0 +1,182 @@ +/* strlen -- Compute length of NUL terminated string. + Highly optimized version for ix86, x>=5. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include "asm-syntax.h" + +/* This version is especially optimized for the i586 (and following?) + processors. This is mainly done by using the two pipelines. The + version optimized for i486 is weak in this aspect because to get + as much parallelism we have to execute some *more* instructions. + + The code below is structured to reflect the pairing of the instructions + as *I think* it is. I have no processor data book to verify this. + If you find something you think is incorrect let me know. */ + + +/* The magic value which is used throughout in the whole code. 
*/ +#define magic 0xfefefeff + +#define PARMS 4 /* no space for saved regs */ +#define STR PARMS + + .text +ENTRY (strlen) + + movl STR(%esp), %eax + movl $3, %edx /* load mask (= 3) */ + + andl %eax, %edx /* separate last two bits of address */ + + jz L(1) /* aligned => start loop */ + jp L(0) /* exactly two bits set */ + + cmpb %dh, (%eax) /* is byte NUL? */ + je L(2) /* yes => return */ + + incl %eax /* increment pointer */ + cmpb %dh, (%eax) /* is byte NUL? */ + + je L(2) /* yes => return */ + + incl %eax /* increment pointer */ + xorl $2, %edx + + jz L(1) + +L(0): cmpb %dh, (%eax) /* is byte NUL? */ + je L(2) /* yes => return */ + + incl %eax /* increment pointer */ + xorl %edx, %edx /* We need %edx == 0 for later */ + + /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to + change any of the hole bits of LONGWORD. + + 1) Is this safe? Will it catch all the zero bytes? + Suppose there is a byte with all zeros. Any carry bits + propagating from its left will fall into the hole at its + least significant bit and stop. Since there will be no + carry from its most significant bit, the LSB of the + byte to the left will be unchanged, and the zero will be + detected. + + 2) Is this worthwhile? Will it ignore everything except + zero bytes? Suppose every byte of LONGWORD has a bit set + somewhere. There will be a carry into bit 8. If bit 8 + is set, this will carry into bit 16. If bit 8 is clear, + one of bits 9-15 must be set, so there will be a carry + into bit 16. Similarly, there will be a carry into bit + 24. If one of bits 24-31 is set, there will be a carry + into bit 32 (=carry flag), so all of the hole bits will + be changed. + + Note: %edx == 0 in any case here. 
*/ + +L(1): + movl (%eax), %ecx /* get word (= 4 bytes) in question */ + addl $4, %eax /* adjust pointer for *next* word */ + + subl %ecx, %edx /* first step to negate word */ + addl $magic, %ecx /* add magic word */ + + decl %edx /* complete negation of word */ + jnc L(3) /* previous addl caused overflow? */ + + xorl %ecx, %edx /* (word+magic)^word */ + + andl $~magic, %edx /* any of the carry flags set? */ + + jne L(3) /* yes => determine byte */ + + + movl (%eax), %ecx /* get word (= 4 bytes) in question */ + addl $4, %eax /* adjust pointer for *next* word */ + + subl %ecx, %edx /* first step to negate word */ + addl $magic, %ecx /* add magic word */ + + decl %edx /* complete negation of word */ + jnc L(3) /* previous addl caused overflow? */ + + xorl %ecx, %edx /* (word+magic)^word */ + + andl $~magic, %edx /* any of the carry flags set? */ + + jne L(3) /* yes => determine byte */ + + + movl (%eax), %ecx /* get word (= 4 bytes) in question */ + addl $4, %eax /* adjust pointer for *next* word */ + + subl %ecx, %edx /* first step to negate word */ + addl $magic, %ecx /* add magic word */ + + decl %edx /* complete negation of word */ + jnc L(3) /* previous addl caused overflow? */ + + xorl %ecx, %edx /* (word+magic)^word */ + + andl $~magic, %edx /* any of the carry flags set? */ + + jne L(3) /* yes => determine byte */ + + + movl (%eax), %ecx /* get word (= 4 bytes) in question */ + addl $4, %eax /* adjust pointer for *next* word */ + + subl %ecx, %edx /* first step to negate word */ + addl $magic, %ecx /* add magic word */ + + decl %edx /* complete negation of word */ + jnc L(3) /* previous addl caused overflow? */ + + xorl %ecx, %edx /* (word+magic)^word */ + + andl $~magic, %edx /* any of the carry flags set? */ + + je L(1) /* no => start loop again */ + + +L(3): subl $4, %eax /* correct too early pointer increment */ + subl $magic, %ecx + + cmpb $0, %cl /* lowest byte NUL? 
*/ + jz L(2) /* yes => return */ + + inc %eax /* increment pointer */ + testb %ch, %ch /* second byte NUL? */ + + jz L(2) /* yes => return */ + + shrl $16, %ecx /* make upper bytes accessible */ + incl %eax /* increment pointer */ + + cmpb $0, %cl /* is third byte NUL? */ + jz L(2) /* yes => return */ + + incl %eax /* increment pointer */ + +L(2): subl STR(%esp), %eax /* now compute the length as difference + between start and terminating NUL + character */ + ret +END (strlen) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/i386/i586/sub_n.S b/REORG.TODO/sysdeps/i386/i586/sub_n.S new file mode 100644 index 0000000000..21b5a2742c --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/sub_n.S @@ -0,0 +1,143 @@ +/* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0 + and store difference in a third limb vector. + Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, + see <http://www.gnu.org/licenses/>. 
*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define PARMS 4+16 /* space for 4 saved regs */ +#define RES PARMS +#define S1 RES+4 +#define S2 S1+4 +#define SIZE S2+4 + + .text +ENTRY (__mpn_sub_n) + + pushl %edi + cfi_adjust_cfa_offset (4) + pushl %esi + cfi_adjust_cfa_offset (4) + pushl %ebp + cfi_adjust_cfa_offset (4) + pushl %ebx + cfi_adjust_cfa_offset (4) + + movl RES(%esp),%edi + cfi_rel_offset (edi, 12) + movl S1(%esp),%esi + cfi_rel_offset (esi, 8) + movl S2(%esp),%ebx + cfi_rel_offset (ebx, 0) + movl SIZE(%esp),%ecx + movl (%ebx),%ebp + cfi_rel_offset (ebp, 4) + + decl %ecx + movl %ecx,%edx + shrl $3,%ecx + andl $7,%edx + testl %ecx,%ecx /* zero carry flag */ + jz L(end) + pushl %edx + cfi_adjust_cfa_offset (4) + + ALIGN (3) +L(oop): movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L(1): movl (%esi),%eax + movl 4(%esi),%edx + sbbl %ebp,%eax + movl 4(%ebx),%ebp + sbbl %ebp,%edx + movl 8(%ebx),%ebp + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L(2): movl 8(%esi),%eax + movl 12(%esi),%edx + sbbl %ebp,%eax + movl 12(%ebx),%ebp + sbbl %ebp,%edx + movl 16(%ebx),%ebp + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L(3): movl 16(%esi),%eax + movl 20(%esi),%edx + sbbl %ebp,%eax + movl 20(%ebx),%ebp + sbbl %ebp,%edx + movl 24(%ebx),%ebp + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L(4): movl 24(%esi),%eax + movl 28(%esi),%edx + sbbl %ebp,%eax + movl 28(%ebx),%ebp + sbbl %ebp,%edx + movl 32(%ebx),%ebp + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebx),%ebx + decl %ecx + jnz L(oop) + + popl %edx + cfi_adjust_cfa_offset (-4) +L(end): + decl %edx /* test %edx w/o clobbering carry */ + js L(end2) + incl %edx +L(oop2): + leal 4(%edi),%edi + movl (%esi),%eax + sbbl %ebp,%eax + movl 4(%ebx),%ebp + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebx),%ebx + decl %edx + jnz L(oop2) +L(end2): + movl (%esi),%eax + sbbl %ebp,%eax + movl %eax,(%edi) + + sbbl %eax,%eax + negl %eax + + popl %ebx + 
cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + popl %ebp + cfi_adjust_cfa_offset (-4) + cfi_restore (ebp) + popl %esi + cfi_adjust_cfa_offset (-4) + cfi_restore (esi) + popl %edi + cfi_adjust_cfa_offset (-4) + cfi_restore (edi) + + ret +END (__mpn_sub_n) diff --git a/REORG.TODO/sysdeps/i386/i586/submul_1.S b/REORG.TODO/sysdeps/i386/i586/submul_1.S new file mode 100644 index 0000000000..5e5e121ca2 --- /dev/null +++ b/REORG.TODO/sysdeps/i386/i586/submul_1.S @@ -0,0 +1,94 @@ +/* Pentium __mpn_submul_1 -- Multiply a limb vector with a limb and subtract + the result from a second limb vector. + Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, + see <http://www.gnu.org/licenses/>. 
*/ + +#include "sysdep.h" +#include "asm-syntax.h" + +#define PARMS 4+16 /* space for 4 saved regs */ +#define RES PARMS +#define S1 RES+4 +#define SIZE S1+4 +#define S2LIMB SIZE+4 + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebx + + .text +ENTRY (__mpn_submul_1) + + pushl %res_ptr + cfi_adjust_cfa_offset (4) + pushl %s1_ptr + cfi_adjust_cfa_offset (4) + pushl %ebp + cfi_adjust_cfa_offset (4) + pushl %s2_limb + cfi_adjust_cfa_offset (4) + + movl RES(%esp), %res_ptr + cfi_rel_offset (res_ptr, 12) + movl S1(%esp), %s1_ptr + cfi_rel_offset (s1_ptr, 8) + movl SIZE(%esp), %size + movl S2LIMB(%esp), %s2_limb + cfi_rel_offset (s2_limb, 0) + leal (%res_ptr,%size,4), %res_ptr + leal (%s1_ptr,%size,4), %s1_ptr + negl %size + xorl %ebp, %ebp + cfi_rel_offset (ebp, 4) + ALIGN (3) + +L(oop): adcl $0, %ebp + movl (%s1_ptr,%size,4), %eax + + mull %s2_limb + + addl %ebp, %eax + movl (%res_ptr,%size,4), %ebp + + adcl $0, %edx + subl %eax, %ebp + + movl %ebp, (%res_ptr,%size,4) + incl %size + + movl %edx, %ebp + jnz L(oop) + + adcl $0, %ebp + movl %ebp, %eax + popl %s2_limb + cfi_adjust_cfa_offset (-4) + cfi_restore (s2_limb) + popl %ebp + cfi_adjust_cfa_offset (-4) + cfi_restore (ebp) + popl %s1_ptr + cfi_adjust_cfa_offset (-4) + cfi_restore (s1_ptr) + popl %res_ptr + cfi_adjust_cfa_offset (-4) + cfi_restore (res_ptr) + + ret +#undef size +END (__mpn_submul_1) |