path: root/REORG.TODO/sysdeps/i386/i586
author     Zack Weinberg <zackw@panix.com>  2017-06-08 15:39:03 -0400
committer  Zack Weinberg <zackw@panix.com>  2017-06-08 15:39:03 -0400
commit     5046dbb4a7eba5eccfd258f92f4735c9ffc8d069 (patch)
tree       4470480d904b65cf14ca524f96f79eca818c3eaf /REORG.TODO/sysdeps/i386/i586
parent     199fc19d3aaaf57944ef036e15904febe877fc93 (diff)
download   glibc-5046dbb4a7eba5eccfd258f92f4735c9ffc8d069.tar.gz (also .tar.xz, .zip)
Prepare for radical source tree reorganization.  (branch: zack/build-layout-experiment)
All top-level files and directories are moved into a temporary storage
directory, REORG.TODO, except for files that will certainly still
exist in their current form at top level when we're done (COPYING,
COPYING.LIB, LICENSES, NEWS, README), all old ChangeLog files (which
are moved to the new directory OldChangeLogs, instead), and the
generated file INSTALL (which is just deleted; in the new order, there
will be no generated files checked into version control).
Diffstat (limited to 'REORG.TODO/sysdeps/i386/i586')
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/add_n.S       143
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/addmul_1.S     94
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/bzero.S         4
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/init-arch.h    19
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/lshift.S      255
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/memcopy.h      95
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/memcpy.S      124
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/mempcpy.S       8
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/memset.S      121
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/memusage.h      1
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/mul_1.S        90
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/rshift.S      255
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/stpcpy.S        8
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/strchr.S      348
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/strcpy.S      169
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/strlen.S      182
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/sub_n.S       143
-rw-r--r--  REORG.TODO/sysdeps/i386/i586/submul_1.S     94
18 files changed, 2153 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/i386/i586/add_n.S b/REORG.TODO/sysdeps/i386/i586/add_n.S
new file mode 100644
index 0000000000..f73df092f0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/add_n.S
@@ -0,0 +1,143 @@
+/* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+   sum in a third limb vector.
+   Copyright (C) 1992-2017 Free Software Foundation, Inc.
+   This file is part of the GNU MP Library.
+
+   The GNU MP Library is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or (at your
+   option) any later version.
+
+   The GNU MP Library is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+   License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with the GNU MP Library; see the file COPYING.LIB.  If not,
+   see <http://www.gnu.org/licenses/>.  */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS	4+16		/* space for 4 saved regs */
+#define RES	PARMS
+#define S1	RES+4
+#define S2	S1+4
+#define SIZE	S2+4
+
+	.text
+ENTRY (__mpn_add_n)
+
+	pushl	%edi
+	cfi_adjust_cfa_offset (4)
+	pushl	%esi
+	cfi_adjust_cfa_offset (4)
+	pushl	%ebp
+	cfi_adjust_cfa_offset (4)
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+
+	movl	RES(%esp),%edi
+	cfi_rel_offset (edi, 12)
+	movl	S1(%esp),%esi
+	cfi_rel_offset (esi, 8)
+	movl	S2(%esp),%ebx
+	cfi_rel_offset (ebx, 0)
+	movl	SIZE(%esp),%ecx
+	movl	(%ebx),%ebp
+	cfi_rel_offset (ebp, 4)
+
+	decl	%ecx
+	movl	%ecx,%edx
+	shrl	$3,%ecx
+	andl	$7,%edx
+	testl	%ecx,%ecx		/* zero carry flag */
+	jz	L(end)
+	pushl	%edx
+	cfi_adjust_cfa_offset (4)
+
+	ALIGN (3)
+L(oop):	movl	28(%edi),%eax		/* fetch destination cache line */
+	leal	32(%edi),%edi
+
+L(1):	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	adcl	%ebp,%eax
+	movl	4(%ebx),%ebp
+	adcl	%ebp,%edx
+	movl	8(%ebx),%ebp
+	movl	%eax,-32(%edi)
+	movl	%edx,-28(%edi)
+
+L(2):	movl	8(%esi),%eax
+	movl	12(%esi),%edx
+	adcl	%ebp,%eax
+	movl	12(%ebx),%ebp
+	adcl	%ebp,%edx
+	movl	16(%ebx),%ebp
+	movl	%eax,-24(%edi)
+	movl	%edx,-20(%edi)
+
+L(3):	movl	16(%esi),%eax
+	movl	20(%esi),%edx
+	adcl	%ebp,%eax
+	movl	20(%ebx),%ebp
+	adcl	%ebp,%edx
+	movl	24(%ebx),%ebp
+	movl	%eax,-16(%edi)
+	movl	%edx,-12(%edi)
+
+L(4):	movl	24(%esi),%eax
+	movl	28(%esi),%edx
+	adcl	%ebp,%eax
+	movl	28(%ebx),%ebp
+	adcl	%ebp,%edx
+	movl	32(%ebx),%ebp
+	movl	%eax,-8(%edi)
+	movl	%edx,-4(%edi)
+
+	leal	32(%esi),%esi
+	leal	32(%ebx),%ebx
+	decl	%ecx
+	jnz	L(oop)
+
+	popl	%edx
+	cfi_adjust_cfa_offset (-4)
+L(end):
+	decl	%edx			/* test %edx w/o clobbering carry */
+	js	L(end2)
+	incl	%edx
+L(oop2):
+	leal	4(%edi),%edi
+	movl	(%esi),%eax
+	adcl	%ebp,%eax
+	movl	4(%ebx),%ebp
+	movl	%eax,-4(%edi)
+	leal	4(%esi),%esi
+	leal	4(%ebx),%ebx
+	decl	%edx
+	jnz	L(oop2)
+L(end2):
+	movl	(%esi),%eax
+	adcl	%ebp,%eax
+	movl	%eax,(%edi)
+
+	sbbl	%eax,%eax
+	negl	%eax
+
+	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+	popl	%ebp
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebp)
+	popl	%esi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (esi)
+	popl	%edi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (edi)
+
+	ret
+END (__mpn_add_n)
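
What __mpn_add_n computes, stripped of the unrolling and cache-line games: add two equal-length little-endian limb vectors and return the final carry (the closing sbbl/negl pair turns the carry flag into 0 or 1). A rough C sketch of the contract follows; the names are invented for illustration and this is not code from the glibc tree. __mpn_sub_n further down this page is the same loop with sbbl instead of adcl, returning the borrow.

typedef unsigned int limb_t;            /* one 32-bit limb on i586 */

/* Contract sketch for __mpn_add_n; illustrative only.  */
static limb_t
mpn_add_n_sketch (limb_t *res, const limb_t *s1, const limb_t *s2, int size)
{
  unsigned long long acc = 0;           /* limb sum plus incoming carry */
  for (int i = 0; i < size; i++)
    {
      acc += (unsigned long long) s1[i] + s2[i];
      res[i] = (limb_t) acc;            /* low 32 bits */
      acc >>= 32;                       /* carry into the next limb */
    }
  return (limb_t) acc;                  /* final carry, 0 or 1 */
}

The assembly does the same work eight limbs per iteration and reads one word of each destination cache line first (the "fetch destination cache line" loads) because the Pentium does not allocate a cache line on a write miss.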
diff --git a/REORG.TODO/sysdeps/i386/i586/addmul_1.S b/REORG.TODO/sysdeps/i386/i586/addmul_1.S
new file mode 100644
index 0000000000..a713192982
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/addmul_1.S
@@ -0,0 +1,94 @@
+/* Pentium __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+   the result to a second limb vector.
+   Copyright (C) 1992-2017 Free Software Foundation, Inc.
+   This file is part of the GNU MP Library.
+
+   The GNU MP Library is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or (at your
+   option) any later version.
+
+   The GNU MP Library is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+   License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with the GNU MP Library; see the file COPYING.LIB.  If not,
+   see <http://www.gnu.org/licenses/>.  */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS	4+16	/* space for 4 saved regs */
+#define RES	PARMS
+#define S1	RES+4
+#define SIZE	S1+4
+#define S2LIMB	SIZE+4
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebx
+
+	.text
+ENTRY (__mpn_addmul_1)
+
+	pushl	%res_ptr
+	cfi_adjust_cfa_offset (4)
+	pushl	%s1_ptr
+	cfi_adjust_cfa_offset (4)
+	pushl	%ebp
+	cfi_adjust_cfa_offset (4)
+	pushl	%s2_limb
+	cfi_adjust_cfa_offset (4)
+
+	movl	RES(%esp), %res_ptr
+	cfi_rel_offset (res_ptr, 12)
+	movl	S1(%esp), %s1_ptr
+	cfi_rel_offset (s1_ptr, 8)
+	movl	SIZE(%esp), %size
+	movl	S2LIMB(%esp), %s2_limb
+	cfi_rel_offset (s2_limb, 0)
+	leal	(%res_ptr,%size,4), %res_ptr
+	leal	(%s1_ptr,%size,4), %s1_ptr
+	negl	%size
+	xorl	%ebp, %ebp
+	cfi_rel_offset (ebp, 4)
+	ALIGN (3)
+
+L(oop):	adcl	$0, %ebp
+	movl	(%s1_ptr,%size,4), %eax
+
+	mull	%s2_limb
+
+	addl	%ebp, %eax
+	movl	(%res_ptr,%size,4), %ebp
+
+	adcl	$0, %edx
+	addl	%eax, %ebp
+
+	movl	%ebp, (%res_ptr,%size,4)
+	incl	%size
+
+	movl	%edx, %ebp
+	jnz	L(oop)
+
+	adcl	$0, %ebp
+	movl	%ebp, %eax
+	popl	%s2_limb
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (s2_limb)
+	popl	%ebp
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebp)
+	popl	%s1_ptr
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (s1_ptr)
+	popl	%res_ptr
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (res_ptr)
+
+	ret
+#undef size
+END (__mpn_addmul_1)
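
__mpn_addmul_1 multiplies the limb vector S1 by the single limb S2LIMB and accumulates the product into RES, returning the final carry limb. A hedged C sketch of that contract (invented names, not the glibc sources):

typedef unsigned int limb_t;

static limb_t
mpn_addmul_1_sketch (limb_t *res, const limb_t *s1, int size, limb_t s2_limb)
{
  limb_t carry = 0;
  for (int i = 0; i < size; i++)
    {
      unsigned long long prod = (unsigned long long) s1[i] * s2_limb + carry;
      unsigned long long sum  = (unsigned long long) res[i] + (limb_t) prod;
      res[i] = (limb_t) sum;
      carry  = (limb_t) (prod >> 32) + (limb_t) (sum >> 32);
    }
  return carry;
}

The #define renaming at the top of the file (res_ptr, s1_ptr, size, s2_limb) only makes the assembly read like this pseudo-C; the loop itself is a short mull/addl/adcl chain. __mpn_submul_1 at the bottom of the page is identical except that it subtracts the product and returns the borrow.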
diff --git a/REORG.TODO/sysdeps/i386/i586/bzero.S b/REORG.TODO/sysdeps/i386/i586/bzero.S
new file mode 100644
index 0000000000..2a106719a4
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/bzero.S
@@ -0,0 +1,4 @@
+#define USE_AS_BZERO
+#define memset __bzero
+#include <sysdeps/i386/i586/memset.S>
+weak_alias (__bzero, bzero)
diff --git a/REORG.TODO/sysdeps/i386/i586/init-arch.h b/REORG.TODO/sysdeps/i386/i586/init-arch.h
new file mode 100644
index 0000000000..4711212e6f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/init-arch.h
@@ -0,0 +1,19 @@
+/* Copyright (C) 2015-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define MINIMUM_ISA 586
+#include <sysdeps/x86/init-arch.h>
diff --git a/REORG.TODO/sysdeps/i386/i586/lshift.S b/REORG.TODO/sysdeps/i386/i586/lshift.S
new file mode 100644
index 0000000000..7941c28d9d
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/lshift.S
@@ -0,0 +1,255 @@
+/* Pentium optimized __mpn_lshift --
+   Copyright (C) 1992-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS	4+16		/* space for 4 saved regs */
+#define RES	PARMS
+#define S	RES+4
+#define SIZE	S+4
+#define CNT	SIZE+4
+
+	.text
+ENTRY (__mpn_lshift)
+
+	pushl	%edi
+	cfi_adjust_cfa_offset (4)
+	pushl	%esi
+	cfi_adjust_cfa_offset (4)
+	pushl	%ebp
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebp, 0)
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+
+	movl	RES(%esp),%edi
+	cfi_rel_offset (edi, 12)
+	movl	S(%esp),%esi
+	cfi_rel_offset (esi, 8)
+	movl	SIZE(%esp),%ebx
+	cfi_rel_offset (ebx, 0)
+	movl	CNT(%esp),%ecx
+
+/* We can use faster code for shift-by-1 under certain conditions.  */
+	cmp	$1,%ecx
+	jne	L(normal)
+	leal	4(%esi),%eax
+	cmpl	%edi,%eax
+	jnc	L(special)		/* jump if s_ptr + 1 >= res_ptr */
+	leal	(%esi,%ebx,4),%eax
+	cmpl	%eax,%edi
+	jnc	L(special)		/* jump if res_ptr >= s_ptr + size */
+
+L(normal):
+	leal	-4(%edi,%ebx,4),%edi
+	leal	-4(%esi,%ebx,4),%esi
+
+	movl	(%esi),%edx
+	subl	$4,%esi
+	xorl	%eax,%eax
+	shldl	%cl,%edx,%eax		/* compute carry limb */
+	pushl	%eax			/* push carry limb onto stack */
+	cfi_adjust_cfa_offset (4)
+
+	decl	%ebx
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+	shrl	$3,%ebx
+	jz	L(end)
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+L(oop):	movl	-28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebp
+
+	movl	(%esi),%eax
+	movl	-4(%esi),%edx
+	shldl	%cl,%eax,%ebp
+	shldl	%cl,%edx,%eax
+	movl	%ebp,(%edi)
+	movl	%eax,-4(%edi)
+
+	movl	-8(%esi),%ebp
+	movl	-12(%esi),%eax
+	shldl	%cl,%ebp,%edx
+	shldl	%cl,%eax,%ebp
+	movl	%edx,-8(%edi)
+	movl	%ebp,-12(%edi)
+
+	movl	-16(%esi),%edx
+	movl	-20(%esi),%ebp
+	shldl	%cl,%edx,%eax
+	shldl	%cl,%ebp,%edx
+	movl	%eax,-16(%edi)
+	movl	%edx,-20(%edi)
+
+	movl	-24(%esi),%eax
+	movl	-28(%esi),%edx
+	shldl	%cl,%eax,%ebp
+	shldl	%cl,%edx,%eax
+	movl	%ebp,-24(%edi)
+	movl	%eax,-28(%edi)
+
+	subl	$32,%esi
+	subl	$32,%edi
+	decl	%ebx
+	jnz	L(oop)
+
+L(end):	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	andl	$7,%ebx
+	jz	L(end2)
+L(oop2):
+	movl	(%esi),%eax
+	shldl	%cl,%eax,%edx
+	movl	%edx,(%edi)
+	movl	%eax,%edx
+	subl	$4,%esi
+	subl	$4,%edi
+	decl	%ebx
+	jnz	L(oop2)
+
+L(end2):
+	shll	%cl,%edx		/* compute least significant limb */
+	movl	%edx,(%edi)		/* store it */
+
+	popl	%eax			/* pop carry limb */
+	cfi_adjust_cfa_offset (-4)
+
+	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+	popl	%ebp
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebp)
+	popl	%esi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (esi)
+	popl	%edi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (edi)
+
+	ret
+
+/* We loop from least significant end of the arrays, which is only
+   permissible if the source and destination don't overlap, since the
+   function is documented to work for overlapping source and destination.
+*/
+
+	cfi_adjust_cfa_offset (16)
+	cfi_rel_offset (edi, 12)
+	cfi_rel_offset (esi, 8)
+	cfi_rel_offset (ebp, 4)
+	cfi_rel_offset (ebx, 0)
+L(special):
+	movl	(%esi),%edx
+	addl	$4,%esi
+
+	decl	%ebx
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+	shrl	$3,%ebx
+
+	addl	%edx,%edx
+	incl	%ebx
+	decl	%ebx
+	jz	L(Lend)
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+L(Loop):
+	movl	28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebp
+
+	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	adcl	%eax,%eax
+	movl	%ebp,(%edi)
+	adcl	%edx,%edx
+	movl	%eax,4(%edi)
+
+	movl	8(%esi),%ebp
+	movl	12(%esi),%eax
+	adcl	%ebp,%ebp
+	movl	%edx,8(%edi)
+	adcl	%eax,%eax
+	movl	%ebp,12(%edi)
+
+	movl	16(%esi),%edx
+	movl	20(%esi),%ebp
+	adcl	%edx,%edx
+	movl	%eax,16(%edi)
+	adcl	%ebp,%ebp
+	movl	%edx,20(%edi)
+
+	movl	24(%esi),%eax
+	movl	28(%esi),%edx
+	adcl	%eax,%eax
+	movl	%ebp,24(%edi)
+	adcl	%edx,%edx
+	movl	%eax,28(%edi)
+
+	leal	32(%esi),%esi		/* use leal not to clobber carry */
+	leal	32(%edi),%edi
+	decl	%ebx
+	jnz	L(Loop)
+
+L(Lend):
+	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	sbbl	%eax,%eax		/* save carry in %eax */
+	andl	$7,%ebx
+	jz	L(Lend2)
+	addl	%eax,%eax		/* restore carry from eax */
+L(Loop2):
+	movl	%edx,%ebp
+	movl	(%esi),%edx
+	adcl	%edx,%edx
+	movl	%ebp,(%edi)
+
+	leal	4(%esi),%esi		/* use leal not to clobber carry */
+	leal	4(%edi),%edi
+	decl	%ebx
+	jnz	L(Loop2)
+
+	jmp	L(L1)
+L(Lend2):
+	addl	%eax,%eax		/* restore carry from eax */
+L(L1):	movl	%edx,(%edi)		/* store last limb */
+
+	sbbl	%eax,%eax
+	negl	%eax
+
+	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+	popl	%ebp
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebp)
+	popl	%esi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (esi)
+	popl	%edi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (edi)
+
+	ret
+END (__mpn_lshift)
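
__mpn_lshift shifts a SIZE-limb number left by CNT bits (CNT between 1 and 31), stores the result through RES, and returns the bits shifted out of the most significant limb. The general path runs from the most significant limb downward; the shift-by-1 special case, taken when the overlap checks above allow it, instead doubles each limb with adcl from the least significant end, letting the carry flag carry the bit between limbs. A contract sketch in C (invented names, illustrative only):

typedef unsigned int limb_t;

/* CNT must be 1..31; a shift by 32 - CNT would otherwise be undefined.  */
static limb_t
mpn_lshift_sketch (limb_t *res, const limb_t *s, int size, unsigned int cnt)
{
  limb_t ret = s[size - 1] >> (32 - cnt);   /* bits shifted out on top */
  for (int i = size - 1; i > 0; i--)
    res[i] = (s[i] << cnt) | (s[i - 1] >> (32 - cnt));
  res[0] = s[0] << cnt;
  return ret;
}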
diff --git a/REORG.TODO/sysdeps/i386/i586/memcopy.h b/REORG.TODO/sysdeps/i386/i586/memcopy.h
new file mode 100644
index 0000000000..39f020a746
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/memcopy.h
@@ -0,0 +1,95 @@
+/* memcopy.h -- definitions for memory copy functions.  Pentium version.
+   Copyright (C) 1994-2017 Free Software Foundation, Inc.
+   Contributed by Torbjorn Granlund (tege@sics.se).
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Get the i386 definitions.  We will override some of them below.  */
+#include <sysdeps/i386/memcopy.h>
+
+/* Written like this, the Pentium pipeline can execute the loop at a
+   sustained rate of 2 instructions/clock, or asymptotically 480
+   Mbytes/second at 60 MHz.  */
+
+#undef	WORD_COPY_FWD
+#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)		\
+  do									\
+    {									\
+      asm volatile ("subl	$32,%2\n"				\
+		    "js		2f\n"					\
+		    "movl	0(%0),%%edx\n"	/* alloc dest line */	\
+		    "1:\n"						\
+		    "movl	28(%0),%%eax\n"	/* alloc dest line */	\
+		    "subl	$32,%2\n"	/* decr loop count */	\
+		    "movl	0(%1),%%eax\n"	/* U pipe */		\
+		    "movl	4(%1),%%edx\n"	/* V pipe */		\
+		    "movl	%%eax,0(%0)\n"	/* U pipe */		\
+		    "movl	%%edx,4(%0)\n"	/* V pipe */		\
+		    "movl	8(%1),%%eax\n"				\
+		    "movl	12(%1),%%edx\n"				\
+		    "movl	%%eax,8(%0)\n"				\
+		    "movl	%%edx,12(%0)\n"				\
+		    "movl	16(%1),%%eax\n"				\
+		    "movl	20(%1),%%edx\n"				\
+		    "movl	%%eax,16(%0)\n"				\
+		    "movl	%%edx,20(%0)\n"				\
+		    "movl	24(%1),%%eax\n"				\
+		    "movl	28(%1),%%edx\n"				\
+		    "movl	%%eax,24(%0)\n"				\
+		    "movl	%%edx,28(%0)\n"				\
+		    "leal	32(%1),%1\n"	/* update src ptr */	\
+		    "leal	32(%0),%0\n"	/* update dst ptr */	\
+		    "jns	1b\n"					\
+		    "2: addl	$32,%2" :				\
+		    "=r" (dst_bp), "=r" (src_bp), "=r" (nbytes_left) :	\
+		    "0" (dst_bp), "1" (src_bp), "2" (nbytes) :		\
+		    "ax", "dx");					\
+    } while (0)
+
+#undef	WORD_COPY_BWD
+#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes)		\
+  do									\
+    {									\
+      asm volatile ("subl	$32,%2\n"				\
+		    "js		2f\n"					\
+		    "movl	-4(%0),%%edx\n"				\
+		    "1:\n"						\
+		    "movl	-32(%0),%%eax\n"			\
+		    "subl	$32,%2\n"				\
+		    "movl	-4(%1),%%eax\n"				\
+		    "movl	-8(%1),%%edx\n"				\
+		    "movl	%%eax,-4(%0)\n"				\
+		    "movl	%%edx,-8(%0)\n"				\
+		    "movl	-12(%1),%%eax\n"			\
+		    "movl	-16(%1),%%edx\n"			\
+		    "movl	%%eax,-12(%0)\n"			\
+		    "movl	%%edx,-16(%0)\n"			\
+		    "movl	-20(%1),%%eax\n"			\
+		    "movl	-24(%1),%%edx\n"			\
+		    "movl	%%eax,-20(%0)\n"			\
+		    "movl	%%edx,-24(%0)\n"			\
+		    "movl	-28(%1),%%eax\n"			\
+		    "movl	-32(%1),%%edx\n"			\
+		    "movl	%%eax,-28(%0)\n"			\
+		    "movl	%%edx,-32(%0)\n"			\
+		    "leal	-32(%1),%1\n"				\
+		    "leal	-32(%0),%0\n"				\
+		    "jns	1b\n"					\
+		    "2: addl	$32,%2" :				\
+		    "=r" (dst_ep), "=r" (src_ep), "=r" (nbytes_left) :	\
+		    "0" (dst_ep), "1" (src_ep), "2" (nbytes) :		\
+		    "ax", "dx");					\
+    } while (0)
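
Only the 32-byte block move lives in these macros; the generic copy code that includes this header (memmove, bcopy and friends) handles alignment and the sub-32-byte tail reported back through nbytes_left. Ignoring the cache-line read-ahead and the U/V pipe pairing called out in the asm comments, WORD_COPY_FWD behaves roughly like the C below (an illustrative sketch with an invented name; the real macros update their pointer arguments in place through the inline-asm constraints). WORD_COPY_BWD is the mirror image, walking down from the end of both buffers.

#include <string.h>
#include <stddef.h>

static void
word_copy_fwd_sketch (char **dst_bp, const char **src_bp,
                      size_t *nbytes_left, size_t nbytes)
{
  while (nbytes >= 32)
    {
      /* The asm reads one word of the destination line first
         (the "alloc dest line" loads) so the Pentium allocates it in
         the cache, then issues eight paired 32-bit load/store moves.  */
      memcpy (*dst_bp, *src_bp, 32);
      *dst_bp += 32;
      *src_bp += 32;
      nbytes -= 32;
    }
  *nbytes_left = nbytes;
}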
diff --git a/REORG.TODO/sysdeps/i386/i586/memcpy.S b/REORG.TODO/sysdeps/i386/i586/memcpy.S
new file mode 100644
index 0000000000..6474a3f653
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/memcpy.S
@@ -0,0 +1,124 @@
+/* Highly optimized version for i586.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS	4+8	/* space for 2 saved regs */
+#define RTN	PARMS
+#define DEST	RTN
+#define SRC	DEST+4
+#define LEN	SRC+4
+
+        .text
+#if defined PIC && IS_IN (libc)
+ENTRY (__memcpy_chk)
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (__memcpy_chk)
+#endif
+ENTRY (memcpy)
+
+	pushl	%edi
+	cfi_adjust_cfa_offset (4)
+	pushl	%esi
+	cfi_adjust_cfa_offset (4)
+
+	movl	DEST(%esp), %edi
+	cfi_rel_offset (edi, 4)
+	movl	SRC(%esp), %esi
+	cfi_rel_offset (esi, 0)
+	movl	LEN(%esp), %ecx
+	movl	%edi, %eax
+
+	/* We need this in any case.  */
+	cld
+
+	/* Cutoff for the big loop is a size of 32 bytes since otherwise
+	   the loop will never be entered.  */
+	cmpl	$32, %ecx
+	jbe	L(1)
+
+	negl	%eax
+	andl	$3, %eax
+	subl	%eax, %ecx
+	xchgl	%eax, %ecx
+
+	rep; movsb
+
+	movl	%eax, %ecx
+	subl	$32, %ecx
+	js	L(2)
+
+	/* Read ahead to make sure we write in the cache since the stupid
+	   i586 designers haven't implemented read-on-write-miss.  */
+	movl	(%edi), %eax
+L(3):	movl	28(%edi), %edx
+
+	/* Now correct the loop counter.  Please note that in the following
+	   code the flags are not changed anymore.  */
+	subl	$32, %ecx
+
+	movl	(%esi), %eax
+	movl	4(%esi), %edx
+	movl	%eax, (%edi)
+	movl	%edx, 4(%edi)
+	movl	8(%esi), %eax
+	movl	12(%esi), %edx
+	movl	%eax, 8(%edi)
+	movl	%edx, 12(%edi)
+	movl	16(%esi), %eax
+	movl	20(%esi), %edx
+	movl	%eax, 16(%edi)
+	movl	%edx, 20(%edi)
+	movl	24(%esi), %eax
+	movl	28(%esi), %edx
+	movl	%eax, 24(%edi)
+	movl	%edx, 28(%edi)
+
+	leal	32(%esi), %esi
+	leal	32(%edi), %edi
+
+	jns	L(3)
+
+	/* Correct extra loop counter modification.  */
+L(2):	addl	$32, %ecx
+#ifndef USE_AS_MEMPCPY
+	movl	DEST(%esp), %eax
+#endif
+
+L(1):	rep; movsb
+
+#ifdef USE_AS_MEMPCPY
+	movl	%edi, %eax
+#endif
+
+	popl	%esi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (esi)
+	popl	%edi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (edi)
+
+	ret
+END (memcpy)
+#ifndef USE_AS_MEMPCPY
+libc_hidden_builtin_def (memcpy)
+#endif
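
The shape of the routine, as a rough C sketch (illustrative only; the helper name is invented and this is not the code being added): for requests over 32 bytes, copy up to 3 bytes so the destination is 4-byte aligned, then move 32-byte blocks, touching the destination line first (the read-ahead the comments mention) because the Pentium does not allocate a cache line on a write miss, and finally let rep movsb finish the tail. mempcpy.S just below reuses this file with USE_AS_MEMPCPY, which switches the return value to the end pointer.

#include <string.h>
#include <stdint.h>
#include <stddef.h>

void *
memcpy_i586_sketch (void *dst, const void *src, size_t len)
{
  unsigned char *d = dst;
  const unsigned char *s = src;

  if (len > 32)
    {
      size_t head = (0 - (uintptr_t) d) & 3;    /* bytes until d is 4-aligned */
      for (size_t i = 0; i < head; i++)         /* rep movsb in the asm */
        *d++ = *s++;
      len -= head;

      while (len >= 32)
        {
          volatile unsigned char touch = d[28]; /* pull the dest line in */
          (void) touch;
          memcpy (d, s, 32);                    /* eight 32-bit load/store pairs */
          d += 32;
          s += 32;
          len -= 32;
        }
    }

  while (len-- > 0)                             /* trailing rep movsb */
    *d++ = *s++;
  return dst;
}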
diff --git a/REORG.TODO/sysdeps/i386/i586/mempcpy.S b/REORG.TODO/sysdeps/i386/i586/mempcpy.S
new file mode 100644
index 0000000000..720a4c0923
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/mempcpy.S
@@ -0,0 +1,8 @@
+#define USE_AS_MEMPCPY
+#define memcpy __mempcpy
+#define __memcpy_chk __mempcpy_chk
+#include <sysdeps/i386/i586/memcpy.S>
+
+libc_hidden_def (__mempcpy)
+weak_alias (__mempcpy, mempcpy)
+libc_hidden_builtin_def (mempcpy)
diff --git a/REORG.TODO/sysdeps/i386/i586/memset.S b/REORG.TODO/sysdeps/i386/i586/memset.S
new file mode 100644
index 0000000000..4f8f1bcf94
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/memset.S
@@ -0,0 +1,121 @@
+/* memset/bzero -- set memory area to CH/0
+   Highly optimized version for ix86, x>=5.
+   Copyright (C) 1996-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Torbjorn Granlund, <tege@matematik.su.se>
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS	4+4	/* space for 1 saved reg */
+#define RTN	PARMS
+#define DEST	RTN
+#ifdef USE_AS_BZERO
+# define LEN	DEST+4
+#else
+# define CHR	DEST+4
+# define LEN	CHR+4
+#endif
+
+        .text
+#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
+ENTRY (__memset_chk)
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (__memset_chk)
+#endif
+ENTRY (memset)
+
+	pushl	%edi
+	cfi_adjust_cfa_offset (4)
+
+	movl	DEST(%esp), %edi
+	cfi_rel_offset (edi, 0)
+	movl	LEN(%esp), %edx
+#ifdef USE_AS_BZERO
+	xorl	%eax, %eax	/* we fill with 0 */
+#else
+	movb	CHR(%esp), %al
+	movb	%al, %ah
+	movl	%eax, %ecx
+	shll	$16, %eax
+	movw	%cx, %ax
+#endif
+	cld
+
+/* If less than 36 bytes to write, skip tricky code (it wouldn't work).  */
+	cmpl	$36, %edx
+	movl	%edx, %ecx	/* needed when branch is taken! */
+	jl	L(2)
+
+/* First write 0-3 bytes to make the pointer 32-bit aligned.  */
+	movl	%edi, %ecx	/* Copy ptr to ecx... */
+	negl	%ecx		/* ...and negate that and... */
+	andl	$3, %ecx	/* ...mask to get byte count.  */
+	subl	%ecx, %edx	/* adjust global byte count */
+	rep
+	stosb
+
+	subl	$32, %edx	/* offset count for unrolled loop */
+	movl	(%edi), %ecx	/* Fetch destination cache line */
+
+	.align	2, 0x90		/* supply 0x90 for broken assemblers */
+L(1):	movl	28(%edi), %ecx	/* allocate cache line for destination */
+	subl	$32, %edx	/* decr loop count */
+	movl	%eax, 0(%edi)	/* store words pairwise */
+	movl	%eax, 4(%edi)
+	movl	%eax, 8(%edi)
+	movl	%eax, 12(%edi)
+	movl	%eax, 16(%edi)
+	movl	%eax, 20(%edi)
+	movl	%eax, 24(%edi)
+	movl	%eax, 28(%edi)
+	leal	32(%edi), %edi	/* update destination pointer */
+	jge	L(1)
+
+	leal	32(%edx), %ecx	/* reset offset count */
+
+/* Write last 0-7 full 32-bit words (up to 8 words if loop was skipped).  */
+L(2):	shrl	$2, %ecx	/* convert byte count to longword count */
+	rep
+	stosl
+
+/* Finally write the last 0-3 bytes.  */
+	movl	%edx, %ecx
+	andl	$3, %ecx
+	rep
+	stosb
+
+#ifndef USE_AS_BZERO
+	/* Load result (only if used as memset).  */
+	movl DEST(%esp), %eax	/* start address of destination is result */
+#endif
+	popl	%edi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (edi)
+
+	ret
+END (memset)
+libc_hidden_builtin_def (memset)
+
+#if defined SHARED && IS_IN (libc) && !defined __memset_chk \
+    && !defined USE_AS_BZERO
+strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
+	.section .gnu.warning.__memset_zero_constant_len_parameter
+	.string "memset used with constant zero length parameter; this could be due to transposed parameters"
+#endif
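
In outline (a hedged C sketch with an invented name, not the actual implementation): replicate the fill byte into all four byte lanes of a word, and for requests of 36 bytes or more align the destination and store 32-byte blocks, leaving the remainder to rep stosl and rep stosb. bzero.S above builds __bzero out of this same file by defining USE_AS_BZERO, which drops the CHR argument and fills with zero.

#include <stdint.h>
#include <stddef.h>

void *
memset_i586_sketch (void *dst, int ch, size_t len)
{
  unsigned char *d = dst;
  unsigned char c = (unsigned char) ch;
  unsigned int pattern = c * 0x01010101u;   /* same effect as movb/shll/movw */

  if (len >= 36)
    {
      while (((uintptr_t) d & 3) != 0)      /* 0-3 bytes to align */
        {
          *d++ = c;
          len--;
        }
      while (len >= 32)                     /* unrolled eight-store loop */
        {
          unsigned int *w = (unsigned int *) d;
          for (int i = 0; i < 8; i++)
            w[i] = pattern;
          d += 32;
          len -= 32;
        }
    }
  while (len-- > 0)                         /* asm: rep stosl, then rep stosb */
    *d++ = c;
  return dst;
}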
diff --git a/REORG.TODO/sysdeps/i386/i586/memusage.h b/REORG.TODO/sysdeps/i386/i586/memusage.h
new file mode 100644
index 0000000000..c8170874d0
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/memusage.h
@@ -0,0 +1 @@
+#include "../i686/memusage.h"
diff --git a/REORG.TODO/sysdeps/i386/i586/mul_1.S b/REORG.TODO/sysdeps/i386/i586/mul_1.S
new file mode 100644
index 0000000000..bd3a07de90
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/mul_1.S
@@ -0,0 +1,90 @@
+/* Pentium __mpn_mul_1 -- Multiply a limb vector with a limb and store
+   the result in a second limb vector.
+   Copyright (C) 1992-2017 Free Software Foundation, Inc.
+   This file is part of the GNU MP Library.
+
+   The GNU MP Library is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or (at your
+   option) any later version.
+
+   The GNU MP Library is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+   License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with the GNU MP Library; see the file COPYING.LIB.  If not,
+   see <http://www.gnu.org/licenses/>.  */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS	4+16	/* space for 4 saved regs */
+#define RES	PARMS
+#define S1	RES+4
+#define SIZE	S1+4
+#define S2LIMB	SIZE+4
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebx
+
+	.text
+ENTRY (__mpn_mul_1)
+
+	pushl	%res_ptr
+	cfi_adjust_cfa_offset (4)
+	pushl	%s1_ptr
+	cfi_adjust_cfa_offset (4)
+	pushl	%ebp
+	cfi_adjust_cfa_offset (4)
+	pushl	%s2_limb
+	cfi_adjust_cfa_offset (4)
+
+	movl	RES(%esp), %res_ptr
+	cfi_rel_offset (res_ptr, 12)
+	movl	S1(%esp), %s1_ptr
+	cfi_rel_offset (s1_ptr, 8)
+	movl	SIZE(%esp), %size
+	movl	S2LIMB(%esp), %s2_limb
+	cfi_rel_offset (s2_limb, 0)
+	leal	(%res_ptr,%size,4), %res_ptr
+	leal	(%s1_ptr,%size,4), %s1_ptr
+	negl	%size
+	xorl	%ebp, %ebp
+	cfi_rel_offset (ebp, 4)
+	ALIGN (3)
+
+L(oop):	adcl	$0, %ebp
+	movl	(%s1_ptr,%size,4), %eax
+
+	mull	%s2_limb
+
+	addl	%eax, %ebp
+
+	movl	%ebp, (%res_ptr,%size,4)
+	incl	%size
+
+	movl	%edx, %ebp
+	jnz	L(oop)
+
+	adcl	$0, %ebp
+	movl	%ebp, %eax
+	popl	%s2_limb
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (s2_limb)
+	popl	%ebp
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebp)
+	popl	%s1_ptr
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (s1_ptr)
+	popl	%res_ptr
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (res_ptr)
+
+	ret
+#undef size
+END (__mpn_mul_1)
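
__mpn_mul_1 is the non-accumulating variant of __mpn_addmul_1 earlier on this page: it stores s1[i] * s2_limb plus the running carry instead of adding the product to what RES already holds, and returns the final carry limb. A minimal contract sketch (invented names, illustrative only):

typedef unsigned int limb_t;

static limb_t
mpn_mul_1_sketch (limb_t *res, const limb_t *s1, int size, limb_t s2_limb)
{
  limb_t carry = 0;
  for (int i = 0; i < size; i++)
    {
      unsigned long long prod = (unsigned long long) s1[i] * s2_limb + carry;
      res[i] = (limb_t) prod;
      carry = (limb_t) (prod >> 32);
    }
  return carry;
}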
diff --git a/REORG.TODO/sysdeps/i386/i586/rshift.S b/REORG.TODO/sysdeps/i386/i586/rshift.S
new file mode 100644
index 0000000000..24c76ee0bb
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/rshift.S
@@ -0,0 +1,255 @@
+/* Pentium optimized __mpn_rshift --
+   Copyright (C) 1992-2017 Free Software Foundation, Inc.
+   This file is part of the GNU MP Library.
+
+   The GNU MP Library is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or (at your
+   option) any later version.
+
+   The GNU MP Library is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+   License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with the GNU MP Library; see the file COPYING.LIB.  If not,
+   see <http://www.gnu.org/licenses/>.  */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS	4+16		/* space for 4 saved regs */
+#define RES	PARMS
+#define S	RES+4
+#define SIZE	S+4
+#define CNT	SIZE+4
+
+	.text
+ENTRY (__mpn_rshift)
+
+	pushl	%edi
+	cfi_adjust_cfa_offset (4)
+	pushl	%esi
+	cfi_adjust_cfa_offset (4)
+	pushl	%ebp
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebp, 0)
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+
+	movl	RES(%esp),%edi
+	cfi_rel_offset (edi, 12)
+	movl	S(%esp),%esi
+	cfi_rel_offset (esi, 8)
+	movl	SIZE(%esp),%ebx
+	cfi_rel_offset (ebx, 0)
+	movl	CNT(%esp),%ecx
+
+/* We can use faster code for shift-by-1 under certain conditions.  */
+	cmp	$1,%ecx
+	jne	L(normal)
+	leal	4(%edi),%eax
+	cmpl	%esi,%eax
+	jnc	L(special)		/* jump if res_ptr + 1 >= s_ptr */
+	leal	(%edi,%ebx,4),%eax
+	cmpl	%eax,%esi
+	jnc	L(special)		/* jump if s_ptr >= res_ptr + size */
+
+L(normal):
+	movl	(%esi),%edx
+	addl	$4,%esi
+	xorl	%eax,%eax
+	shrdl	%cl,%edx,%eax		/* compute carry limb */
+	pushl	%eax			/* push carry limb onto stack */
+	cfi_adjust_cfa_offset (4)
+
+	decl	%ebx
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+	shrl	$3,%ebx
+	jz	L(end)
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+L(oop):	movl	28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebp
+
+	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	shrdl	%cl,%eax,%ebp
+	shrdl	%cl,%edx,%eax
+	movl	%ebp,(%edi)
+	movl	%eax,4(%edi)
+
+	movl	8(%esi),%ebp
+	movl	12(%esi),%eax
+	shrdl	%cl,%ebp,%edx
+	shrdl	%cl,%eax,%ebp
+	movl	%edx,8(%edi)
+	movl	%ebp,12(%edi)
+
+	movl	16(%esi),%edx
+	movl	20(%esi),%ebp
+	shrdl	%cl,%edx,%eax
+	shrdl	%cl,%ebp,%edx
+	movl	%eax,16(%edi)
+	movl	%edx,20(%edi)
+
+	movl	24(%esi),%eax
+	movl	28(%esi),%edx
+	shrdl	%cl,%eax,%ebp
+	shrdl	%cl,%edx,%eax
+	movl	%ebp,24(%edi)
+	movl	%eax,28(%edi)
+
+	addl	$32,%esi
+	addl	$32,%edi
+	decl	%ebx
+	jnz	L(oop)
+
+L(end):	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	andl	$7,%ebx
+	jz	L(end2)
+L(oop2):
+	movl	(%esi),%eax
+	shrdl	%cl,%eax,%edx		/* compute result limb */
+	movl	%edx,(%edi)
+	movl	%eax,%edx
+	addl	$4,%esi
+	addl	$4,%edi
+	decl	%ebx
+	jnz	L(oop2)
+
+L(end2):
+	shrl	%cl,%edx		/* compute most significant limb */
+	movl	%edx,(%edi)		/* store it */
+
+	popl	%eax			/* pop carry limb */
+	cfi_adjust_cfa_offset (-4)
+
+	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+	popl	%ebp
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebp)
+	popl	%esi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (esi)
+	popl	%edi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (edi)
+
+	ret
+
+/* We loop from least significant end of the arrays, which is only
+   permissible if the source and destination don't overlap, since the
+   function is documented to work for overlapping source and destination.
+*/
+
+	cfi_adjust_cfa_offset (16)
+	cfi_rel_offset (edi, 12)
+	cfi_rel_offset (esi, 8)
+	cfi_rel_offset (ebp, 4)
+	cfi_rel_offset (ebx, 0)
+L(special):
+	leal	-4(%edi,%ebx,4),%edi
+	leal	-4(%esi,%ebx,4),%esi
+
+	movl	(%esi),%edx
+	subl	$4,%esi
+
+	decl	%ebx
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+	shrl	$3,%ebx
+
+	shrl	$1,%edx
+	incl	%ebx
+	decl	%ebx
+	jz	L(Lend)
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+L(Loop):
+	movl	-28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebp
+
+	movl	(%esi),%eax
+	movl	-4(%esi),%edx
+	rcrl	$1,%eax
+	movl	%ebp,(%edi)
+	rcrl	$1,%edx
+	movl	%eax,-4(%edi)
+
+	movl	-8(%esi),%ebp
+	movl	-12(%esi),%eax
+	rcrl	$1,%ebp
+	movl	%edx,-8(%edi)
+	rcrl	$1,%eax
+	movl	%ebp,-12(%edi)
+
+	movl	-16(%esi),%edx
+	movl	-20(%esi),%ebp
+	rcrl	$1,%edx
+	movl	%eax,-16(%edi)
+	rcrl	$1,%ebp
+	movl	%edx,-20(%edi)
+
+	movl	-24(%esi),%eax
+	movl	-28(%esi),%edx
+	rcrl	$1,%eax
+	movl	%ebp,-24(%edi)
+	rcrl	$1,%edx
+	movl	%eax,-28(%edi)
+
+	leal	-32(%esi),%esi		/* use leal not to clobber carry */
+	leal	-32(%edi),%edi
+	decl	%ebx
+	jnz	L(Loop)
+
+L(Lend):
+	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	sbbl	%eax,%eax		/* save carry in %eax */
+	andl	$7,%ebx
+	jz	L(Lend2)
+	addl	%eax,%eax		/* restore carry from eax */
+L(Loop2):
+	movl	%edx,%ebp
+	movl	(%esi),%edx
+	rcrl	$1,%edx
+	movl	%ebp,(%edi)
+
+	leal	-4(%esi),%esi		/* use leal not to clobber carry */
+	leal	-4(%edi),%edi
+	decl	%ebx
+	jnz	L(Loop2)
+
+	jmp	L(L1)
+L(Lend2):
+	addl	%eax,%eax		/* restore carry from eax */
+L(L1):	movl	%edx,(%edi)		/* store last limb */
+
+	movl	$0,%eax
+	rcrl	$1,%eax
+
+	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+	popl	%ebp
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebp)
+	popl	%esi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (esi)
+	popl	%edi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (edi)
+
+	ret
+END (__mpn_rshift)
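
__mpn_rshift mirrors __mpn_lshift above: it shifts right by CNT bits (1 to 31), walks from the least significant limb upward in the general case, returns the bits shifted out at the bottom end, and handles shift-by-1 specially with rcrl, rotating each limb right through the carry flag. Contract sketch (invented names, illustrative only):

typedef unsigned int limb_t;

static limb_t
mpn_rshift_sketch (limb_t *res, const limb_t *s, int size, unsigned int cnt)
{
  limb_t ret = s[0] << (32 - cnt);      /* bits shifted out at the bottom */
  for (int i = 0; i < size - 1; i++)
    res[i] = (s[i] >> cnt) | (s[i + 1] << (32 - cnt));
  res[size - 1] = s[size - 1] >> cnt;
  return ret;
}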
diff --git a/REORG.TODO/sysdeps/i386/i586/stpcpy.S b/REORG.TODO/sysdeps/i386/i586/stpcpy.S
new file mode 100644
index 0000000000..8691efd01c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/stpcpy.S
@@ -0,0 +1,8 @@
+#define USE_AS_STPCPY
+#define STRCPY __stpcpy
+
+#include <sysdeps/i386/i586/strcpy.S>
+
+weak_alias (__stpcpy, stpcpy)
+libc_hidden_def (__stpcpy)
+libc_hidden_builtin_def (stpcpy)
diff --git a/REORG.TODO/sysdeps/i386/i586/strchr.S b/REORG.TODO/sysdeps/i386/i586/strchr.S
new file mode 100644
index 0000000000..02f66b8f72
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/strchr.S
@@ -0,0 +1,348 @@
+/* Find character CH in a NUL terminated string.
+   Highly optimized version for ix86, x>=5.
+   Copyright (C) 1995-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+/* This version is especially optimized for the i586 (and following?)
+   processors.  This is mainly done by using the two pipelines.  The
+   version optimized for i486 is weak in this aspect because to get
+   as much parallelism we have to execute some *more* instructions.
+
+   The code below is structured to reflect the pairing of the instructions
+   as *I think* it is.  I have no processor data book to verify this.
+   If you find something you think is incorrect let me know.  */
+
+
+/* The magic value which is used throughout in the whole code.  */
+#define magic 0xfefefeff
+
+#define PARMS	4+16	/* space for 4 saved regs */
+#define RTN	PARMS
+#define STR	RTN
+#define CHR	STR+4
+
+	.text
+ENTRY (strchr)
+
+	pushl %edi		/* Save callee-safe registers.  */
+	cfi_adjust_cfa_offset (-4)
+	pushl %esi
+	cfi_adjust_cfa_offset (-4)
+
+	pushl %ebx
+	cfi_adjust_cfa_offset (-4)
+	pushl %ebp
+	cfi_adjust_cfa_offset (-4)
+
+	movl STR(%esp), %eax
+	movl CHR(%esp), %edx
+
+	movl %eax, %edi		/* duplicate string pointer for later */
+	cfi_rel_offset (edi, 12)
+	xorl %ecx, %ecx		/* clear %ecx */
+
+	/* At the moment %edx contains C.  What we need for the
+	   algorithm is C in all bytes of the dword.  Avoid
+	   operations on 16 bit words because these require a
+	   prefix byte (and one more cycle).  */
+	movb %dl, %dh		/* now it is 0|0|c|c */
+	movb %dl, %cl		/* we construct the lower half in %ecx */
+
+	shll $16, %edx		/* now %edx is c|c|0|0 */
+	movb %cl, %ch		/* now %ecx is 0|0|c|c */
+
+	orl %ecx, %edx		/* and finally c|c|c|c */
+	andl $3, %edi		/* mask alignment bits */
+
+	jz L(11)		/* alignment is 0 => start loop */
+
+	movb %dl, %cl		/* 0 is needed below */
+	jp L(0)			/* exactly two bits set */
+
+	xorb (%eax), %cl	/* is byte the one we are looking for? */
+	jz L(out)		/* yes => return pointer */
+
+	xorb %dl, %cl		/* load single byte and test for NUL */
+	je L(3)			/* yes => return NULL */
+
+	movb 1(%eax), %cl	/* load single byte */
+	incl %eax
+
+	cmpb %cl, %dl		/* is byte == C? */
+	je L(out)		/* aligned => return pointer */
+
+	cmpb $0, %cl		/* is byte NUL? */
+	je L(3)			/* yes => return NULL */
+
+	incl %eax
+	decl %edi
+
+	jne L(11)
+
+L(0):	movb (%eax), %cl	/* load single byte */
+
+	cmpb %cl, %dl		/* is byte == C? */
+	je L(out)		/* aligned => return pointer */
+
+	cmpb $0, %cl		/* is byte NUL? */
+	je L(3)			/* yes => return NULL */
+
+	incl %eax		/* increment pointer */
+
+	cfi_rel_offset (esi, 8)
+	cfi_rel_offset (ebx, 4)
+	cfi_rel_offset (ebp, 0)
+
+	/* The following code is the preparation for the loop.  The
+	   four instructions up to `L1' will not be executed in the loop
+	   because the same code is found at the end of the loop, but
+	   there it is executed in parallel with other instructions.  */
+L(11):	movl (%eax), %ecx
+	movl $magic, %ebp
+
+	movl $magic, %edi
+	addl %ecx, %ebp
+
+	/* The main loop: it looks complex and indeed it is.  I would
+	   love to say `it was hard to write, so it should be hard to
+	   read' but I will give some more hints.  To fully understand
+	   this code you should first take a look at the i486 version.
+	   The basic algorithm is the same, but here the code is organized
+	   in a way that permits using both pipelines all the time.
+
+	   I tried to make it a bit more understandable by indenting
+	   the code according to stage in the algorithm.  It goes as
+	   follows:
+		check for 0 in 1st word
+			check for C in 1st word
+					check for 0 in 2nd word
+						check for C in 2nd word
+		check for 0 in 3rd word
+			check for C in 3rd word
+					check for 0 in 4th word
+						check for C in 4th word
+
+	   Please note that doing the test for NUL before the test for
+	   C allows us to overlap the test for 0 in the next word with
+	   the test for C.  */
+
+L(1):	xorl %ecx, %ebp			/* (word^magic) */
+	addl %ecx, %edi			/* add magic word */
+
+	leal 4(%eax), %eax		/* increment pointer */
+	jnc L(4)			/* previous addl caused overflow? */
+
+		movl %ecx, %ebx		/* duplicate original word */
+	orl $magic, %ebp		/* (word^magic)|magic */
+
+	addl $1, %ebp			/* (word^magic)|magic == 0xffffffff? */
+	jne L(4)				/* yes => we found word with NUL */
+
+		movl $magic, %esi	/* load magic value */
+		xorl %edx, %ebx		/* clear words which are C */
+
+					movl (%eax), %ecx
+		addl %ebx, %esi		/* (word+magic) */
+
+					movl $magic, %edi
+		jnc L(5)		/* previous addl caused overflow? */
+
+					movl %edi, %ebp
+		xorl %ebx, %esi		/* (word+magic)^word */
+
+					addl %ecx, %ebp
+		orl $magic, %esi	/* ((word+magic)^word)|magic */
+
+		addl $1, %esi		/* ((word+magic)^word)|magic==0xf..f?*/
+		jne L(5)		/* yes => we found word with C */
+
+					xorl %ecx, %ebp
+					addl %ecx, %edi
+
+					leal 4(%eax), %eax
+					jnc L(4)
+
+						movl %ecx, %ebx
+					orl $magic, %ebp
+
+					addl $1, %ebp
+					jne L(4)
+
+						movl $magic, %esi
+						xorl %edx, %ebx
+
+	movl (%eax), %ecx
+						addl %ebx, %esi
+
+	movl $magic, %edi
+						jnc L(5)
+
+	movl %edi, %ebp
+						xorl %ebx, %esi
+
+	addl %ecx, %ebp
+						orl $magic, %esi
+
+						addl $1, %esi
+						jne L(5)
+
+	xorl %ecx, %ebp
+	addl %ecx, %edi
+
+	leal 4(%eax), %eax
+	jnc L(4)
+
+		movl %ecx, %ebx
+	orl $magic, %ebp
+
+	addl $1, %ebp
+	jne L(4)
+
+		movl $magic, %esi
+		xorl %edx, %ebx
+
+					movl (%eax), %ecx
+		addl %ebx, %esi
+
+					movl $magic, %edi
+		jnc L(5)
+
+					movl %edi, %ebp
+		xorl %ebx, %esi
+
+					addl %ecx, %ebp
+		orl $magic, %esi
+
+		addl $1, %esi
+		jne L(5)
+
+					xorl %ecx, %ebp
+					addl %ecx, %edi
+
+					leal 4(%eax), %eax
+					jnc L(4)
+
+						movl %ecx, %ebx
+					orl $magic, %ebp
+
+					addl $1, %ebp
+					jne L(4)
+
+						movl $magic, %esi
+						xorl %edx, %ebx
+
+	movl (%eax), %ecx
+						addl %ebx, %esi
+
+	movl $magic, %edi
+						jnc L(5)
+
+	movl %edi, %ebp
+						xorl %ebx, %esi
+
+	addl %ecx, %ebp
+						orl $magic, %esi
+
+						addl $1, %esi
+
+						je L(1)
+
+	/* We know there is no NUL byte but a C byte in the word.
+	   %ebx contains NUL in this particular byte.  */
+L(5):	subl $4, %eax		/* adjust pointer */
+	testb %bl, %bl		/* first byte == C? */
+
+	jz L(out)		/* yes => return pointer */
+
+	incl %eax		/* increment pointer */
+	testb %bh, %bh		/* second byte == C? */
+
+	jz L(out)		/* yes => return pointer */
+
+	shrl $16, %ebx		/* make upper bytes accessible */
+	incl %eax		/* increment pointer */
+
+	cmp $0, %bl		/* third byte == C */
+	je L(out)		/* yes => return pointer */
+
+	incl %eax		/* increment pointer */
+
+L(out):	popl %ebp		/* restore saved registers */
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebp)
+	popl %ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+
+	popl %esi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (esi)
+	popl %edi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (edi)
+
+	ret
+
+	cfi_adjust_cfa_offset (16)
+	cfi_rel_offset (edi, 12)
+	cfi_rel_offset (esi, 8)
+	cfi_rel_offset (ebx, 4)
+	cfi_rel_offset (ebp, 0)
+	/* We know there is a NUL byte in the word.  But we have to test
+	   whether there is a C byte before it in the word.  */
+L(4):	subl $4, %eax		/* adjust pointer */
+	cmpb %dl, %cl		/* first byte == C? */
+
+	je L(out)		/* yes => return pointer */
+
+	cmpb $0, %cl		/* first byte == NUL? */
+	je L(3)			/* yes => return NULL */
+
+	incl %eax		/* increment pointer */
+
+	cmpb %dl, %ch		/* second byte == C? */
+	je L(out)		/* yes => return pointer */
+
+	cmpb $0, %ch		/* second byte == NUL? */
+	je L(3)			/* yes => return NULL */
+
+	shrl $16, %ecx		/* make upper bytes accessible */
+	incl %eax		/* increment pointer */
+
+	cmpb %dl, %cl		/* third byte == C? */
+	je L(out)		/* yes => return pointer */
+
+	cmpb $0, %cl		/* third byte == NUL? */
+	je L(3)			/* yes => return NULL */
+
+	incl %eax		/* increment pointer */
+
+	/* The test for the fourth byte is necessary!  */
+	cmpb %dl, %ch		/* fourth byte == C? */
+	je L(out)		/* yes => return pointer */
+
+L(3):	xorl %eax, %eax
+	jmp L(out)
+END (strchr)
+
+#undef index
+weak_alias (strchr, index)
+libc_hidden_builtin_def (strchr)
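
The `magic' arithmetic above is the classic word-at-a-time byte search. The C below sketches just the detection test; it is illustrative, not glibc code, and it uses a 64-bit intermediate because the assembly keeps the carry of word + magic in the flags rather than in a register.

#include <stdint.h>

#define MAGIC 0xfefefeffu

/* Does a 32-bit word contain a zero byte?  A zero byte stops the carry
   chain of word + MAGIC, so either the carry out of the top is missing
   or one of MAGIC's zero ("hole") bits passes through unchanged.  */
static int
has_zero_byte (uint32_t word)
{
  uint64_t sum = (uint64_t) word + MAGIC;
  int carried_out = (sum >> 32) != 0;
  uint32_t low = (uint32_t) sum;
  return !carried_out || (((low ^ word) | MAGIC) != 0xffffffffu);
}

/* strchr additionally XORs the word with the character replicated into
   all four bytes (the c|c|c|c value built at the top of the function),
   so bytes equal to C become zero bytes and the same test finds them.  */
static int
has_byte (uint32_t word, unsigned char c)
{
  return has_zero_byte (word ^ (c * 0x01010101u));
}

The unrolled main loop interleaves four rounds of these two tests so that both Pentium pipes stay loaded, and, as the comment says, testing for NUL first lets the zero test of the next word overlap the C test of the current one.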
diff --git a/REORG.TODO/sysdeps/i386/i586/strcpy.S b/REORG.TODO/sysdeps/i386/i586/strcpy.S
new file mode 100644
index 0000000000..a444604f4f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/strcpy.S
@@ -0,0 +1,169 @@
+/* strcpy/stpcpy implementation for i586.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define PARMS	4+12	/* space for 3 saved regs */
+#define RTN	PARMS
+#define DEST	RTN
+#define SRC	DEST+4
+
+#ifndef USE_AS_STPCPY
+# define STRCPY strcpy
+#endif
+
+#define magic 0xfefefeff
+
+	.text
+ENTRY (STRCPY)
+
+	pushl	%edi
+	cfi_adjust_cfa_offset (4)
+	pushl	%esi
+	cfi_adjust_cfa_offset (4)
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+
+	movl	DEST(%esp), %edi
+	cfi_rel_offset (edi, 8)
+	movl	SRC(%esp), %esi
+	cfi_rel_offset (esi, 4)
+
+	xorl	%eax, %eax
+	leal	-1(%esi), %ecx
+
+	movl	$magic, %ebx
+	cfi_rel_offset (ebx, 0)
+	andl	$3, %ecx
+
+#ifdef PIC
+	call	2f
+	cfi_adjust_cfa_offset (4)
+2:	popl	%edx
+	cfi_adjust_cfa_offset (-4)
+	/* 0xb is the distance between 2: and 1: but we avoid writing
+	   1f-2b because the assembler generates worse code.  */
+	leal	0xb(%edx,%ecx,8), %ecx
+#else
+	leal	1f(,%ecx,8), %ecx
+#endif
+
+	jmp	*%ecx
+
+	.align 8
+1:
+	orb	(%esi), %al
+	jz	L(end)
+	stosb
+	xorl	%eax, %eax
+	incl	%esi
+
+	orb	(%esi), %al
+	jz	L(end)
+	stosb
+	xorl	%eax, %eax
+	incl	%esi
+
+	orb	(%esi), %al
+	jz	L(end)
+	stosb
+	xorl	%eax, %eax
+	incl	%esi
+
+L(1):	movl	(%esi), %ecx
+	leal	4(%esi),%esi
+
+	subl	%ecx, %eax
+	addl	%ebx, %ecx
+
+	decl	%eax
+	jnc	L(3)
+
+	movl	%ecx, %edx
+	xorl	%ecx, %eax
+
+	subl	%ebx, %edx
+	andl	$~magic, %eax
+
+	jne	L(4)
+
+	movl	%edx, (%edi)
+	leal	4(%edi),%edi
+
+	jmp	L(1)
+
+L(3):	movl	%ecx, %edx
+
+	subl	%ebx, %edx
+
+L(4):	movb	%dl, (%edi)
+	testb	%dl, %dl
+
+	movl	%edx, %eax
+	jz	L(end2)
+
+	shrl	$16, %eax
+	movb	%dh, 1(%edi)
+#ifdef USE_AS_STPCPY
+	addl	$1, %edi
+#endif
+
+	cmpb	$0, %dh
+	jz	L(end2)
+
+#ifdef USE_AS_STPCPY
+	movb	%al, 1(%edi)
+	addl	$1, %edi
+
+	cmpb	$0, %al
+	jz	L(end2)
+
+	addl	$1, %edi
+#else
+	movb	%al, 2(%edi)
+	testb	%al, %al
+
+	leal	3(%edi), %edi
+	jz	L(end2)
+#endif
+
+L(end):	movb	%ah, (%edi)
+
+L(end2):
+#ifdef USE_AS_STPCPY
+	movl	%edi, %eax
+#else
+	movl	DEST(%esp), %eax
+#endif
+	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+	popl	%esi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (esi)
+	popl	%edi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (edi)
+
+	ret
+END (STRCPY)
+#ifndef USE_AS_STPCPY
+libc_hidden_builtin_def (strcpy)
+#endif
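
The copy itself, as a hedged C sketch (invented name, not the code being added): copy single bytes until the source pointer is word aligned, which the assembly does by jumping into an unrolled byte sequence through a small jump table (using the call/popl trick to locate it under PIC), then copy a word at a time until a word contains the NUL, and finish byte by byte. The zero-byte check below uses the better-known (x - 0x01010101) & ~x & 0x80808080 form, which is equivalent to the magic 0xfefefeff test the assembly uses.

#include <stdint.h>
#include <string.h>

char *
strcpy_i586_sketch (char *dest, const char *src)
{
  char *d = dest;

  while (((uintptr_t) src & 3) != 0)        /* align the source pointer */
    if ((*d++ = *src++) == '\0')
      return dest;                          /* stpcpy would return d - 1 */

  for (;;)
    {
      uint32_t w;
      memcpy (&w, src, 4);                  /* one aligned 32-bit load */
      if (((w - 0x01010101u) & ~w & 0x80808080u) != 0)
        break;                              /* this word holds the NUL */
      memcpy (d, &w, 4);
      d += 4;
      src += 4;
    }

  while ((*d++ = *src++) != '\0')           /* copy the last word bytewise */
    ;
  return dest;                              /* stpcpy returns the end pointer */
}

stpcpy.S above wraps this file with USE_AS_STPCPY, which only changes the value left in %eax at the end.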
diff --git a/REORG.TODO/sysdeps/i386/i586/strlen.S b/REORG.TODO/sysdeps/i386/i586/strlen.S
new file mode 100644
index 0000000000..cfea2e020f
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/strlen.S
@@ -0,0 +1,182 @@
+/* strlen -- Compute length of NUL terminated string.
+   Highly optimized version for ix86, x>=5.
+   Copyright (C) 1995-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+/* This version is especially optimized for the i586 (and following?)
+   processors.  This is mainly done by using the two pipelines.  The
+   version optimized for i486 is weak in this aspect because to get
+   as much parallelism we have to execute some *more* instructions.
+
+   The code below is structured to reflect the pairing of the instructions
+   as *I think* it is.  I have no processor data book to verify this.
+   If you find something you think is incorrect let me know.  */
+
+
+/* The magic value which is used throughout in the whole code.  */
+#define magic 0xfefefeff
+
+#define PARMS	4		/* no space for saved regs */
+#define STR	PARMS
+
+	.text
+ENTRY (strlen)
+
+	movl STR(%esp), %eax
+	movl $3, %edx		/* load mask (= 3) */
+
+	andl %eax, %edx		/* separate last two bits of address */
+
+	jz L(1)			/* aligned => start loop */
+	jp L(0)			/* exactly two bits set */
+
+	cmpb %dh, (%eax)	/* is byte NUL? */
+	je L(2)			/* yes => return */
+
+	incl %eax		/* increment pointer */
+	cmpb %dh, (%eax)	/* is byte NUL? */
+
+	je L(2)			/* yes => return */
+
+	incl %eax		/* increment pointer */
+	xorl $2, %edx
+
+	jz L(1)
+
+L(0):	cmpb %dh, (%eax)	/* is byte NUL? */
+	je L(2)			/* yes => return */
+
+	incl %eax		/* increment pointer */
+	xorl %edx, %edx		/* We need %edx == 0 for later */
+
+      /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
+	 change any of the hole bits of LONGWORD.
+
+	 1) Is this safe?  Will it catch all the zero bytes?
+	 Suppose there is a byte with all zeros.  Any carry bits
+	 propagating from its left will fall into the hole at its
+	 least significant bit and stop.  Since there will be no
+	 carry from its most significant bit, the LSB of the
+	 byte to the left will be unchanged, and the zero will be
+	 detected.
+
+	 2) Is this worthwhile?  Will it ignore everything except
+	 zero bytes?  Suppose every byte of LONGWORD has a bit set
+	 somewhere.  There will be a carry into bit 8.	If bit 8
+	 is set, this will carry into bit 16.  If bit 8 is clear,
+	 one of bits 9-15 must be set, so there will be a carry
+	 into bit 16.  Similarly, there will be a carry into bit
+	 24.  If one of bits 24-31 is set, there will be a carry
+	 into bit 32 (=carry flag), so all of the hole bits will
+	 be changed.
+
+	 Note: %edx == 0 in any case here.  */
+
+L(1):
+	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
+	addl $4, %eax		/* adjust pointer for *next* word */
+
+	subl %ecx, %edx		/* first step to negate word */
+	addl $magic, %ecx	/* add magic word */
+
+	decl %edx		/* complete negation of word */
+	jnc L(3)		/* previous addl caused overflow? */
+
+	xorl %ecx, %edx		/* (word+magic)^word */
+
+	andl $~magic, %edx	/* any of the carry flags set? */
+
+	jne L(3)		/* yes => determine byte */
+
+
+	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
+	addl $4, %eax		/* adjust pointer for *next* word */
+
+	subl %ecx, %edx		/* first step to negate word */
+	addl $magic, %ecx	/* add magic word */
+
+	decl %edx		/* complete negation of word */
+	jnc L(3)		/* previous addl caused overflow? */
+
+	xorl %ecx, %edx		/* (word+magic)^word */
+
+	andl $~magic, %edx	/* any of the carry flags set? */
+
+	jne L(3)		/* yes => determine byte */
+
+
+	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
+	addl $4, %eax		/* adjust pointer for *next* word */
+
+	subl %ecx, %edx		/* first step to negate word */
+	addl $magic, %ecx	/* add magic word */
+
+	decl %edx		/* complete negation of word */
+	jnc L(3)		/* previous addl caused overflow? */
+
+	xorl %ecx, %edx		/* (word+magic)^word */
+
+	andl $~magic, %edx	/* any of the carry flags set? */
+
+	jne L(3)		/* yes => determine byte */
+
+
+	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
+	addl $4, %eax		/* adjust pointer for *next* word */
+
+	subl %ecx, %edx		/* first step to negate word */
+	addl $magic, %ecx	/* add magic word */
+
+	decl %edx		/* complete negation of word */
+	jnc L(3)		/* previous addl caused overflow? */
+
+	xorl %ecx, %edx		/* (word+magic)^word */
+
+	andl $~magic, %edx	/* any of the carry flags set? */
+
+	je L(1)			/* no => start loop again */
+
+
+L(3):	subl $4, %eax		/* correct too early pointer increment */
+	subl $magic, %ecx
+
+	cmpb $0, %cl		/* lowest byte NUL? */
+	jz L(2)			/* yes => return */
+
+	inc %eax		/* increment pointer */
+	testb %ch, %ch		/* second byte NUL? */
+
+	jz L(2)			/* yes => return */
+
+	shrl $16, %ecx		/* make upper bytes accessible */
+	incl %eax		/* increment pointer */
+
+	cmpb $0, %cl		/* is third byte NUL? */
+	jz L(2)			/* yes => return */
+
+	incl %eax		/* increment pointer */
+
+L(2):	subl STR(%esp), %eax	/* now compute the length as difference
+				   between start and terminating NUL
+				   character */
+	ret
+END (strlen)
+libc_hidden_builtin_def (strlen)
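
Putting the long comment into practice, strlen above is roughly the following C (a sketch with an invented name, not the real code). The zero-byte test is written in its better-known (x - 0x01010101) & ~x & 0x80808080 form; it is equivalent to the 0xfefefeff carry trick the comment describes, which the assembly applies to four words per loop iteration.

#include <stdint.h>
#include <string.h>
#include <stddef.h>

size_t
strlen_i586_sketch (const char *str)
{
  const char *p = str;

  while (((uintptr_t) p & 3) != 0)          /* bytewise until 4-aligned */
    {
      if (*p == '\0')
        return p - str;
      p++;
    }

  for (;;)
    {
      uint32_t w;
      memcpy (&w, p, 4);                    /* one aligned 32-bit load */
      if (((w - 0x01010101u) & ~w & 0x80808080u) != 0)
        break;                              /* some byte of w is NUL */
      p += 4;
    }

  while (*p != '\0')                        /* pin down the NUL in the word */
    p++;
  return p - str;
}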
diff --git a/REORG.TODO/sysdeps/i386/i586/sub_n.S b/REORG.TODO/sysdeps/i386/i586/sub_n.S
new file mode 100644
index 0000000000..21b5a2742c
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/sub_n.S
@@ -0,0 +1,143 @@
+/* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0
+   and store difference in a third limb vector.
+   Copyright (C) 1992-2017 Free Software Foundation, Inc.
+   This file is part of the GNU MP Library.
+
+   The GNU MP Library is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or (at your
+   option) any later version.
+
+   The GNU MP Library is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+   License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with the GNU MP Library; see the file COPYING.LIB.  If not,
+   see <http://www.gnu.org/licenses/>.  */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS	4+16		/* space for 4 saved regs */
+#define RES	PARMS
+#define S1	RES+4
+#define S2	S1+4
+#define SIZE	S2+4
+
+	.text
+ENTRY (__mpn_sub_n)
+
+	pushl	%edi
+	cfi_adjust_cfa_offset (4)
+	pushl	%esi
+	cfi_adjust_cfa_offset (4)
+	pushl	%ebp
+	cfi_adjust_cfa_offset (4)
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+
+	movl	RES(%esp),%edi
+	cfi_rel_offset (edi, 12)
+	movl	S1(%esp),%esi
+	cfi_rel_offset (esi, 8)
+	movl	S2(%esp),%ebx
+	cfi_rel_offset (ebx, 0)
+	movl	SIZE(%esp),%ecx
+	movl	(%ebx),%ebp
+	cfi_rel_offset (ebp, 4)
+
+	decl	%ecx
+	movl	%ecx,%edx
+	shrl	$3,%ecx
+	andl	$7,%edx
+	testl	%ecx,%ecx		/* zero carry flag */
+	jz	L(end)
+	pushl	%edx
+	cfi_adjust_cfa_offset (4)
+
+	ALIGN (3)
+L(oop):	movl	28(%edi),%eax		/* fetch destination cache line */
+	leal	32(%edi),%edi
+
+L(1):	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	sbbl	%ebp,%eax
+	movl	4(%ebx),%ebp
+	sbbl	%ebp,%edx
+	movl	8(%ebx),%ebp
+	movl	%eax,-32(%edi)
+	movl	%edx,-28(%edi)
+
+L(2):	movl	8(%esi),%eax
+	movl	12(%esi),%edx
+	sbbl	%ebp,%eax
+	movl	12(%ebx),%ebp
+	sbbl	%ebp,%edx
+	movl	16(%ebx),%ebp
+	movl	%eax,-24(%edi)
+	movl	%edx,-20(%edi)
+
+L(3):	movl	16(%esi),%eax
+	movl	20(%esi),%edx
+	sbbl	%ebp,%eax
+	movl	20(%ebx),%ebp
+	sbbl	%ebp,%edx
+	movl	24(%ebx),%ebp
+	movl	%eax,-16(%edi)
+	movl	%edx,-12(%edi)
+
+L(4):	movl	24(%esi),%eax
+	movl	28(%esi),%edx
+	sbbl	%ebp,%eax
+	movl	28(%ebx),%ebp
+	sbbl	%ebp,%edx
+	movl	32(%ebx),%ebp
+	movl	%eax,-8(%edi)
+	movl	%edx,-4(%edi)
+
+	leal	32(%esi),%esi
+	leal	32(%ebx),%ebx
+	decl	%ecx
+	jnz	L(oop)
+
+	popl	%edx
+	cfi_adjust_cfa_offset (-4)
+L(end):
+	decl	%edx			/* test %edx w/o clobbering carry */
+	js	L(end2)
+	incl	%edx
+L(oop2):
+	leal	4(%edi),%edi
+	movl	(%esi),%eax
+	sbbl	%ebp,%eax
+	movl	4(%ebx),%ebp
+	movl	%eax,-4(%edi)
+	leal	4(%esi),%esi
+	leal	4(%ebx),%ebx
+	decl	%edx
+	jnz	L(oop2)
+L(end2):
+	movl	(%esi),%eax
+	sbbl	%ebp,%eax
+	movl	%eax,(%edi)
+
+	sbbl	%eax,%eax
+	negl	%eax
+
+	popl	%ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+	popl	%ebp
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebp)
+	popl	%esi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (esi)
+	popl	%edi
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (edi)
+
+	ret
+END (__mpn_sub_n)
diff --git a/REORG.TODO/sysdeps/i386/i586/submul_1.S b/REORG.TODO/sysdeps/i386/i586/submul_1.S
new file mode 100644
index 0000000000..5e5e121ca2
--- /dev/null
+++ b/REORG.TODO/sysdeps/i386/i586/submul_1.S
@@ -0,0 +1,94 @@
+/* Pentium __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+   the result from a second limb vector.
+   Copyright (C) 1992-2017 Free Software Foundation, Inc.
+   This file is part of the GNU MP Library.
+
+   The GNU MP Library is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or (at your
+   option) any later version.
+
+   The GNU MP Library is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+   License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with the GNU MP Library; see the file COPYING.LIB.  If not,
+   see <http://www.gnu.org/licenses/>.  */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define PARMS	4+16	/* space for 4 saved regs */
+#define RES	PARMS
+#define S1	RES+4
+#define SIZE	S1+4
+#define S2LIMB	SIZE+4
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebx
+
+	.text
+ENTRY (__mpn_submul_1)
+
+	pushl	%res_ptr
+	cfi_adjust_cfa_offset (4)
+	pushl	%s1_ptr
+	cfi_adjust_cfa_offset (4)
+	pushl	%ebp
+	cfi_adjust_cfa_offset (4)
+	pushl	%s2_limb
+	cfi_adjust_cfa_offset (4)
+
+	movl	RES(%esp), %res_ptr
+	cfi_rel_offset (res_ptr, 12)
+	movl	S1(%esp), %s1_ptr
+	cfi_rel_offset (s1_ptr, 8)
+	movl	SIZE(%esp), %size
+	movl	S2LIMB(%esp), %s2_limb
+	cfi_rel_offset (s2_limb, 0)
+	leal	(%res_ptr,%size,4), %res_ptr
+	leal	(%s1_ptr,%size,4), %s1_ptr
+	negl	%size
+	xorl	%ebp, %ebp
+	cfi_rel_offset (ebp, 4)
+	ALIGN (3)
+
+L(oop):	adcl	$0, %ebp
+	movl	(%s1_ptr,%size,4), %eax
+
+	mull	%s2_limb
+
+	addl	%ebp, %eax
+	movl	(%res_ptr,%size,4), %ebp
+
+	adcl	$0, %edx
+	subl	%eax, %ebp
+
+	movl	%ebp, (%res_ptr,%size,4)
+	incl	%size
+
+	movl	%edx, %ebp
+	jnz	L(oop)
+
+	adcl	$0, %ebp
+	movl	%ebp, %eax
+	popl	%s2_limb
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (s2_limb)
+	popl	%ebp
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebp)
+	popl	%s1_ptr
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (s1_ptr)
+	popl	%res_ptr
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (res_ptr)
+
+	ret
+#undef size
+END (__mpn_submul_1)