about summary refs log tree commit diff
path: root/sysdeps/sparc
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/sparc')
-rwxr-xr-x sysdeps/sparc/configure | 4
-rw-r--r-- sysdeps/sparc/configure.in | 6
-rw-r--r-- sysdeps/sparc/fpu/bits/mathdef.h | 4
-rw-r--r-- sysdeps/sparc/fpu/fraiseexcpt.c | 22
-rw-r--r-- sysdeps/sparc/sparc32/Makefile | 4
-rw-r--r-- sysdeps/sparc/sparc32/__longjmp.S | 43
-rw-r--r-- sysdeps/sparc/sparc32/add_n.S | 294
-rw-r--r-- sysdeps/sparc/sparc32/addmul_1.S | 50
-rw-r--r-- sysdeps/sparc/sparc32/alloca.S | 3
-rw-r--r-- sysdeps/sparc/sparc32/bsd-_setjmp.S | 41
-rw-r--r-- sysdeps/sparc/sparc32/bsd-setjmp.S | 41
-rw-r--r-- sysdeps/sparc/sparc32/divrem.m4 | 62
-rw-r--r-- sysdeps/sparc/sparc32/dl-machine.h | 372
-rw-r--r-- sysdeps/sparc/sparc32/dotmul.S | 10
-rw-r--r-- sysdeps/sparc/sparc32/elf/start.S | 86
-rw-r--r-- sysdeps/sparc/sparc32/elf/start.c | 68
-rw-r--r-- sysdeps/sparc/sparc32/fpu/bits/fenv.h | 4
-rw-r--r-- sysdeps/sparc/sparc32/fpu/fpu_control.h | 8
-rw-r--r-- sysdeps/sparc/sparc32/lshift.S | 50
-rw-r--r-- sysdeps/sparc/sparc32/mul_1.S | 50
-rw-r--r-- sysdeps/sparc/sparc32/rem.S | 190
-rw-r--r-- sysdeps/sparc/sparc32/rshift.S | 48
-rw-r--r-- sysdeps/sparc/sparc32/sdiv.S | 190
-rw-r--r-- sysdeps/sparc/sparc32/setjmp.S | 52
-rw-r--r-- sysdeps/sparc/sparc32/sparcv8/addmul_1.S | 85
-rw-r--r-- sysdeps/sparc/sparc32/sparcv8/dotmul.S | 13
-rw-r--r-- sysdeps/sparc/sparc32/sparcv8/mul_1.S | 76
-rw-r--r-- sysdeps/sparc/sparc32/sparcv8/rem.S | 18
-rw-r--r-- sysdeps/sparc/sparc32/sparcv8/sdiv.S | 14
-rw-r--r-- sysdeps/sparc/sparc32/sparcv8/submul_1.S | 14
-rw-r--r-- sysdeps/sparc/sparc32/sparcv8/udiv.S | 13
-rw-r--r-- sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S | 157
-rw-r--r-- sysdeps/sparc/sparc32/sparcv8/umul.S | 13
-rw-r--r-- sysdeps/sparc/sparc32/sparcv8/urem.S | 15
-rw-r--r-- sysdeps/sparc/sparc32/sub_n.S | 406
-rw-r--r-- sysdeps/sparc/sparc32/submul_1.S | 50
-rw-r--r-- sysdeps/sparc/sparc32/udiv.S | 186
-rw-r--r-- sysdeps/sparc/sparc32/udiv_qrnnd.S | 65
-rw-r--r-- sysdeps/sparc/sparc32/umul.S | 26
-rw-r--r-- sysdeps/sparc/sparc32/urem.S | 186
-rw-r--r-- sysdeps/sparc/sparc64/dl-machine.h | 15
-rw-r--r-- sysdeps/sparc/sparc64/fpu/fpu_control.h | 4
42 files changed, 1564 insertions, 1494 deletions
diff --git a/sysdeps/sparc/configure b/sysdeps/sparc/configure
deleted file mode 100755
index a8f815a577..0000000000
--- a/sysdeps/sparc/configure
+++ /dev/null
@@ -1,4 +0,0 @@
- # Local configure fragment for sysdeps/sparc.
-
-# The assembler on SPARC needs the -fPIC flag even when it's assembler code.
-ASFLAGS_SO=-fPIC
diff --git a/sysdeps/sparc/configure.in b/sysdeps/sparc/configure.in
deleted file mode 100644
index e71a7e30fa..0000000000
--- a/sysdeps/sparc/configure.in
+++ /dev/null
@@ -1,6 +0,0 @@
-sinclude(./aclocal.m4)dnl Autoconf lossage
-GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
-# Local configure fragment for sysdeps/sparc.
-
-# The assembler on SPARC needs the -fPIC flag even when it's assembler code.
-ASFLAGS_SO=-fPIC
diff --git a/sysdeps/sparc/fpu/bits/mathdef.h b/sysdeps/sparc/fpu/bits/mathdef.h
index 0bc9c94ecc..505d724fd0 100644
--- a/sysdeps/sparc/fpu/bits/mathdef.h
+++ b/sysdeps/sparc/fpu/bits/mathdef.h
@@ -62,3 +62,7 @@ typedef double double_t;
 #define INFINITY	HUGE_VAL
 
 #endif
+
+/* The values returned by `ilogb' for 0 and NaN respectively.  */
+#define FP_ILOGB0       0x80000001
+#define FP_ILOGBNAN     0x7fffffff
diff --git a/sysdeps/sparc/fpu/fraiseexcpt.c b/sysdeps/sparc/fpu/fraiseexcpt.c
index 308c9b2e7f..28db8b360a 100644
--- a/sysdeps/sparc/fpu/fraiseexcpt.c
+++ b/sysdeps/sparc/fpu/fraiseexcpt.c
@@ -20,14 +20,16 @@
 #include <fenv.h>
 #include <math.h>
 
-static void
-ignore_me(double foo)
-{
-}
-
 void
 feraiseexcept (int excepts)
 {
+  static volatile double sink;
+  static const struct {
+    double zero, one, max, min, sixteen, pi;
+  } c = {
+    0.0, 1.0, DBL_MAX, DBL_MIN, 16.0, M_PI
+  };
+
   /* Raise exceptions represented by EXPECTS.  But we must raise only
      one signal at a time.  It is important the if the overflow/underflow
      exception and the inexact exception are given at the same time,
@@ -37,30 +39,30 @@ feraiseexcept (int excepts)
   if ((FE_INVALID & excepts) != 0)
     {
       /* One example of a invalid operation is 0/0.  */
-      ignore_me (0.0 / 0.0);
+      sink = c.zero / c.zero;
     }
 
   /* Next: division by zero.  */
   if ((FE_DIVBYZERO & excepts) != 0)
     {
-      ignore_me (1.0 / 0.0);
+      sink = c.one / c.zero;
     }
 
   /* Next: overflow.  */
   if ((FE_OVERFLOW & excepts) != 0)
     {
-      ignore_me (LDBL_MAX * LDBL_MAX);
+      sink = c.max * c.max;
     }
 
   /* Next: underflow.  */
   if ((FE_UNDERFLOW & excepts) != 0)
     {
-      ignore_me (LDBL_MIN / 16.0);
+      sink = c.min / c.sixteen;
     }
 
   /* Last: inexact.  */
   if ((FE_INEXACT & excepts) != 0)
     {
-      ignore_me (1.0 / M_PI);
+      sink = c.one / c.pi;
     }
 }
diff --git a/sysdeps/sparc/sparc32/Makefile b/sysdeps/sparc/sparc32/Makefile
index deec2f8145..c7c867ef22 100644
--- a/sysdeps/sparc/sparc32/Makefile
+++ b/sysdeps/sparc/sparc32/Makefile
@@ -37,7 +37,7 @@ divrem := sdiv udiv rem urem
 +divrem-S-rem := true
 +divrem-S-udiv := false
 +divrem-S-urem := false
-$(divrem:%=$(sysdep_dir)/sparc/%.S): $(sysdep_dir)/sparc/divrem.m4
+$(divrem:%=$(sysdep_dir)/sparc/sparc32/%.S): $(sysdep_dir)/sparc/sparc32/divrem.m4
 	(echo "define(NAME,\`.$(+divrem-NAME)')\
 	       define(OP,\`$(+divrem-OP-$(+divrem-NAME))')\
 	       define(S,\`$(+divrem-S-$(+divrem-NAME))')\
@@ -48,4 +48,4 @@ $(divrem:%=$(sysdep_dir)/sparc/%.S): $(sysdep_dir)/sparc/divrem.m4
 	mv -f $@-tmp $@
 	test ! -d CVS || cvs commit -m'Regenerated from $<' $@
 
-sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/%.S)
+sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/sparc32/%.S)
diff --git a/sysdeps/sparc/sparc32/__longjmp.S b/sysdeps/sparc/sparc32/__longjmp.S
index 36e1c170dd..651ede4c15 100644
--- a/sysdeps/sparc/sparc32/__longjmp.S
+++ b/sysdeps/sparc/sparc32/__longjmp.S
@@ -22,38 +22,43 @@
 #include <bits/setjmp.h>
 #define ENV(reg) [%g1 + (reg * 4)]
 
-ENTRY (__longjmp)
+ENTRY(__longjmp)
 	/* Store our arguments in global registers so we can still
 	   use them while unwinding frames and their register windows.  */
 	mov %o0, %g1		/* ENV in %g1 */
-     	orcc %o1, %g0, %g6	/* VAL in %g6 */
+     	orcc %o1, %g0, %g2	/* VAL in %g2 */
      	be,a 0f			/* Branch if zero; else skip delay slot.  */
-	 mov 1, %g6		/* Delay slot only hit if zero: VAL = 1.  */
+	 mov 1, %g2		/* Delay slot only hit if zero: VAL = 1.  */
 0:
-
-	/* Cache target FP in register %g7.  */
-	ld ENV (JB_FP), %g7
+	/* Cache target FP in register %g3.  */
+	ld ENV(JB_FP), %g3
 
 	/* Now we will loop, unwinding the register windows up the stack
-	   until the restored %fp value matches the target value in %g7.  */
+	   until the restored %fp value matches the target value in %g3.  */
 
-loop:	cmp %fp, %g7		/* Have we reached the target frame? */
-	bl,a loop		/* Loop while current fp is below target.  */
+LOC(loop):
+	cmp %fp, %g3		/* Have we reached the target frame? */
+	bl,a LOC(loop)		/* Loop while current fp is below target.  */
 	 restore		/* Unwind register window in delay slot.  */
-	be,a found		/* Better have hit it exactly.  */
-	 ld ENV (JB_SP), %o0	/* Delay slot: extract target SP.  */
+	be,a LOC(found)		/* Better have hit it exactly.  */
+	 ld ENV(JB_SP), %o0	/* Delay slot: extract target SP.  */
 
-bogus:	/* Get here only if the jmp_buf or stack is clobbered.  */
-	call C_SYMBOL_NAME (abort)
-	nop
+LOC(bogus):
+	/* Get here only if the jmp_buf or stack is clobbered.  */
+	call C_SYMBOL_NAME(abort)
+	 nop
 	unimp 0
 
-found:	/* We have unwound register windows so %fp matches the target.  */
+LOC(found):
+	/* We have unwound register windows so %fp matches the target.  */
 	cmp %o0, %sp		/* Check jmp_buf SP vs register window.  */
-	bge,a sp_ok		/* Saved must not be deeper than register.  */
+	bge,a LOC(sp_ok)	/* Saved must not be deeper than register.  */
 	 mov %o0, %sp		/* OK, install new SP.  */
-	b,a bogus		/* Bogus, we lose.  */
+	b,a LOC(bogus)		/* Bogus, we lose.  */
 
-sp_ok:	ld ENV (JB_PC), %o0	/* Extract target return PC.  */
+LOC(sp_ok):
+	ld ENV(JB_PC), %o0	/* Extract target return PC.  */
 	jmp %o0 + 8		/* Return there.  */
-	 mov %g6, %o0		/* Delay slot: set return value.  */
+	 mov %g2, %o0		/* Delay slot: set return value.  */
+
+END(__longjmp)
diff --git a/sysdeps/sparc/sparc32/add_n.S b/sysdeps/sparc/sparc32/add_n.S
index 9852c256aa..5a6fccbbee 100644
--- a/sysdeps/sparc/sparc32/add_n.S
+++ b/sysdeps/sparc/sparc32/add_n.S
@@ -1,20 +1,20 @@
 ! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
 ! sum in a third limb vector.
-
-! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
-
+!
+! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+!
 ! This file is part of the GNU MP Library.
-
+!
 ! The GNU MP Library is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Library General Public License as published by
 ! the Free Software Foundation; either version 2 of the License, or (at your
 ! option) any later version.
-
+!
 ! The GNU MP Library is distributed in the hope that it will be useful, but
 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 ! License for more details.
-
+!
 ! You should have received a copy of the GNU Library General Public License
 ! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
 ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
@@ -22,205 +22,217 @@
 
 
 ! INPUT PARAMETERS
-#define res_ptr	%o0
-#define s1_ptr	%o1
-#define s2_ptr	%o2
-#define size	%o3
+#define RES_PTR	%o0
+#define S1_PTR	%o1
+#define S2_PTR	%o2
+#define SIZE	%o3
 
-#include "sysdep.h"
+#include <sysdep.h>
 
-	.text
-	.align	4
-	.global	C_SYMBOL_NAME(__mpn_add_n)
-C_SYMBOL_NAME(__mpn_add_n):
-	xor	s2_ptr,res_ptr,%g1
+ENTRY(__mpn_add_n)
+	xor	S2_PTR,RES_PTR,%g1
 	andcc	%g1,4,%g0
-	bne	L1			! branch if alignment differs
-	nop
+	bne	LOC(1)			! branch if alignment differs
+	 nop
 ! **  V1a  **
-L0:	andcc	res_ptr,4,%g0		! res_ptr unaligned? Side effect: cy=0
-	be	L_v1			! if no, branch
-	nop
-/* Add least significant limb separately to align res_ptr and s2_ptr */
-	ld	[s1_ptr],%g4
-	add	s1_ptr,4,s1_ptr
-	ld	[s2_ptr],%g2
-	add	s2_ptr,4,s2_ptr
-	add	size,-1,size
+LOC(0):	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
+	be	LOC(v1)			! if no, branch
+	 nop
+/* Add least significant limb separately to align RES_PTR and S2_PTR */
+	ld	[S1_PTR],%g4
+	add	S1_PTR,4,S1_PTR
+	ld	[S2_PTR],%g2
+	add	S2_PTR,4,S2_PTR
+	add	SIZE,-1,SIZE
 	addcc	%g4,%g2,%o4
-	st	%o4,[res_ptr]
-	add	res_ptr,4,res_ptr
-L_v1:	addx	%g0,%g0,%o4		! save cy in register
-	cmp	size,2			! if size < 2 ...
-	bl	Lend2			! ... branch to tail code
+	st	%o4,[RES_PTR]
+	add	RES_PTR,4,RES_PTR
+LOC(v1):
+	addx	%g0,%g0,%o4		! save cy in register
+	cmp	SIZE,2			! if SIZE < 2 ...
+	bl	LOC(end2)		! ... branch to tail code
 	subcc	%g0,%o4,%g0		! restore cy
 
-	ld	[s1_ptr+0],%g4
-	addcc	size,-10,size
-	ld	[s1_ptr+4],%g1
-	ldd	[s2_ptr+0],%g2
-	blt	Lfin1
+	ld	[S1_PTR+0],%g4
+	addcc	SIZE,-10,SIZE
+	ld	[S1_PTR+4],%g1
+	ldd	[S2_PTR+0],%g2
+	blt	LOC(fin1)
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add blocks of 8 limbs until less than 8 limbs remain */
-Loop1:	addxcc	%g4,%g2,%o4
-	ld	[s1_ptr+8],%g4
+LOC(loop1):
+	addxcc	%g4,%g2,%o4
+	ld	[S1_PTR+8],%g4
 	addxcc	%g1,%g3,%o5
-	ld	[s1_ptr+12],%g1
-	ldd	[s2_ptr+8],%g2
-	std	%o4,[res_ptr+0]
+	ld	[S1_PTR+12],%g1
+	ldd	[S2_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
 	addxcc	%g4,%g2,%o4
-	ld	[s1_ptr+16],%g4
+	ld	[S1_PTR+16],%g4
 	addxcc	%g1,%g3,%o5
-	ld	[s1_ptr+20],%g1
-	ldd	[s2_ptr+16],%g2
-	std	%o4,[res_ptr+8]
+	ld	[S1_PTR+20],%g1
+	ldd	[S2_PTR+16],%g2
+	std	%o4,[RES_PTR+8]
 	addxcc	%g4,%g2,%o4
-	ld	[s1_ptr+24],%g4
+	ld	[S1_PTR+24],%g4
 	addxcc	%g1,%g3,%o5
-	ld	[s1_ptr+28],%g1
-	ldd	[s2_ptr+24],%g2
-	std	%o4,[res_ptr+16]
+	ld	[S1_PTR+28],%g1
+	ldd	[S2_PTR+24],%g2
+	std	%o4,[RES_PTR+16]
 	addxcc	%g4,%g2,%o4
-	ld	[s1_ptr+32],%g4
+	ld	[S1_PTR+32],%g4
 	addxcc	%g1,%g3,%o5
-	ld	[s1_ptr+36],%g1
-	ldd	[s2_ptr+32],%g2
-	std	%o4,[res_ptr+24]
+	ld	[S1_PTR+36],%g1
+	ldd	[S2_PTR+32],%g2
+	std	%o4,[RES_PTR+24]
 	addx	%g0,%g0,%o4		! save cy in register
-	addcc	size,-8,size
-	add	s1_ptr,32,s1_ptr
-	add	s2_ptr,32,s2_ptr
-	add	res_ptr,32,res_ptr
-	bge	Loop1
+	addcc	SIZE,-8,SIZE
+	add	S1_PTR,32,S1_PTR
+	add	S2_PTR,32,S2_PTR
+	add	RES_PTR,32,RES_PTR
+	bge	LOC(loop1)
 	subcc	%g0,%o4,%g0		! restore cy
 
-Lfin1:	addcc	size,8-2,size
-	blt	Lend1
+LOC(fin1):
+	addcc	SIZE,8-2,SIZE
+	blt	LOC(end1)
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add blocks of 2 limbs until less than 2 limbs remain */
-Loope1:	addxcc	%g4,%g2,%o4
-	ld	[s1_ptr+8],%g4
+LOC(loope1):
+	addxcc	%g4,%g2,%o4
+	ld	[S1_PTR+8],%g4
 	addxcc	%g1,%g3,%o5
-	ld	[s1_ptr+12],%g1
-	ldd	[s2_ptr+8],%g2
-	std	%o4,[res_ptr+0]
+	ld	[S1_PTR+12],%g1
+	ldd	[S2_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
 	addx	%g0,%g0,%o4		! save cy in register
-	addcc	size,-2,size
-	add	s1_ptr,8,s1_ptr
-	add	s2_ptr,8,s2_ptr
-	add	res_ptr,8,res_ptr
-	bge	Loope1
+	addcc	SIZE,-2,SIZE
+	add	S1_PTR,8,S1_PTR
+	add	S2_PTR,8,S2_PTR
+	add	RES_PTR,8,RES_PTR
+	bge	LOC(loope1)
 	subcc	%g0,%o4,%g0		! restore cy
-Lend1:	addxcc	%g4,%g2,%o4
+LOC(end1):
+	addxcc	%g4,%g2,%o4
 	addxcc	%g1,%g3,%o5
-	std	%o4,[res_ptr+0]
+	std	%o4,[RES_PTR+0]
 	addx	%g0,%g0,%o4		! save cy in register
 
-	andcc	size,1,%g0
-	be	Lret1
+	andcc	SIZE,1,%g0
+	be	LOC(ret1)
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add last limb */
-	ld	[s1_ptr+8],%g4
-	ld	[s2_ptr+8],%g2
+	ld	[S1_PTR+8],%g4
+	ld	[S2_PTR+8],%g2
 	addxcc	%g4,%g2,%o4
-	st	%o4,[res_ptr+8]
+	st	%o4,[RES_PTR+8]
 
-Lret1:	retl
+LOC(ret1):
+	retl
 	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
 
-L1:	xor	s1_ptr,res_ptr,%g1
+LOC(1):	xor	S1_PTR,RES_PTR,%g1
 	andcc	%g1,4,%g0
-	bne	L2
+	bne	LOC(2)
 	nop
 ! **  V1b  **
-	mov	s2_ptr,%g1
-	mov	s1_ptr,s2_ptr
-	b	L0
-	mov	%g1,s1_ptr
+	mov	S2_PTR,%g1
+	mov	S1_PTR,S2_PTR
+	b	LOC(0)
+	mov	%g1,S1_PTR
 
 ! **  V2  **
-/* If we come here, the alignment of s1_ptr and res_ptr as well as the
-   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
+/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
+   alignment of S2_PTR and RES_PTR differ.  Since there are only two ways
    things can be aligned (that we care about) we now know that the alignment
-   of s1_ptr and s2_ptr are the same.  */
+   of S1_PTR and S2_PTR are the same.  */
 
-L2:	cmp	size,1
-	be	Ljone
+LOC(2):	cmp	SIZE,1
+	be	LOC(jone)
 	nop
-	andcc	s1_ptr,4,%g0		! s1_ptr unaligned? Side effect: cy=0
-	be	L_v2			! if no, branch
+	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
+	be	LOC(v2)			! if no, branch
 	nop
-/* Add least significant limb separately to align s1_ptr and s2_ptr */
-	ld	[s1_ptr],%g4
-	add	s1_ptr,4,s1_ptr
-	ld	[s2_ptr],%g2
-	add	s2_ptr,4,s2_ptr
-	add	size,-1,size
+/* Add least significant limb separately to align S1_PTR and S2_PTR */
+	ld	[S1_PTR],%g4
+	add	S1_PTR,4,S1_PTR
+	ld	[S2_PTR],%g2
+	add	S2_PTR,4,S2_PTR
+	add	SIZE,-1,SIZE
 	addcc	%g4,%g2,%o4
-	st	%o4,[res_ptr]
-	add	res_ptr,4,res_ptr
+	st	%o4,[RES_PTR]
+	add	RES_PTR,4,RES_PTR
 
-L_v2:	addx	%g0,%g0,%o4		! save cy in register
-	addcc	size,-8,size
-	blt	Lfin2
+LOC(v2):
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-8,SIZE
+	blt	LOC(fin2)
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add blocks of 8 limbs until less than 8 limbs remain */
-Loop2:	ldd	[s1_ptr+0],%g2
-	ldd	[s2_ptr+0],%o4
+LOC(loop2):
+	ldd	[S1_PTR+0],%g2
+	ldd	[S2_PTR+0],%o4
 	addxcc	%g2,%o4,%g2
-	st	%g2,[res_ptr+0]
+	st	%g2,[RES_PTR+0]
 	addxcc	%g3,%o5,%g3
-	st	%g3,[res_ptr+4]
-	ldd	[s1_ptr+8],%g2
-	ldd	[s2_ptr+8],%o4
+	st	%g3,[RES_PTR+4]
+	ldd	[S1_PTR+8],%g2
+	ldd	[S2_PTR+8],%o4
 	addxcc	%g2,%o4,%g2
-	st	%g2,[res_ptr+8]
+	st	%g2,[RES_PTR+8]
 	addxcc	%g3,%o5,%g3
-	st	%g3,[res_ptr+12]
-	ldd	[s1_ptr+16],%g2
-	ldd	[s2_ptr+16],%o4
+	st	%g3,[RES_PTR+12]
+	ldd	[S1_PTR+16],%g2
+	ldd	[S2_PTR+16],%o4
 	addxcc	%g2,%o4,%g2
-	st	%g2,[res_ptr+16]
+	st	%g2,[RES_PTR+16]
 	addxcc	%g3,%o5,%g3
-	st	%g3,[res_ptr+20]
-	ldd	[s1_ptr+24],%g2
-	ldd	[s2_ptr+24],%o4
+	st	%g3,[RES_PTR+20]
+	ldd	[S1_PTR+24],%g2
+	ldd	[S2_PTR+24],%o4
 	addxcc	%g2,%o4,%g2
-	st	%g2,[res_ptr+24]
+	st	%g2,[RES_PTR+24]
 	addxcc	%g3,%o5,%g3
-	st	%g3,[res_ptr+28]
+	st	%g3,[RES_PTR+28]
 	addx	%g0,%g0,%o4		! save cy in register
-	addcc	size,-8,size
-	add	s1_ptr,32,s1_ptr
-	add	s2_ptr,32,s2_ptr
-	add	res_ptr,32,res_ptr
-	bge	Loop2
+	addcc	SIZE,-8,SIZE
+	add	S1_PTR,32,S1_PTR
+	add	S2_PTR,32,S2_PTR
+	add	RES_PTR,32,RES_PTR
+	bge	LOC(loop2)
 	subcc	%g0,%o4,%g0		! restore cy
 
-Lfin2:	addcc	size,8-2,size
-	blt	Lend2
+LOC(fin2):
+	addcc	SIZE,8-2,SIZE
+	blt	LOC(end2)
 	subcc	%g0,%o4,%g0		! restore cy
-Loope2:	ldd	[s1_ptr+0],%g2
-	ldd	[s2_ptr+0],%o4
+LOC(loope2):
+	ldd	[S1_PTR+0],%g2
+	ldd	[S2_PTR+0],%o4
 	addxcc	%g2,%o4,%g2
-	st	%g2,[res_ptr+0]
+	st	%g2,[RES_PTR+0]
 	addxcc	%g3,%o5,%g3
-	st	%g3,[res_ptr+4]
+	st	%g3,[RES_PTR+4]
 	addx	%g0,%g0,%o4		! save cy in register
-	addcc	size,-2,size
-	add	s1_ptr,8,s1_ptr
-	add	s2_ptr,8,s2_ptr
-	add	res_ptr,8,res_ptr
-	bge	Loope2
+	addcc	SIZE,-2,SIZE
+	add	S1_PTR,8,S1_PTR
+	add	S2_PTR,8,S2_PTR
+	add	RES_PTR,8,RES_PTR
+	bge	LOC(loope2)
 	subcc	%g0,%o4,%g0		! restore cy
-Lend2:	andcc	size,1,%g0
-	be	Lret2
+LOC(end2):
+	andcc	SIZE,1,%g0
+	be	LOC(ret2)
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add last limb */
-Ljone:	ld	[s1_ptr],%g4
-	ld	[s2_ptr],%g2
+LOC(jone):
+	ld	[S1_PTR],%g4
+	ld	[S2_PTR],%g2
 	addxcc	%g4,%g2,%o4
-	st	%o4,[res_ptr]
+	st	%o4,[RES_PTR]
 
-Lret2:	retl
+LOC(ret2):
+	retl
 	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
+
+END(__mpn_add_n)
diff --git a/sysdeps/sparc/sparc32/addmul_1.S b/sysdeps/sparc/sparc32/addmul_1.S
index 375d25db6b..5e0c381f4f 100644
--- a/sysdeps/sparc/sparc32/addmul_1.S
+++ b/sysdeps/sparc/sparc32/addmul_1.S
@@ -1,20 +1,20 @@
 ! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add
 ! the result to a second limb vector.
-
-! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
-
+!
+! Copyright (C) 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
+!
 ! This file is part of the GNU MP Library.
-
+!
 ! The GNU MP Library is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Library General Public License as published by
 ! the Free Software Foundation; either version 2 of the License, or (at your
 ! option) any later version.
-
+!
 ! The GNU MP Library is distributed in the hope that it will be useful, but
 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 ! License for more details.
-
+!
 ! You should have received a copy of the GNU Library General Public License
 ! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
 ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
@@ -22,17 +22,14 @@
 
 
 ! INPUT PARAMETERS
-! res_ptr	o0
-! s1_ptr	o1
-! size		o2
-! s2_limb	o3
+! RES_PTR	o0
+! S1_PTR	o1
+! SIZE		o2
+! S2_LIMB	o3
 
-#include "sysdep.h"
+#include <sysdep.h>
 
-.text
-	.align 4
-	.global	C_SYMBOL_NAME(__mpn_addmul_1)
-C_SYMBOL_NAME(__mpn_addmul_1):
+ENTRY(__mpn_addmul_1)
 	! Make S1_PTR and RES_PTR point at the end of their blocks
 	! and put (- 4 x SIZE) in index/loop counter.
 	sll	%o2,2,%o2
@@ -41,19 +38,19 @@ C_SYMBOL_NAME(__mpn_addmul_1):
 	sub	%g0,%o2,%o2
 
 	cmp	%o3,0xfff
-	bgu	Large
+	bgu	LOC(large)
 	nop
 
 	ld	[%o1+%o2],%o5
 	mov	0,%o0
-	b	L0
+	b	LOC(0)
 	 add	%o4,-4,%o4
-Loop0:
+LOC(loop0):
 	addcc	%o5,%g1,%g1
 	ld	[%o1+%o2],%o5
 	addx	%o0,%g0,%o0
 	st	%g1,[%o4+%o2]
-L0:	wr	%g0,%o3,%y
+LOC(0):	wr	%g0,%o3,%y
 	sra	%o5,31,%g2
 	and	%o3,%g2,%g2
 	andcc	%g1,0,%g1
@@ -79,7 +76,7 @@ L0:	wr	%g0,%o3,%y
 	addcc	%g1,%o0,%g1
 	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
 	addcc	%o2,4,%o2	! loop counter
-	bne	Loop0
+	bne	LOC(loop0)
 	 ld	[%o4+%o2],%o5
 
 	addcc	%o5,%g1,%g1
@@ -88,17 +85,18 @@ L0:	wr	%g0,%o3,%y
 	st	%g1,[%o4+%o2]
 
 
-Large:	ld	[%o1+%o2],%o5
+LOC(large):
+	ld	[%o1+%o2],%o5
 	mov	0,%o0
 	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0
-	b	L1
+	b	LOC(1)
 	 add	%o4,-4,%o4
-Loop:
+LOC(loop):
 	addcc	%o5,%g3,%g3
 	ld	[%o1+%o2],%o5
 	addx	%o0,%g0,%o0
 	st	%g3,[%o4+%o2]
-L1:	wr	%g0,%o5,%y
+LOC(1):	wr	%g0,%o5,%y
 	and	%o5,%g4,%g2
 	andcc	%g0,%g0,%g1
 	mulscc	%g1,%o3,%g1
@@ -138,10 +136,12 @@ L1:	wr	%g0,%o5,%y
 	addcc	%g3,%o0,%g3
 	addx	%g2,%g1,%o0
 	addcc	%o2,4,%o2
-	bne	Loop
+	bne	LOC(loop)
 	 ld	[%o4+%o2],%o5
 
 	addcc	%o5,%g3,%g3
 	addx	%o0,%g0,%o0
 	retl
 	st	%g3,[%o4+%o2]
+
+END(__mpn_addmul_1)
diff --git a/sysdeps/sparc/sparc32/alloca.S b/sysdeps/sparc/sparc32/alloca.S
index dcbd171163..1eb755afab 100644
--- a/sysdeps/sparc/sparc32/alloca.S
+++ b/sysdeps/sparc/sparc32/alloca.S
@@ -16,7 +16,7 @@
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
-#include "sysdep.h"
+#include <sysdep.h>
 
 /* Code produced by Sun's C compiler calls this function with two extra
    arguments which it makes relocatable symbols but seem always to be
@@ -30,3 +30,4 @@ ENTRY (__builtin_alloca)
 	sub %sp, %o0, %sp	/* Push some stack space.  */
 	retl			/* Return; the returned buffer leaves 96 */
 	add %sp, 96, %o0	/* bytes of register save area at the top. */
+END (__builtin_alloca)
diff --git a/sysdeps/sparc/sparc32/bsd-_setjmp.S b/sysdeps/sparc/sparc32/bsd-_setjmp.S
index bf49765f4d..4e6a2da560 100644
--- a/sysdeps/sparc/sparc32/bsd-_setjmp.S
+++ b/sysdeps/sparc/sparc32/bsd-_setjmp.S
@@ -1,40 +1 @@
-/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'.  Sparc version.
-   Copyright (C) 1994, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
-
-#include <sysdep.h>
-
-ENTRY (_setjmp)
-
-#ifdef PIC
-	save	%sp, -64, %sp
-1:	call	2f
-	sethi	%hi(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1
-2:	or	%l7, %lo(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1
-	add	%g1, %o7, %g1
-	sethi	%hi(C_SYMBOL_NAME(__sigsetjmp)), %g2
-	restore
-	or	%g2, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g2
-	ld	[%g1+%g2], %g1
-#else
-	sethi	%hi(C_SYMBOL_NAME(__sigsetjmp)), %g1
-	or	%g1, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g1
-#endif
-
-	jmp %g1
-	 mov %g0, %o1		/* Pass second argument of zero.  */
+/* _setjmp is in setjmp.S  */
diff --git a/sysdeps/sparc/sparc32/bsd-setjmp.S b/sysdeps/sparc/sparc32/bsd-setjmp.S
index 16dc260765..1da848d2f1 100644
--- a/sysdeps/sparc/sparc32/bsd-setjmp.S
+++ b/sysdeps/sparc/sparc32/bsd-setjmp.S
@@ -1,40 +1 @@
-/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'.  Sparc version.
-   Copyright (C) 1994, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
-
-#include <sysdep.h>
-
-ENTRY (setjmp)
-
-#ifdef PIC
-	save	%sp, -64, %sp
-1:	call	2f
-	sethi	%hi(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1
-2:	or	%l7, %lo(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1
-	add	%g1, %o7, %g1
-	sethi	%hi(C_SYMBOL_NAME(__sigsetjmp)), %g2
-	restore
-	or	%g2, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g2
-	ld	[%g1+%g2], %g1
-#else
-	sethi	%hi(C_SYMBOL_NAME(__sigsetjmp)), %g1
-	or	%g1, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g1
-#endif
-
-	jmp %g1
-	 mov 1, %o1		/* Pass second argument of one.  */
+/* setjmp is in setjmp.S  */
diff --git a/sysdeps/sparc/sparc32/divrem.m4 b/sysdeps/sparc/sparc32/divrem.m4
index 665abf11ae..30d532ad77 100644
--- a/sysdeps/sparc/sparc32/divrem.m4
+++ b/sysdeps/sparc/sparc32/divrem.m4
@@ -47,8 +47,8 @@ define(V, `%o5')dnl
 dnl
 dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
 define(T, `%g1')dnl
-define(SC, `%g7')dnl
-ifelse(S, `true', `define(SIGN, `%g6')')dnl
+define(SC, `%g2')dnl
+ifelse(S, `true', `define(SIGN, `%g3')')dnl
 
 dnl
 dnl This is the recursive definition for developing quotient digits.
@@ -65,7 +65,7 @@ dnl modified to reflect the output R.
 dnl
 define(DEVELOP_QUOTIENT_BITS,
 `	! depth $1, accumulated bits $2
-	bl	L.$1.eval(2**N+$2)
+	bl	LOC($1.eval(2**N+$2))
 	srl	V,1,V
 	! remainder is positive
 	subcc	R,V,R
@@ -73,7 +73,7 @@ define(DEVELOP_QUOTIENT_BITS,
 	`	b	9f
 		add	Q, ($2*2+1), Q
 	', `	DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
-L.$1.eval(2**N+$2):
+LOC($1.eval(2**N+$2)):
 	! remainder is negative
 	addcc	R,V,R
 	ifelse($1, N,
@@ -82,18 +82,10 @@ L.$1.eval(2**N+$2):
 	', `	DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
 	ifelse($1, 1, `9:')')dnl
 
-#include "sysdep.h"
-#ifdef __linux__
-#include <asm/traps.h>
-#else
-#ifdef __svr4__
+#include <sysdep.h>
 #include <sys/trap.h>
-#else
-#include <machine/trap.h>
-#endif
-#endif
 
-FUNC(NAME)
+ENTRY(NAME)
 ifelse(S, `true',
 `	! compute sign of result; if neither is negative, no problem
 	orcc	divisor, dividend, %g0	! either negative?
@@ -124,11 +116,11 @@ ifelse(OP, `div',
 
 1:
 	cmp	R, V			! if divisor exceeds dividend, done
-	blu	Lgot_result		! (and algorithm fails otherwise)
+	blu	LOC(got_result)		! (and algorithm fails otherwise)
 	clr	Q
 	sethi	%hi(1 << (WORDSIZE - TOPBITS - 1)), T
 	cmp	R, T
-	blu	Lnot_really_big
+	blu	LOC(not_really_big)
 	clr	ITER
 
 	! `Here the dividend is >= 2**(31-N) or so.  We must be careful here,
@@ -146,7 +138,7 @@ ifelse(OP, `div',
 
 	! Now compute SC.
 	2:	addcc	V, V, V
-		bcc	Lnot_too_big
+		bcc	LOC(not_too_big)
 		add	SC, 1, SC
 
 		! We get here if the divisor overflowed while shifting.
@@ -155,14 +147,14 @@ ifelse(OP, `div',
 		sll	T, TOPBITS, T	! high order bit
 		srl	V, 1, V		! rest of V
 		add	V, T, V
-		b	Ldo_single_div
+		b	LOC(do_single_div)
 		sub	SC, 1, SC
 
-	Lnot_too_big:
+	LOC(not_too_big):
 	3:	cmp	V, R
 		blu	2b
 		nop
-		be	Ldo_single_div
+		be	LOC(do_single_div)
 		nop
 	/* NB: these are commented out in the V8-Sparc manual as well */
 	/* (I do not understand this) */
@@ -177,15 +169,15 @@ ifelse(OP, `div',
 	! order bit set in the first step, just falling into the regular
 	! division loop will mess up the first time around.
 	! So we unroll slightly...
-	Ldo_single_div:
+	LOC(do_single_div):
 		subcc	SC, 1, SC
-		bl	Lend_regular_divide
+		bl	LOC(end_regular_divide)
 		nop
 		sub	R, V, R
 		mov	1, Q
-		b	Lend_single_divloop
+		b	LOC(end_single_divloop)
 		nop
-	Lsingle_divloop:
+	LOC(single_divloop):
 		sll	Q, 1, Q
 		bl	1f
 		srl	V, 1, V
@@ -197,37 +189,37 @@ ifelse(OP, `div',
 		add	R, V, R
 		sub	Q, 1, Q
 	2:
-	Lend_single_divloop:
+	LOC(end_single_divloop):
 		subcc	SC, 1, SC
-		bge	Lsingle_divloop
+		bge	LOC(single_divloop)
 		tst	R
-		b,a	Lend_regular_divide
+		b,a	LOC(end_regular_divide)
 
-Lnot_really_big:
+LOC(not_really_big):
 1:
 	sll	V, N, V
 	cmp	V, R
 	bleu	1b
 	addcc	ITER, 1, ITER
-	be	Lgot_result
+	be	LOC(got_result)
 	sub	ITER, 1, ITER
 
 	tst	R	! set up for initial iteration
-Ldivloop:
+LOC(divloop):
 	sll	Q, N, Q
 	DEVELOP_QUOTIENT_BITS(1, 0)
-Lend_regular_divide:
+LOC(end_regular_divide):
 	subcc	ITER, 1, ITER
-	bge	Ldivloop
+	bge	LOC(divloop)
 	tst	R
-	bl,a	Lgot_result
+	bl,a	LOC(got_result)
 	! non-restoring fixup here (one instruction only!)
 ifelse(OP, `div',
 `	sub	Q, 1, Q
 ', `	add	R, divisor, R
 ')
 
-Lgot_result:
+LOC(got_result):
 ifelse(S, `true',
 `	! check to see if answer should be < 0
 	tst	SIGN
@@ -236,3 +228,5 @@ ifelse(S, `true',
 1:')
 	retl
 	ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
+
+END(NAME)
diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h
index f7bcba0c2f..7c547f24ba 100644
--- a/sysdeps/sparc/sparc32/dl-machine.h
+++ b/sysdeps/sparc/sparc32/dl-machine.h
@@ -30,7 +30,7 @@
 #define OPCODE_CALL	0x40000000 /* call ?; add PC-rel word address */
 #define OPCODE_SETHI_G1	0x03000000 /* sethi ?, %g1; add value>>10 */
 #define OPCODE_JMP_G1	0x81c06000 /* jmp %g1+?; add lo 10 bits of value */
-#define OPCODE_SAVE_SP64 0x9de3bfc0 /* save %sp, -64, %sp */
+#define OPCODE_SAVE_SP	0x9de3bfa8 /* save %sp, -(16+6)*4, %sp */
 
 
 /* Return nonzero iff E_MACHINE is compatible with the running host.  */
@@ -55,7 +55,7 @@ elf_machine_dynamic (void)
 static inline Elf32_Addr
 elf_machine_load_address (void)
 {
-  register Elf32_Addr pc __asm("%o7"), got;
+  register Elf32_Addr pc __asm("%o7"), pic __asm("%l7"), got;
 
   /* Utilize the fact that a local .got entry will be partially
      initialized at startup awaiting its RELATIVE fixup.  */
@@ -64,50 +64,187 @@ elf_machine_load_address (void)
         ".Load_address:\n\t"
         "call 1f\n\t"
         "or %1,%%lo(.Load_address),%1\n"
-        "1:\tld [%%l7+%1],%1"
-        : "=r"(pc), "=r"(got));
+        "1:\tld [%2+%1],%1"
+        : "=r"(pc), "=r"(got) : "r"(pic));
 
   return pc - got;
 }
 
-  Elf32_Addr addr;
-
-  asm (
-       "add   %%fp,0x44,%%o2\n\t"	/* o2 = point to argc */
-       "ld    [%%o2 - 4],%%o0\n\t"	/* o0 = load argc     */
-       "sll   %%o0, 2, %%o0\n\t"	/* o0 = argc * sizeof (int) */
-       "add   %%o2,%%o0,%%o2\n\t"	/* o2 = skip over argv */
-       "add   %%o2,4,%%o2\n\t"		/* skip over null after argv */
-
-       /* Now %o2 is pointing to env, skip over that as well.  */
-       "1:\n\t"
-       "ld    [%%o2],%%o0\n\t"
-        "cmp   %%o0,0\n\t"
-       "bnz   1b\n\t"
-       "add   %%o2,4,%%o2\n\t"
-
-       /* Note that above, we want to advance the NULL after envp so
-	  we always add 4.  */
-
-       /* Now, search for the AT_BASE property.  */
-       "2:\n\t"
-       "ld   [%%o2],%%o0\n\t"
-       "cmp  %%o0,0\n\t"
-       "be,a 3f\n\t"
-       "or   %%g0,%%g0,%0\n\t"
-       "cmp  %%o0,7\n\t"	/* AT_BASE = 7 */
-       "be,a 3f\n\t"
-       "ld   [%%o2+4],%0\n\t"
-       "b    2b\n\t"
-       "add  %%o2,8,%%o2\n\t"
-       /* At this point %0 has the load address for the interpreter */
-       "3:\n\t"
-       : "=r" (addr)
-       : /* no inputs */
-       : "o0", "o2");
-  return addr;
+/* Set up the loaded object described by L so its unrelocated PLT
+   entries will jump to the on-demand fixup code in dl-runtime.c.  */
+
+static inline int
+elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+{
+  Elf32_Addr *plt;
+  extern void _dl_runtime_resolve (Elf32_Word);
+
+  if (l->l_info[DT_JMPREL] && lazy)
+    {
+      /* The entries for functions in the PLT have not yet been filled in.
+	 Their initial contents will arrange when called to set the high 22
+	 bits of %g1 with an offset into the .rela.plt section and jump to
+	 the beginning of the PLT.  */
+      plt = (Elf32_Addr *) (l->l_addr + l->l_info[DT_PLTGOT]->d_un.d_ptr);
+
+      /* The beginning of the PLT does:
+
+	 	save %sp, -(16+6)*4, %sp
+	 pltpc:	call _dl_runtime_resolve
+		nop
+		.word MAP
+
+         This saves the register window containing the arguments, and the
+	 PC value (pltpc) implicitly saved in %o7 by the call points near the
+	 location where we store the link_map pointer for this object.  */
+
+      plt[0] = OPCODE_SAVE_SP;
+      /* Construct PC-relative word address.  */
+      plt[1] = OPCODE_CALL | (((Elf32_Addr) &_dl_runtime_resolve -
+			       (Elf32_Addr) &plt[1]) >> 2);
+      plt[2] = OPCODE_NOP;	/* Fill call delay slot.  */
+      plt[3] = (Elf32_Addr) l;
+    }
+
+  return lazy;
 }
 
+/* This code is used in dl-runtime.c to call the `fixup' function
+   and then redirect to the address it returns.  */
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\
+	.globl _dl_runtime_resolve
+	.type _dl_runtime_resolve, @function
+_dl_runtime_resolve:
+	/* Set up the arguments to fixup --
+	   %o0 = link_map out of plt0
+	   %o1 = offset of reloc entry  */
+	ld	[%o7 + 8], %o0
+	srl	%g1, 10, %o1
+	call	fixup
+	 sub	%o1, 4*12, %o1
+	jmp	%o0
+	 restore
+	.size _dl_runtime_resolve, . - _dl_runtime_resolve");
+
+/* The address of the JMP_SLOT reloc is the .plt entry, thus we don't
+   dereference the reloc's addr to get the final destination.  Ideally
+   there would be a generic way to return the value of the symbol from
+   elf_machine_relplt, but as it is, the address of the .plt entry is
+   good enough.  */
+#define ELF_FIXUP_RETURN_VALUE(map, result)  ((Elf32_Addr) &(result))
+
+/* Nonzero iff TYPE should not be allowed to resolve to one of
+   the main executable's symbols, as for a COPY reloc.  */
+#define elf_machine_lookup_noexec_p(type) ((type) == R_SPARC_COPY)
+
+/* Nonzero iff TYPE describes relocation of a PLT entry, so
+   PLT entries should not be allowed to define the value.  */
+#define elf_machine_lookup_noplt_p(type) ((type) == R_SPARC_JMP_SLOT)
+
+/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries.  */
+#define ELF_MACHINE_RELOC_NOPLT	R_SPARC_JMP_SLOT
+
+/* The SPARC never uses Elf32_Rel relocations.  */
+#define ELF_MACHINE_NO_REL 1
+
+/* The SPARC overlaps DT_RELA and DT_PLTREL.  */
+#define ELF_MACHINE_PLTREL_OVERLAP 1
+
+/* The PLT uses Elf32_Rela relocs.  */
+#define elf_machine_relplt elf_machine_rela
+
+/* Initial entry point code for the dynamic linker.
+   The C function `_dl_start' is the real entry point;
+   its return value is the user program's entry point.  */
+
+#define RTLD_START __asm__ ("\
+.text
+	.globl _start
+	.type _start,@function
+_start:
+  /* Allocate space for functions to drop their arguments.  */
+	sub	%sp, 6*4, %sp
+  /* Pass pointer to argument block to _dl_start.  */
+	call	_dl_start
+	 add	%sp, 22*4, %o0
+	/* FALLTHRU */
+	.globl _dl_start_user
+	.type _dl_start_user,@function
+_dl_start_user:
+  /* Load the PIC register.  */
+1:	call	2f
+	 sethi	%hi(_GLOBAL_OFFSET_TABLE_-(1b-.)), %l7
+2:	or	%l7, %lo(_GLOBAL_OFFSET_TABLE_-(1b-.)), %l7
+	add	%l7, %o7, %l7
+  /* Save the user entry point address in %l0 */
+	mov	%o0, %l0
+  /* See if we were run as a command with the executable file name as an
+     extra leading argument.  If so, adjust the contents of the stack.  */
+	sethi	%hi(_dl_skip_args), %g2
+	or	%g2, %lo(_dl_skip_args), %g2
+	ld	[%l7+%g2], %i0
+	ld	[%i0], %i0
+	tst	%i0
+	beq	3f
+	 nop
+	/* Find out how far to shift.  */
+	ld	[%sp+22*4], %i1		/* load argc */
+	sub	%i1, %i0, %i1
+	sll	%i0, 2, %i2
+	st	%i1, [%sp+22*4]
+	add	%sp, 23*4, %i1
+	add	%i1, %i2, %i2
+	/* Copy down argv */
+21:	ld	[%i2], %i3
+	add	%i2, 4, %i2
+	tst	%i3
+	st	%i3, [%i1]
+	bne	21b
+	 add	%i1, 4, %i1
+	/* Copy down env */
+22:	ld	[%i2], %i3
+	add	%i2, 4, %i2
+	tst	%i3
+	st	%i3, [%i1]
+	bne	22b
+	 add	%i1, 4, %i1
+	/* Copy down auxiliary table.  */
+23:	ld	[%i2], %i3
+	ld	[%i2+4], %i4
+	add	%i2, 8, %i2
+	tst	%i3
+	st	%i3, [%i1]
+	st	%i4, [%i1+4]
+	bne	23b
+	 add	%i1, 8, %i1
+  /* Load _dl_default_scope[2] to pass to _dl_init_next.  */
+3:	sethi	%hi(_dl_default_scope), %g1
+	or	%g1, %lo(_dl_default_scope), %g1
+	ld	[%l7+%g1], %l1
+	ld	[%l1+2*4], %l1
+  /* Call _dl_init_next to return the address of an initializer to run.  */
+4:	call	_dl_init_next
+	 mov	%l1, %o0
+	tst	%o0
+	beq	5f
+	 nop
+	jmpl	%o0, %o7
+	 nop
+	ba,a	4b
+  /* Clear the startup flag.  */
+5:	sethi	%hi(_dl_starting_up), %g1
+	or	%g1, %lo(_dl_starting_up), %g1
+	ld	[%l7+%g1], %g1
+	st	%g0, [%g1]
+  /* Pass our finalizer function to the user in %g1.  */
+	sethi	%hi(_dl_fini), %g1
+	or	%g1, %lo(_dl_fini), %g1
+	ld	[%l7+%g1], %g1
+  /* Jump to the user's entry point and deallocate the extra stack we got.  */
+	jmp	%l0
+	 add	%sp, 6*4, %sp
+	.size   _dl_start_user,.-_dl_start_user");
+
 #ifdef RESOLVE
 /* Perform the relocation specified by RELOC and SYM (which is fully resolved).
    MAP is the object containing the reloc.  */
@@ -117,7 +254,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
 		  const Elf32_Sym *sym, const struct r_found_version *version,
 		  Elf32_Addr *const reloc_addr)
 {
-  Elf32_Addr loadbase;
+  extern unsigned long _dl_hwcap;
 
   if (ELF32_R_TYPE (reloc->r_info) == R_SPARC_RELATIVE)
     {
@@ -144,6 +281,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
       switch (ELF32_R_TYPE (reloc->r_info))
 	{
 	case R_SPARC_COPY:
+#ifndef RTLD_BOOTSTRAP
 	  if (sym->st_size > refsym->st_size
 	      || (_dl_verbose && sym->st_size < refsym->st_size))
 	    {
@@ -159,14 +297,21 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
 	    }
 	  memcpy (reloc_addr, (void *) value, MIN (sym->st_size,
 						   refsym->st_size));
+#endif
 	  break;
 	case R_SPARC_GLOB_DAT:
 	case R_SPARC_32:
 	  *reloc_addr = value;
 	  break;
 	case R_SPARC_JMP_SLOT:
-	  reloc_addr[1] = OPCODE_SETHI_G1 | (value >> 10);
+	  /* For thread safety, write the instructions from the bottom and
+	     flush before we overwrite the critical "b,a".  */
 	  reloc_addr[2] = OPCODE_JMP_G1 | (value & 0x3ff);
+	  if (1 || (_dl_hwcap & 1)) /* HWCAP_SPARC_FLUSH */
+	    __asm __volatile ("flush %0+8" : : "r"(reloc_addr));
+	  reloc_addr[1] = OPCODE_SETHI_G1 | (value >> 10);
+	  if (1 || (_dl_hwcap & 1)) /* HWCAP_SPARC_FLUSH */
+	    __asm __volatile ("flush %0+4" : : "r"(reloc_addr));
 	  break;
 	case R_SPARC_8:
 	  *(char *) reloc_addr = value;
@@ -218,146 +363,3 @@ elf_machine_lazy_rel (struct link_map *map, const Elf32_Rela *reloc)
 }
 
 #endif	/* RESOLVE */
-
-/* Nonzero iff TYPE should not be allowed to resolve to one of
-   the main executable's symbols, as for a COPY reloc.  */
-#define elf_machine_lookup_noexec_p(type) ((type) == R_SPARC_COPY)
-
-/* Nonzero iff TYPE describes relocation of a PLT entry, so
-   PLT entries should not be allowed to define the value.  */
-#define elf_machine_lookup_noplt_p(type) ((type) == R_SPARC_JMP_SLOT)
-
-/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries.  */
-#define ELF_MACHINE_RELOC_NOPLT	R_SPARC_JMP_SLOT
-
-/* The SPARC never uses Elf32_Rel relocations.  */
-#define ELF_MACHINE_NO_REL 1
-
-/* The SPARC overlaps DT_RELA and DT_PLTREL.  */
-#define ELF_MACHINE_PLTREL_OVERLAP 1
-
-/* Set up the loaded object described by L so its unrelocated PLT
-   entries will jump to the on-demand fixup code in dl-runtime.c.  */
-
-static inline int
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
-{
-  Elf32_Addr *plt;
-  extern void _dl_runtime_resolve (Elf32_Word);
-
-  if (l->l_info[DT_JMPREL] && lazy)
-    {
-      /* The entries for functions in the PLT have not yet been filled in.
-	 Their initial contents will arrange when called to set the high 22
-	 bits of %g1 with an offset into the .rela.plt section and jump to
-	 the beginning of the PLT.  */
-      plt = (Elf32_Addr *) (l->l_addr + l->l_info[DT_PLTGOT]->d_un.d_ptr);
-
-      /* The beginning of the PLT does:
-
-	 	save %sp, -64, %sp
-	 pltpc:	call _dl_runtime_resolve
-		nop
-		.word MAP
-
-         This saves the register window containing the arguments, and the
-	 PC value (pltpc) implicitly saved in %o7 by the call points near the
-	 location where we store the link_map pointer for this object.  */
-
-      plt[0] = OPCODE_SAVE_SP64; /* save %sp, -64, %sp */
-      /* Construct PC-relative word address.  */
-      plt[1] = OPCODE_CALL | (((Elf32_Addr) &_dl_runtime_resolve -
-			       (Elf32_Addr) &plt[1]) >> 2);
-      plt[2] = OPCODE_NOP;	/* Fill call delay slot.  */
-      plt[3] = (Elf32_Addr *) l;
-    }
-
-  return lazy;
-}
-
-/* This code is used in dl-runtime.c to call the `fixup' function
-   and then redirect to the address it returns.  */
-#define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\
-# Trampoline for _dl_runtime_resolver
-	.globl _dl_runtime_resolve
-	.type _dl_runtime_resolve, @function
-_dl_runtime_resolve:
-	t 1
-	#call  %g0
-	# Pass two args to fixup: the PLT address computed from the PC saved
-	# in the PLT's call insn, and the reloc offset passed in %g1.
-	#ld [%o7 + 8], %o1      | Second arg, loaded from PLTPC[2].
-	#call fixup
-	#shrl %g1, 22, %o0      | First arg, set in delay slot of call.
-	# Jump to the real function.
-	#jmpl %o0, %g0
-	# In the delay slot of that jump, restore the register window
-	# saved by the first insn of the PLT.
-	#restore
-	.size _dl_runtime_resolve, . - _dl_runtime_resolve
-");
-
-/* The PLT uses Elf32_Rela relocs.  */
-#define elf_machine_relplt elf_machine_rela
-
-
-/* Mask identifying addresses reserved for the user program,
-   where the dynamic linker should not map anything.  */
-#define ELF_MACHINE_USER_ADDRESS_MASK	???
-
-/* Initial entry point code for the dynamic linker.
-   The C function `_dl_start' is the real entry point;
-   its return value is the user program's entry point.  */
-
-#define RTLD_START __asm__ ( \
-".text\n\
- .globl _start\n\
- .type _start,@function\n\
-_start:\n\
-  /* Pass pointer to argument block to _dl_start.  */\n\
-  add %sp,64,%o0\n\
-  call _dl_start\n\
-   nop\n\
-  \n\
-  mov %o0,%l0\n\
-  \n\
-2:\n\
-   call 1f\n\
-   nop\n\
-1:\n\
-  sethi %hi(_GLOBAL_OFFSET_TABLE_-(2b-.)),%l2\n\
-  sethi %hi(_dl_default_scope),%l3\n\
-  or    %l2,%lo(_GLOBAL_OFFSET_TABLE_-(2b-.)),%l2\n\
-  or    %l3,%lo(_dl_default_scope),%l3\n\
-  add   %o7,%l2,%l1\n\
-  # %l1 has the GOT. %l3 has _dl_default_scope GOT offset\n\
-  ld    [%l1+%l3],%l4\n\
-  # %l4 has pointer to _dl_default_scope.  Now, load _dl_default_scope [2]\n\
-  ld    [%l4+8],%l4\n\
-  # %l4 has _dl_default_scope [2]\n\
-  # call _dl_init_next until it returns 0, pass _dl_default_scope [2]\n\
-3:\n\
-  call  _dl_init_next\n\
-   mov   %l4,%o0\n\
-  cmp   %o0,%g0\n\
-  bz,a  4f\n\
-   nop\n\
-  call  %o0\n\
-  /* Pass pointer to argument block to this init function */\n\
-   add %sp,64,%o0\n\
-  b,a   3b\n\
-4:\n\
-  # Clear the _dl_starting_up variable and pass _dl_fini in %g1 as per ELF ABI.\n\
-  sethi %hi(_dl_starting_up),%l4\n\
-  sethi %hi(_dl_fini),%l3\n\
-  or    %l4,%lo(_dl_starting_up),%l4\n\
-  or    %l3,%lo(_dl_fini),%l3\n\
-  # clear _dl_starting_up\n\
-  ld    [%l1+%l4],%l5\n\
-  st    %g0,[%l5]\n\
-  # load out fini function for atexit in %g1\n\
-  ld    [%l3+%l1],%g1\n\
-  # jump to the user program entry point.\n\
-  jmpl %l0,%g0\n\
-  nop\n\
-");
diff --git a/sysdeps/sparc/sparc32/dotmul.S b/sysdeps/sparc/sparc32/dotmul.S
index 7ce695cb7a..821aa8bdb4 100644
--- a/sysdeps/sparc/sparc32/dotmul.S
+++ b/sysdeps/sparc/sparc32/dotmul.S
@@ -10,11 +10,13 @@
  * This code optimizes short (less than 13-bit) multiplies.
  */
 
-#include "sysdep.h"
+#include <sysdep.h>
+
+
 ENTRY(.mul)
 	mov	%o0, %y		! multiplier -> Y
 	andncc	%o0, 0xfff, %g0	! test bits 12..31
-	be	Lmul_shortway	! if zero, can do it the short way
+	be	LOC(mul_shortway)	! if zero, can do it the short way
 	andcc	%g0, %g0, %o4	! zero the partial product and clear N and V
 
 	/*
@@ -81,7 +83,7 @@ ENTRY(.mul)
 				!  and put upper half in place
 #endif
 
-Lmul_shortway:
+LOC(mul_shortway):
 	/*
 	 * Short multiply.  12 steps, followed by a final shift step.
 	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
@@ -121,3 +123,5 @@ Lmul_shortway:
 	or	%o5, %o0, %o0	! construct low part of result
 	retl
 	sra	%o4, 20, %o1	! ... and extract high part of result
+
+END(.mul)
diff --git a/sysdeps/sparc/sparc32/elf/start.S b/sysdeps/sparc/sparc32/elf/start.S
new file mode 100644
index 0000000000..8e01b30fc7
--- /dev/null
+++ b/sysdeps/sparc/sparc32/elf/start.S
@@ -0,0 +1,86 @@
+/* Startup code for elf32-sparc
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <richard@gnu.ai.mit.edu>, 1997.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <sysdep.h>
+
+
+	.section ".text"
+	.align 4
+	.global _start
+	.type _start,#function
+_start:
+
+  /* Terminate the stack frame, and reserve space for functions to
+     drop their arguments.  */
+	mov	%g0, %fp
+	sub	%sp, 6*4, %sp
+
+  /* Save %g1.  When starting a binary via the dynamic linker, %g1
+     contains the address of the shared library termination function,
+     which we will register below with atexit() to be called by exit().
+     If we are statically linked, this will be NULL.  */
+
+  /* Do essential libc initialization (sp points to argc, argv, and envp)  */
+	call	__libc_init_first
+	 mov	%g1, %l0
+
+  /* Now that we have the proper stack frame, register library termination
+     function, if there is any:  */
+
+	cmp	%l0, 0
+	beq	1f
+	 nop
+	call	atexit
+	 mov	%l0, %o0
+1:
+
+  /* Extract argc, argv and envp as encoded on the stack.  They start past the
+     register window (16 words) plus the 6 argument words reserved above.  */
+	ld	[%sp+22*4], %o0		/* %o0 = argc */
+	add	%sp, 23*4, %o1		/* %o1 = argv */
+	sll	%o0, 2, %o2		/* argc * 4: pointers are 4 bytes */
+	add	%o2, %o1, %o2		/* %o2 = &argv[argc] */
+	sethi	%hi(__environ), %g2
+	add	%o2, 4, %o2		/* skip argv's NULL: %o2 = envp */
+	st	%o2, [%g2+%lo(__environ)]
+
+	mov	%o0, %l0		/* tuck them away */
+	mov	%o1, %l1
+
+  /* Call _init, the entry point to our own .init section.  */
+	call	_init
+	 mov	%o2, %l2
+
+  /* Register our .fini section with atexit.  */
+	sethi	%hi(_fini), %o0
+	call	atexit
+	 add	%o0, %lo(_fini), %o0
+
+  /* Call the user's main and exit with its return value.  */
+	mov	%l0, %o0
+	mov	%l1, %o1
+	call	main
+	 mov	%l2, %o2
+	call	exit
+	 nop
+
+  /* Die very horribly if exit returns.  */
+	unimp
+
+	.size _start,.-_start
diff --git a/sysdeps/sparc/sparc32/elf/start.c b/sysdeps/sparc/sparc32/elf/start.c
deleted file mode 100644
index f1e80195ca..0000000000
--- a/sysdeps/sparc/sparc32/elf/start.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/* Copyright (C) 1991, 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
-
-#include <errno.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-
-extern char **__environ;
-
-extern void __libc_init_first __P ((int argc, char **argv, char **envp));
-extern int main __P ((int argc, char **argv, char **envp));
-
-register long int sp asm("%sp"), fp asm("%fp");
-
-void
-_start (void)
-{
-  /* It is important that these be declared `register'.
-     Otherwise, when compiled without optimization, they are put on the
-     stack, which loses completely after we zero the FP.  */
-  register int argc;
-  register char **argv, **envp;
-  register long int g1 asm ("%g1");
-  unsigned long int copy_g1 = g1;
-
-  /* Unwind the frame built when we entered the function.  */
-  asm("restore");
-  if (copy_g1)
-    atexit (copy_g1);
-
-  /* And clear the frame pointer.  */
-  fp = 0;
-
-  /* The argument info starts after one register
-     window (64 bytes) past the SP.  */
-  argc = ((int *) sp)[16];
-  argv = (char **) &((int *) sp)[17];
-  envp = &argv[argc + 1];
-  __environ = envp;
-
-  /* Allocate 24 bytes of stack space for the register save area.  */
-  sp -= 24;
-  __libc_init_first (argc, argv, envp);
-#ifdef ELF_INIT_FINI
-  {
-    extern void _fini (void);
-    _init ();
-    atexit (_fini);
-  }
-#endif
-  exit (main (argc, argv, envp));
-}
diff --git a/sysdeps/sparc/sparc32/fpu/bits/fenv.h b/sysdeps/sparc/sparc32/fpu/bits/fenv.h
index 0560dd99b8..6a76795d6a 100644
--- a/sysdeps/sparc/sparc32/fpu/bits/fenv.h
+++ b/sysdeps/sparc/sparc32/fpu/bits/fenv.h
@@ -72,5 +72,5 @@ typedef unsigned int fenv_t;
 #endif
 
 /* For internal use only: access the fp state register.  */
-#define __fenv_stfsr(X)   __asm__("stfsr %0" : "=m"(X))
-#define __fenv_ldfsr(X)   __asm__ __volatile__("ldfsr %0" : : "m"(X))
+#define __fenv_stfsr(X)   __asm__("st %%fsr,%0" : "=m"(X))
+#define __fenv_ldfsr(X)   __asm__ __volatile__("ld %0,%%fsr" : : "m"(X))
diff --git a/sysdeps/sparc/sparc32/fpu/fpu_control.h b/sysdeps/sparc/sparc32/fpu/fpu_control.h
index 5ef3824702..c069318970 100644
--- a/sysdeps/sparc/sparc32/fpu/fpu_control.h
+++ b/sysdeps/sparc/sparc32/fpu/fpu_control.h
@@ -41,13 +41,11 @@
 
 /* Now two recommended cw */
 
-/* Linux default:
+/* Linux and IEEE default:
      - extended precision
      - rounding to nearest
-     - exceptions on overflow, zero divide and NaN */
-#define _FPU_DEFAULT  0x1e
-
-/* IEEE:  same as above, but exceptions */
+     - no exceptions.  */
+#define _FPU_DEFAULT  0x0
 #define _FPU_IEEE     0x0
 
 /* Type of the control word.  */
diff --git a/sysdeps/sparc/sparc32/lshift.S b/sysdeps/sparc/sparc32/lshift.S
index 4f0595f2fb..35aa9378b3 100644
--- a/sysdeps/sparc/sparc32/lshift.S
+++ b/sysdeps/sparc/sparc32/lshift.S
@@ -1,19 +1,19 @@
-! sparc __mpn_lshift --
-
-! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
-
+! Sparc __mpn_lshift --
+!
+! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+!
 ! This file is part of the GNU MP Library.
-
+!
 ! The GNU MP Library is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Library General Public License as published by
 ! the Free Software Foundation; either version 2 of the License, or (at your
 ! option) any later version.
-
+!
 ! The GNU MP Library is distributed in the hope that it will be useful, but
 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 ! License for more details.
-
+!
 ! You should have received a copy of the GNU Library General Public License
 ! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
 ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
@@ -21,17 +21,14 @@
 
 
 ! INPUT PARAMETERS
-! res_ptr	%o0
-! src_ptr	%o1
-! size		%o2
-! cnt		%o3
+! RES_PTR	%o0
+! SRC_PTR	%o1
+! SIZE		%o2
+! CNT		%o3
 
-#include "sysdep.h"
+#include <sysdep.h>
 
-	.text
-	.align	4
-	.global	C_SYMBOL_NAME(__mpn_lshift)
-C_SYMBOL_NAME(__mpn_lshift):
+ENTRY(__mpn_lshift)
 	sll	%o2,2,%g1
 	add	%o1,%g1,%o1	! make %o1 point at end of src
 	ld	[%o1-4],%g2	! load first limb
@@ -40,12 +37,13 @@ C_SYMBOL_NAME(__mpn_lshift):
 	add	%o2,-1,%o2
 	andcc	%o2,4-1,%g4	! number of limbs in first loop
 	srl	%g2,%o5,%g1	! compute function result
-	be	L0		! if multiple of 4 limbs, skip first loop
+	be	LOC(0)		! if multiple of 4 limbs, skip first loop
 	st	%g1,[%sp+80]
 
 	sub	%o2,%g4,%o2	! adjust count for main loop
 
-Loop0:	ld	[%o1-8],%g3
+LOC(loop0):
+	ld	[%o1-8],%g3
 	add	%o0,-4,%o0
 	add	%o1,-4,%o1
 	addcc	%g4,-1,%g4
@@ -53,14 +51,15 @@ Loop0:	ld	[%o1-8],%g3
 	srl	%g3,%o5,%g1
 	mov	%g3,%g2
 	or	%o4,%g1,%o4
-	bne	Loop0
+	bne	LOC(loop0)
 	 st	%o4,[%o0+0]
 
-L0:	tst	%o2
-	be	Lend
+LOC(0):	tst	%o2
+	be	LOC(end)
 	 nop
 
-Loop:	ld	[%o1-8],%g3
+LOC(loop):
+	ld	[%o1-8],%g3
 	add	%o0,-16,%o0
 	addcc	%o2,-4,%o2
 	sll	%g2,%o3,%o4
@@ -86,10 +85,13 @@ Loop:	ld	[%o1-8],%g3
 
 	add	%o1,-16,%o1
 	or	%g4,%g1,%g4
-	bne	Loop
+	bne	LOC(loop)
 	 st	%g4,[%o0+0]
 
-Lend:	sll	%g2,%o3,%g2
+LOC(end):
+	sll	%g2,%o3,%g2
 	st	%g2,[%o0-4]
 	retl
 	ld	[%sp+80],%o0
+
+END(__mpn_lshift)
diff --git a/sysdeps/sparc/sparc32/mul_1.S b/sysdeps/sparc/sparc32/mul_1.S
index 142fd8ba2a..d39ec61028 100644
--- a/sysdeps/sparc/sparc32/mul_1.S
+++ b/sysdeps/sparc/sparc32/mul_1.S
@@ -1,20 +1,20 @@
 ! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
 ! the result in a second limb vector.
-
-! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
-
+!
+! Copyright (C) 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
+!
 ! This file is part of the GNU MP Library.
-
+!
 ! The GNU MP Library is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Library General Public License as published by
 ! the Free Software Foundation; either version 2 of the License, or (at your
 ! option) any later version.
-
+!
 ! The GNU MP Library is distributed in the hope that it will be useful, but
 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 ! License for more details.
-
+!
 ! You should have received a copy of the GNU Library General Public License
 ! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
 ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
@@ -22,10 +22,10 @@
 
 
 ! INPUT PARAMETERS
-! res_ptr	o0
-! s1_ptr	o1
-! size		o2
-! s2_limb	o3
+! RES_PTR	o0
+! S1_PTR	o1
+! SIZE		o2
+! S2_LIMB	o3
 
 ! ADD CODE FOR SMALL MULTIPLIERS!
 !1:	ld
@@ -89,12 +89,9 @@
 !	sll	a,29,y2
 !	st	x,
 
-#include "sysdep.h"
+#include <sysdep.h>
 
-.text
-	.align 4
-	.global	C_SYMBOL_NAME(__mpn_mul_1)
-C_SYMBOL_NAME(__mpn_mul_1):
+ENTRY(__mpn_mul_1)
 	! Make S1_PTR and RES_PTR point at the end of their blocks
 	! and put (- 4 x SIZE) in index/loop counter.
 	sll	%o2,2,%o2
@@ -103,16 +100,16 @@ C_SYMBOL_NAME(__mpn_mul_1):
 	sub	%g0,%o2,%o2
 
 	cmp	%o3,0xfff
-	bgu	Large
+	bgu	LOC(large)
 	nop
 
 	ld	[%o1+%o2],%o5
 	mov	0,%o0
-	b	L0
+	b	LOC(0)
 	 add	%o4,-4,%o4
-Loop0:
+LOC(loop0):
 	st	%g1,[%o4+%o2]
-L0:	wr	%g0,%o3,%y
+LOC(0):	wr	%g0,%o3,%y
 	sra	%o5,31,%g2
 	and	%o3,%g2,%g2
 	andcc	%g1,0,%g1
@@ -138,21 +135,22 @@ L0:	wr	%g0,%o3,%y
 	addcc	%g1,%o0,%g1
 	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
 	addcc	%o2,4,%o2	! loop counter
-	bne,a	Loop0
+	bne,a	LOC(loop0)
 	 ld	[%o1+%o2],%o5
 
 	retl
 	st	%g1,[%o4+%o2]
 
 
-Large:	ld	[%o1+%o2],%o5
+LOC(large):
+	ld	[%o1+%o2],%o5
 	mov	0,%o0
 	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0
-	b	L1
+	b	LOC(1)
 	 add	%o4,-4,%o4
-Loop:
+LOC(loop):
 	st	%g3,[%o4+%o2]
-L1:	wr	%g0,%o5,%y
+LOC(1):	wr	%g0,%o5,%y
 	and	%o5,%g4,%g2	! g2 = S1_LIMB iff S2_LIMB < 0, else 0
 	andcc	%g0,%g0,%g1
 	mulscc	%g1,%o3,%g1
@@ -192,8 +190,10 @@ L1:	wr	%g0,%o5,%y
 	addcc	%g3,%o0,%g3
 	addx	%g2,%g1,%o0	! add sign-compensation and cy to hi limb
 	addcc	%o2,4,%o2	! loop counter
-	bne,a	Loop
+	bne,a	LOC(loop)
 	 ld	[%o1+%o2],%o5
 
 	retl
 	st	%g3,[%o4+%o2]
+
+END(__mpn_mul_1)
diff --git a/sysdeps/sparc/sparc32/rem.S b/sysdeps/sparc/sparc32/rem.S
index d50f1af630..effacee0d0 100644
--- a/sysdeps/sparc/sparc32/rem.S
+++ b/sysdeps/sparc/sparc32/rem.S
@@ -37,22 +37,14 @@
 
 
 
-#include "sysdep.h"
-#ifdef __linux__
-#include <asm/traps.h>
-#else
-#ifdef __svr4__
+#include <sysdep.h>
 #include <sys/trap.h>
-#else
-#include <machine/trap.h>
-#endif
-#endif
 
 ENTRY(.rem)
 	! compute sign of result; if neither is negative, no problem
 	orcc	%o1, %o0, %g0	! either negative?
 	bge	2f			! no, go do the divide
-	mov	%o0, %g6		! sign of remainder matches %o0
+	mov	%o0, %g3		! sign of remainder matches %o0
 	tst	%o1
 	bge	1f
 	tst	%o0
@@ -76,11 +68,11 @@ ENTRY(.rem)
 
 1:
 	cmp	%o3, %o5			! if %o1 exceeds %o0, done
-	blu	Lgot_result		! (and algorithm fails otherwise)
+	blu	LOC(got_result)		! (and algorithm fails otherwise)
 	clr	%o2
 	sethi	%hi(1 << (32 - 4 - 1)), %g1
 	cmp	%o3, %g1
-	blu	Lnot_really_big
+	blu	LOC(not_really_big)
 	clr	%o4
 
 	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
@@ -91,15 +83,15 @@ ENTRY(.rem)
 	1:
 		cmp	%o5, %g1
 		bgeu	3f
-		mov	1, %g7
+		mov	1, %g2
 		sll	%o5, 4, %o5
 		b	1b
 		add	%o4, 1, %o4
 
-	! Now compute %g7.
+	! Now compute %g2.
 	2:	addcc	%o5, %o5, %o5
-		bcc	Lnot_too_big
-		add	%g7, 1, %g7
+		bcc	LOC(not_too_big)
+		add	%g2, 1, %g2
 
 		! We get here if the %o1 overflowed while shifting.
 		! This means that %o3 has the high-order bit set.
@@ -107,20 +99,20 @@ ENTRY(.rem)
 		sll	%g1, 4, %g1	! high order bit
 		srl	%o5, 1, %o5		! rest of %o5
 		add	%o5, %g1, %o5
-		b	Ldo_single_div
-		sub	%g7, 1, %g7
+		b	LOC(do_single_div)
+		sub	%g2, 1, %g2
 
-	Lnot_too_big:
+	LOC(not_too_big):
 	3:	cmp	%o5, %o3
 		blu	2b
 		nop
-		be	Ldo_single_div
+		be	LOC(do_single_div)
 		nop
 	/* NB: these are commented out in the V8-Sparc manual as well */
 	/* (I do not understand this) */
 	! %o5 > %o3: went too far: back up 1 step
 	!	srl	%o5, 1, %o5
-	!	dec	%g7
+	!	dec	%g2
 	! do single-bit divide steps
 	!
 	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
@@ -129,15 +121,15 @@ ENTRY(.rem)
 	! order bit set in the first step, just falling into the regular
 	! division loop will mess up the first time around.
 	! So we unroll slightly...
-	Ldo_single_div:
-		subcc	%g7, 1, %g7
-		bl	Lend_regular_divide
+	LOC(do_single_div):
+		subcc	%g2, 1, %g2
+		bl	LOC(end_regular_divide)
 		nop
 		sub	%o3, %o5, %o3
 		mov	1, %o2
-		b	Lend_single_divloop
+		b	LOC(end_single_divloop)
 		nop
-	Lsingle_divloop:
+	LOC(single_divloop):
 		sll	%o2, 1, %o2
 		bl	1f
 		srl	%o5, 1, %o5
@@ -149,221 +141,223 @@ ENTRY(.rem)
 		add	%o3, %o5, %o3
 		sub	%o2, 1, %o2
 	2:
-	Lend_single_divloop:
-		subcc	%g7, 1, %g7
-		bge	Lsingle_divloop
+	LOC(end_single_divloop):
+		subcc	%g2, 1, %g2
+		bge	LOC(single_divloop)
 		tst	%o3
-		b,a	Lend_regular_divide
+		b,a	LOC(end_regular_divide)
 
-Lnot_really_big:
+LOC(not_really_big):
 1:
 	sll	%o5, 4, %o5
 	cmp	%o5, %o3
 	bleu	1b
 	addcc	%o4, 1, %o4
-	be	Lgot_result
+	be	LOC(got_result)
 	sub	%o4, 1, %o4
 
 	tst	%o3	! set up for initial iteration
-Ldivloop:
+LOC(divloop):
 	sll	%o2, 4, %o2
 		! depth 1, accumulated bits 0
-	bl	L.1.16
+	bl	LOC(1.16)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 2, accumulated bits 1
-	bl	L.2.17
+	bl	LOC(2.17)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 3, accumulated bits 3
-	bl	L.3.19
+	bl	LOC(3.19)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 7
-	bl	L.4.23
+	bl	LOC(4.23)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (7*2+1), %o2
-
-L.4.23:
+	
+LOC(4.23):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (7*2-1), %o2
-
-
-L.3.19:
+	
+	
+LOC(3.19):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 5
-	bl	L.4.21
+	bl	LOC(4.21)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (5*2+1), %o2
-
-L.4.21:
+	
+LOC(4.21):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (5*2-1), %o2
-
-
-
-L.2.17:
+	
+	
+	
+LOC(2.17):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 3, accumulated bits 1
-	bl	L.3.17
+	bl	LOC(3.17)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 3
-	bl	L.4.19
+	bl	LOC(4.19)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (3*2+1), %o2
-
-L.4.19:
+	
+LOC(4.19):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (3*2-1), %o2
-
-
-L.3.17:
+	
+	
+LOC(3.17):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 1
-	bl	L.4.17
+	bl	LOC(4.17)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (1*2+1), %o2
-
-L.4.17:
+	
+LOC(4.17):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (1*2-1), %o2
-
-
-
-
-L.1.16:
+	
+	
+	
+	
+LOC(1.16):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 2, accumulated bits -1
-	bl	L.2.15
+	bl	LOC(2.15)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 3, accumulated bits -1
-	bl	L.3.15
+	bl	LOC(3.15)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -1
-	bl	L.4.15
+	bl	LOC(4.15)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-1*2+1), %o2
-
-L.4.15:
+	
+LOC(4.15):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-1*2-1), %o2
-
-
-L.3.15:
+	
+	
+LOC(3.15):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -3
-	bl	L.4.13
+	bl	LOC(4.13)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-3*2+1), %o2
-
-L.4.13:
+	
+LOC(4.13):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-3*2-1), %o2
-
-
-
-L.2.15:
+	
+	
+	
+LOC(2.15):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 3, accumulated bits -3
-	bl	L.3.13
+	bl	LOC(3.13)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -5
-	bl	L.4.11
+	bl	LOC(4.11)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-5*2+1), %o2
-
-L.4.11:
+	
+LOC(4.11):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-5*2-1), %o2
-
-
-L.3.13:
+	
+	
+LOC(3.13):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -7
-	bl	L.4.9
+	bl	LOC(4.9)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-7*2+1), %o2
-
-L.4.9:
+	
+LOC(4.9):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-7*2-1), %o2
-
-
-
-
+	
+	
+	
+	
 	9:
-Lend_regular_divide:
+LOC(end_regular_divide):
 	subcc	%o4, 1, %o4
-	bge	Ldivloop
+	bge	LOC(divloop)
 	tst	%o3
-	bl,a	Lgot_result
+	bl,a	LOC(got_result)
 	! non-restoring fixup here (one instruction only!)
 	add	%o3, %o1, %o3
 
 
-Lgot_result:
+LOC(got_result):
 	! check to see if answer should be < 0
-	tst	%g6
+	tst	%g3
 	bl,a	1f
 	sub %g0, %o3, %o3
 1:
 	retl
 	mov %o3, %o0
+
+END(.rem)
diff --git a/sysdeps/sparc/sparc32/rshift.S b/sysdeps/sparc/sparc32/rshift.S
index fea4f3b926..db1d2526cd 100644
--- a/sysdeps/sparc/sparc32/rshift.S
+++ b/sysdeps/sparc/sparc32/rshift.S
@@ -1,19 +1,19 @@
 ! sparc __mpn_rshift --
-
-! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
-
+!
+! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+!
 ! This file is part of the GNU MP Library.
-
+!
 ! The GNU MP Library is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Library General Public License as published by
 ! the Free Software Foundation; either version 2 of the License, or (at your
 ! option) any later version.
-
+!
 ! The GNU MP Library is distributed in the hope that it will be useful, but
 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 ! License for more details.
-
+!
 ! You should have received a copy of the GNU Library General Public License
 ! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
 ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
@@ -21,28 +21,26 @@
 
 
 ! INPUT PARAMETERS
-! res_ptr	%o0
-! src_ptr	%o1
-! size		%o2
-! cnt		%o3
+! RES_PTR	%o0
+! SRC_PTR	%o1
+! SIZE		%o2
+! CNT		%o3
 
-#include "sysdep.h"
+#include <sysdep.h>
 
-	.text
-	.align	4
-	.global	C_SYMBOL_NAME(__mpn_rshift)
-C_SYMBOL_NAME(__mpn_rshift):
+ENTRY(__mpn_rshift)
 	ld	[%o1],%g2	! load first limb
 	sub	%g0,%o3,%o5	! negate shift count
 	add	%o2,-1,%o2
 	andcc	%o2,4-1,%g4	! number of limbs in first loop
 	sll	%g2,%o5,%g1	! compute function result
-	be	L0		! if multiple of 4 limbs, skip first loop
+	be	LOC(0)		! if multiple of 4 limbs, skip first loop
 	st	%g1,[%sp+80]
 
 	sub	%o2,%g4,%o2	! adjust count for main loop
 
-Loop0:	ld	[%o1+4],%g3
+LOC(loop0):
+	ld	[%o1+4],%g3
 	add	%o0,4,%o0
 	add	%o1,4,%o1
 	addcc	%g4,-1,%g4
@@ -50,14 +48,15 @@ Loop0:	ld	[%o1+4],%g3
 	sll	%g3,%o5,%g1
 	mov	%g3,%g2
 	or	%o4,%g1,%o4
-	bne	Loop0
+	bne	LOC(loop0)
 	 st	%o4,[%o0-4]
 
-L0:	tst	%o2
-	be	Lend
+LOC(0):	tst	%o2
+	be	LOC(end)
 	 nop
 
-Loop:	ld	[%o1+4],%g3
+LOC(loop):
+	ld	[%o1+4],%g3
 	add	%o0,16,%o0
 	addcc	%o2,-4,%o2
 	srl	%g2,%o3,%o4
@@ -83,10 +82,13 @@ Loop:	ld	[%o1+4],%g3
 
 	add	%o1,16,%o1
 	or	%g4,%g1,%g4
-	bne	Loop
+	bne	LOC(loop)
 	 st	%g4,[%o0-4]
 
-Lend:	srl	%g2,%o3,%g2
+LOC(end):
+	srl	%g2,%o3,%g2
 	st	%g2,[%o0-0]
 	retl
 	ld	[%sp+80],%o0
+
+END(__mpn_rshift)
diff --git a/sysdeps/sparc/sparc32/sdiv.S b/sysdeps/sparc/sparc32/sdiv.S
index 02ed2e973c..4b325476d7 100644
--- a/sysdeps/sparc/sparc32/sdiv.S
+++ b/sysdeps/sparc/sparc32/sdiv.S
@@ -37,22 +37,14 @@
 
 
 
-#include "sysdep.h"
-#ifdef __linux__
-#include <asm/traps.h>
-#else
-#ifdef __svr4__
+#include <sysdep.h>
 #include <sys/trap.h>
-#else
-#include <machine/trap.h>
-#endif
-#endif
 
 ENTRY(.div)
 	! compute sign of result; if neither is negative, no problem
 	orcc	%o1, %o0, %g0	! either negative?
 	bge	2f			! no, go do the divide
-	xor	%o1, %o0, %g6	! compute sign in any case
+	xor	%o1, %o0, %g3	! compute sign in any case
 	tst	%o1
 	bge	1f
 	tst	%o0
@@ -76,11 +68,11 @@ ENTRY(.div)
 
 1:
 	cmp	%o3, %o5			! if %o1 exceeds %o0, done
-	blu	Lgot_result		! (and algorithm fails otherwise)
+	blu	LOC(got_result)		! (and algorithm fails otherwise)
 	clr	%o2
 	sethi	%hi(1 << (32 - 4 - 1)), %g1
 	cmp	%o3, %g1
-	blu	Lnot_really_big
+	blu	LOC(not_really_big)
 	clr	%o4
 
 	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
@@ -91,15 +83,15 @@ ENTRY(.div)
 	1:
 		cmp	%o5, %g1
 		bgeu	3f
-		mov	1, %g7
+		mov	1, %g2
 		sll	%o5, 4, %o5
 		b	1b
 		add	%o4, 1, %o4
 
-	! Now compute %g7.
+	! Now compute %g2.
 	2:	addcc	%o5, %o5, %o5
-		bcc	Lnot_too_big
-		add	%g7, 1, %g7
+		bcc	LOC(not_too_big)
+		add	%g2, 1, %g2
 
 		! We get here if the %o1 overflowed while shifting.
 		! This means that %o3 has the high-order bit set.
@@ -107,20 +99,20 @@ ENTRY(.div)
 		sll	%g1, 4, %g1	! high order bit
 		srl	%o5, 1, %o5		! rest of %o5
 		add	%o5, %g1, %o5
-		b	Ldo_single_div
-		sub	%g7, 1, %g7
+		b	LOC(do_single_div)
+		sub	%g2, 1, %g2
 
-	Lnot_too_big:
+	LOC(not_too_big):
 	3:	cmp	%o5, %o3
 		blu	2b
 		nop
-		be	Ldo_single_div
+		be	LOC(do_single_div)
 		nop
 	/* NB: these are commented out in the V8-Sparc manual as well */
 	/* (I do not understand this) */
 	! %o5 > %o3: went too far: back up 1 step
 	!	srl	%o5, 1, %o5
-	!	dec	%g7
+	!	dec	%g2
 	! do single-bit divide steps
 	!
 	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
@@ -129,15 +121,15 @@ ENTRY(.div)
 	! order bit set in the first step, just falling into the regular
 	! division loop will mess up the first time around.
 	! So we unroll slightly...
-	Ldo_single_div:
-		subcc	%g7, 1, %g7
-		bl	Lend_regular_divide
+	LOC(do_single_div):
+		subcc	%g2, 1, %g2
+		bl	LOC(end_regular_divide)
 		nop
 		sub	%o3, %o5, %o3
 		mov	1, %o2
-		b	Lend_single_divloop
+		b	LOC(end_single_divloop)
 		nop
-	Lsingle_divloop:
+	LOC(single_divloop):
 		sll	%o2, 1, %o2
 		bl	1f
 		srl	%o5, 1, %o5
@@ -149,221 +141,223 @@ ENTRY(.div)
 		add	%o3, %o5, %o3
 		sub	%o2, 1, %o2
 	2:
-	Lend_single_divloop:
-		subcc	%g7, 1, %g7
-		bge	Lsingle_divloop
+	LOC(end_single_divloop):
+		subcc	%g2, 1, %g2
+		bge	LOC(single_divloop)
 		tst	%o3
-		b,a	Lend_regular_divide
+		b,a	LOC(end_regular_divide)
 
-Lnot_really_big:
+LOC(not_really_big):
 1:
 	sll	%o5, 4, %o5
 	cmp	%o5, %o3
 	bleu	1b
 	addcc	%o4, 1, %o4
-	be	Lgot_result
+	be	LOC(got_result)
 	sub	%o4, 1, %o4
 
 	tst	%o3	! set up for initial iteration
-Ldivloop:
+LOC(divloop):
 	sll	%o2, 4, %o2
 		! depth 1, accumulated bits 0
-	bl	L.1.16
+	bl	LOC(1.16)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 2, accumulated bits 1
-	bl	L.2.17
+	bl	LOC(2.17)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 3, accumulated bits 3
-	bl	L.3.19
+	bl	LOC(3.19)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 7
-	bl	L.4.23
+	bl	LOC(4.23)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (7*2+1), %o2
-
-L.4.23:
+	
+LOC(4.23):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (7*2-1), %o2
-
-
-L.3.19:
+	
+	
+LOC(3.19):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 5
-	bl	L.4.21
+	bl	LOC(4.21)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (5*2+1), %o2
-
-L.4.21:
+	
+LOC(4.21):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (5*2-1), %o2
-
-
-
-L.2.17:
+	
+	
+	
+LOC(2.17):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 3, accumulated bits 1
-	bl	L.3.17
+	bl	LOC(3.17)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 3
-	bl	L.4.19
+	bl	LOC(4.19)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (3*2+1), %o2
-
-L.4.19:
+	
+LOC(4.19):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (3*2-1), %o2
-
-
-L.3.17:
+	
+	
+LOC(3.17):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 1
-	bl	L.4.17
+	bl	LOC(4.17)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (1*2+1), %o2
-
-L.4.17:
+	
+LOC(4.17):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (1*2-1), %o2
-
-
-
-
-L.1.16:
+	
+	
+	
+	
+LOC(1.16):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 2, accumulated bits -1
-	bl	L.2.15
+	bl	LOC(2.15)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 3, accumulated bits -1
-	bl	L.3.15
+	bl	LOC(3.15)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -1
-	bl	L.4.15
+	bl	LOC(4.15)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-1*2+1), %o2
-
-L.4.15:
+	
+LOC(4.15):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-1*2-1), %o2
-
-
-L.3.15:
+	
+	
+LOC(3.15):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -3
-	bl	L.4.13
+	bl	LOC(4.13)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-3*2+1), %o2
-
-L.4.13:
+	
+LOC(4.13):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-3*2-1), %o2
-
-
-
-L.2.15:
+	
+	
+	
+LOC(2.15):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 3, accumulated bits -3
-	bl	L.3.13
+	bl	LOC(3.13)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -5
-	bl	L.4.11
+	bl	LOC(4.11)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-5*2+1), %o2
-
-L.4.11:
+	
+LOC(4.11):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-5*2-1), %o2
-
-
-L.3.13:
+	
+	
+LOC(3.13):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -7
-	bl	L.4.9
+	bl	LOC(4.9)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-7*2+1), %o2
-
-L.4.9:
+	
+LOC(4.9):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-7*2-1), %o2
-
-
-
-
+	
+	
+	
+	
 	9:
-Lend_regular_divide:
+LOC(end_regular_divide):
 	subcc	%o4, 1, %o4
-	bge	Ldivloop
+	bge	LOC(divloop)
 	tst	%o3
-	bl,a	Lgot_result
+	bl,a	LOC(got_result)
 	! non-restoring fixup here (one instruction only!)
 	sub	%o2, 1, %o2
 
 
-Lgot_result:
+LOC(got_result):
 	! check to see if answer should be < 0
-	tst	%g6
+	tst	%g3
 	bl,a	1f
 	sub %g0, %o2, %o2
 1:
 	retl
 	mov %o2, %o0
+
+END(.div)
diff --git a/sysdeps/sparc/sparc32/setjmp.S b/sysdeps/sparc/sparc32/setjmp.S
index 772ec723a3..58d94119ef 100644
--- a/sysdeps/sparc/sparc32/setjmp.S
+++ b/sysdeps/sparc/sparc32/setjmp.S
@@ -22,33 +22,29 @@
 #define _ASM 1
 #include <bits/setjmp.h>
 
+ENTRY(_setjmp)
+	b	1f		! join the common code at the next local label 1
+	 set	0, %o1		! delay slot: %o1 = 0 (presumably the savemask argument; confirm)
+END(_setjmp)
+
+ENTRY(setjmp)
+	set	1, %o1		! %o1 = 1; no branch or return here, execution runs on into the code that follows
+END(setjmp)
+
 ENTRY (__sigsetjmp)
-	/* Save our SP and FP; in the delay slot of the jump, save our
-	   return PC.  Save the signal mask if requested with a tail-call
-	   for simplicity; it always returns zero.  */
-	ta   ST_FLUSH_WINDOWS
-#ifdef PIC
-	mov  %o7,%g1
-2:
-	call 1f
-	nop
 1:
-	sethi %hi(_GLOBAL_OFFSET_TABLE_-(2b-.)),%g2
-	or    %g2,%lo(_GLOBAL_OFFSET_TABLE_-(2b-.)),%g2
-	add   %g2,%o7,%g2
-	sethi %hi(C_SYMBOL_NAME (__sigjmp_save)), %g3
-	or    %g3,%lo(C_SYMBOL_NAME (__sigjmp_save)), %g3
-	st    %sp, [%o0 + (JB_SP * 4)]
-	st    %fp, [%o0 + (JB_FP * 4)]
-	mov   %g1,%o7
-	ld    [%g2+%g3],%g1
-	jmp   %g1
-	st   %o7, [%o0+(JB_PC*4)]
-#else
-	sethi %hi(C_SYMBOL_NAME (__sigjmp_save)), %g1
-	st %sp, [%o0 + (JB_SP*4)]
-	or %lo(C_SYMBOL_NAME (__sigjmp_save)), %g1, %g1
-	st %fp, [%o0 + (JB_FP*4)]
-	jmp %g1
-	 st %o7, [%o0 + (JB_PC*4)]
-#endif /* PIC */
+	/* Save our PC, SP and FP.  Save the signal mask if requested with
+	   a tail-call for simplicity; it always returns zero.  */
+	ta	ST_FLUSH_WINDOWS
+
+	st	%o7, [%o0 + (JB_PC * 4)]
+	st	%sp, [%o0 + (JB_SP * 4)]
+	st	%fp, [%o0 + (JB_FP * 4)]
+
+	mov	%o7, %g1
+	call	__sigjmp_save
+	 mov	%g1, %o7
+END(__sigsetjmp)
+
+weak_extern(_setjmp)
+weak_extern(setjmp)
diff --git a/sysdeps/sparc/sparc32/sparcv8/addmul_1.S b/sysdeps/sparc/sparc32/sparcv8/addmul_1.S
index fb9ea7cf0e..7b871b8e8b 100644
--- a/sysdeps/sparc/sparc32/sparcv8/addmul_1.S
+++ b/sysdeps/sparc/sparc32/sparcv8/addmul_1.S
@@ -27,54 +27,52 @@
 ! size		o2
 ! s2_limb	o3
 
-#include "sysdep.h"
+#include <sysdep.h>
 
-.text
-	.align 4
-	.global	C_SYMBOL_NAME(__mpn_addmul_1)
-C_SYMBOL_NAME(__mpn_addmul_1):
-	orcc	%g0,%g0,%g2
+ENTRY(__mpn_addmul_1)
 	ld	[%o1+0],%o4	! 1
-
 	sll	%o2,4,%g1
-	and	%g1,(4-1)<<4,%g1
-#if PIC
+	orcc	%g0,%g0,%g2
 	mov	%o7,%g4			! Save return address register
-	call	1f
-	add	%o7,LL-1f,%g3
-1:	mov	%g4,%o7			! Restore return address register
-#else
-	sethi	%hi(LL),%g3
-	or	%g3,%lo(LL),%g3
-#endif
-	jmp	%g3+%g1
-	nop
-LL:
-LL00:	add	%o0,-4,%o0
-	b	Loop00		/* 4, 8, 12, ... */
-	add	%o1,-4,%o1
-	nop
-LL01:	b	Loop01		/* 1, 5, 9, ... */
+	and	%g1,(4-1)<<4,%g1
+1:	call	2f
+	 add	%o7,3f-1b,%g3
+2:	jmp	%g3+%g1
+	 mov	%g4,%o7			! Restore return address register
+
+	.align	4
+3:
+LOC(00):
+	add	%o0,-4,%o0
+	b	LOC(loop00)		/* 4, 8, 12, ... */
+	 add	%o1,-4,%o1
 	nop
+LOC(01):
+	b	LOC(loop01)		/* 1, 5, 9, ... */
+	 nop
 	nop
 	nop
-LL10:	add	%o0,-12,%o0	/* 2, 6, 10, ... */
-	b	Loop10
-	add	%o1,4,%o1
+LOC(10):
+	add	%o0,-12,%o0	/* 2, 6, 10, ... */
+	b	LOC(loop10)
+	 add	%o1,4,%o1
 	nop
-LL11:	add	%o0,-8,%o0	/* 3, 7, 11, ... */
-	b	Loop11
-	add	%o1,-8,%o1
+LOC(11):
+	add	%o0,-8,%o0	/* 3, 7, 11, ... */
+	b	LOC(loop11)
+	 add	%o1,-8,%o1
 	nop
 
-1:	addcc	%g3,%g2,%g3	! 1
+LOC(loop):
+	addcc	%g3,%g2,%g3	! 1
 	ld	[%o1+4],%o4	! 2
 	rd	%y,%g2		! 1
 	addx	%g0,%g2,%g2
 	ld	[%o0+0],%g1	! 2
 	addcc	%g1,%g3,%g3
 	st	%g3,[%o0+0]	! 1
-Loop00:	umul	%o4,%o3,%g3	! 2
+LOC(loop00):
+	umul	%o4,%o3,%g3	! 2
 	ld	[%o0+4],%g1	! 2
 	addxcc	%g3,%g2,%g3	! 2
 	ld	[%o1+8],%o4	! 3
@@ -83,7 +81,8 @@ Loop00:	umul	%o4,%o3,%g3	! 2
 	nop
 	addcc	%g1,%g3,%g3
 	st	%g3,[%o0+4]	! 2
-Loop11:	umul	%o4,%o3,%g3	! 3
+LOC(loop11):
+	umul	%o4,%o3,%g3	! 3
 	addxcc	%g3,%g2,%g3	! 3
 	ld	[%o1+12],%o4	! 4
 	rd	%y,%g2		! 3
@@ -92,7 +91,8 @@ Loop11:	umul	%o4,%o3,%g3	! 3
 	ld	[%o0+8],%g1	! 2
 	addcc	%g1,%g3,%g3
 	st	%g3,[%o0+8]	! 3
-Loop10:	umul	%o4,%o3,%g3	! 4
+LOC(loop10):
+	umul	%o4,%o3,%g3	! 4
 	addxcc	%g3,%g2,%g3	! 4
 	ld	[%o1+0],%o4	! 1
 	rd	%y,%g2		! 4
@@ -102,9 +102,10 @@ Loop10:	umul	%o4,%o3,%g3	! 4
 	st	%g3,[%o0+12]	! 4
 	add	%o0,16,%o0
 	addx	%g0,%g2,%g2
-Loop01:	addcc	%o2,-4,%o2
-	bg	1b
-	umul	%o4,%o3,%g3	! 1
+LOC(loop01):
+	addcc	%o2,-4,%o2
+	bg	LOC(loop)
+	 umul	%o4,%o3,%g3	! 1
 
 	addcc	%g3,%g2,%g3	! 4
 	rd	%y,%g2		! 4
@@ -112,13 +113,7 @@ Loop01:	addcc	%o2,-4,%o2
 	ld	[%o0+0],%g1	! 2
 	addcc	%g1,%g3,%g3
 	st	%g3,[%o0+0]	! 4
-	addx	%g0,%g2,%o0
-
 	retl
-	 nop
-
-
-!	umul, ld, addxcc, rd, st
-
-!	umul, ld, addxcc, rd, ld, addcc, st, addx
+	 addx	%g0,%g2,%o0
 
+END(__mpn_addmul_1)
diff --git a/sysdeps/sparc/sparc32/sparcv8/dotmul.S b/sysdeps/sparc/sparc32/sparcv8/dotmul.S
new file mode 100644
index 0000000000..9b20cc3684
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv8/dotmul.S
@@ -0,0 +1,13 @@
+/*
+ * Sparc v8 has multiply.
+ */
+
+#include <sysdep.h>
+
+ENTRY(.mul)
+
+	smul	%o0, %o1, %o0	! %o0 = low word of the signed product %o0 * %o1
+	retl			! leaf routine: return through %o7
+	 rd	%y, %o1		! delay slot: copy %y (high word of the product) into %o1
+
+END(.mul)
diff --git a/sysdeps/sparc/sparc32/sparcv8/mul_1.S b/sysdeps/sparc/sparc32/sparcv8/mul_1.S
index b641feb453..801e224c15 100644
--- a/sysdeps/sparc/sparc32/sparcv8/mul_1.S
+++ b/sysdeps/sparc/sparc32/sparcv8/mul_1.S
@@ -27,73 +27,77 @@
 ! size		o2
 ! s2_limb	o3
 
-#include "sysdep.h"
+#include <sysdep.h>
 
-.text
-	.align	8
-	.global	C_SYMBOL_NAME(__mpn_mul_1)
-C_SYMBOL_NAME(__mpn_mul_1):
+ENTRY(__mpn_mul_1)
 	sll	%o2,4,%g1
-	and	%g1,(4-1)<<4,%g1
-#if PIC
 	mov	%o7,%g4			! Save return address register
-	call	1f
-	add	%o7,LL-1f,%g3
-1:	mov	%g4,%o7			! Restore return address register
-#else
-	sethi	%hi(LL),%g3
-	or	%g3,%lo(LL),%g3
-#endif
+	and	%g1,(4-1)<<4,%g1
+1:	call	2f
+	 add	%o7,3f-1b,%g3
+2:	mov	%g4,%o7			! Restore return address register
 	jmp	%g3+%g1
-	ld	[%o1+0],%o4	! 1
-LL:
-LL00:	add	%o0,-4,%o0
+	 ld	[%o1+0],%o4	! 1
+
+	.align	4
+3:
+LOC(00):
+	add	%o0,-4,%o0
 	add	%o1,-4,%o1
-	b	Loop00		/* 4, 8, 12, ... */
-	orcc	%g0,%g0,%g2
-LL01:	b	Loop01		/* 1, 5, 9, ... */
-	orcc	%g0,%g0,%g2
+	b	LOC(loop00)		/* 4, 8, 12, ... */
+	 orcc	%g0,%g0,%g2
+LOC(01):
+	b	LOC(loop01)		/* 1, 5, 9, ... */
+	 orcc	%g0,%g0,%g2
 	nop
 	nop
-LL10:	add	%o0,-12,%o0	/* 2, 6, 10, ... */
+LOC(10):
+	add	%o0,-12,%o0	/* 2, 6, 10, ... */
 	add	%o1,4,%o1
-	b	Loop10
-	orcc	%g0,%g0,%g2
+	b	LOC(loop10)
+	 orcc	%g0,%g0,%g2
 	nop
-LL11:	add	%o0,-8,%o0	/* 3, 7, 11, ... */
+LOC(11):
+	add	%o0,-8,%o0	/* 3, 7, 11, ... */
 	add	%o1,-8,%o1
-	b	Loop11
-	orcc	%g0,%g0,%g2
+	b	LOC(loop11)
+	 orcc	%g0,%g0,%g2
 
-Loop:	addcc	%g3,%g2,%g3	! 1
+LOC(loop):
+	addcc	%g3,%g2,%g3	! 1
 	ld	[%o1+4],%o4	! 2
 	st	%g3,[%o0+0]	! 1
 	rd	%y,%g2		! 1
-Loop00:	umul	%o4,%o3,%g3	! 2
+LOC(loop00):
+	umul	%o4,%o3,%g3	! 2
 	addxcc	%g3,%g2,%g3	! 2
 	ld	[%o1+8],%o4	! 3
 	st	%g3,[%o0+4]	! 2
 	rd	%y,%g2		! 2
-Loop11:	umul	%o4,%o3,%g3	! 3
+LOC(loop11):
+	umul	%o4,%o3,%g3	! 3
 	addxcc	%g3,%g2,%g3	! 3
 	ld	[%o1+12],%o4	! 4
 	add	%o1,16,%o1
 	st	%g3,[%o0+8]	! 3
 	rd	%y,%g2		! 3
-Loop10:	umul	%o4,%o3,%g3	! 4
+LOC(loop10):
+	umul	%o4,%o3,%g3	! 4
 	addxcc	%g3,%g2,%g3	! 4
 	ld	[%o1+0],%o4	! 1
 	st	%g3,[%o0+12]	! 4
 	add	%o0,16,%o0
 	rd	%y,%g2		! 4
 	addx	%g0,%g2,%g2
-Loop01:	addcc	%o2,-4,%o2
-	bg	Loop
-	umul	%o4,%o3,%g3	! 1
+LOC(loop01):
+	addcc	%o2,-4,%o2
+	bg	LOC(loop)
+	 umul	%o4,%o3,%g3	! 1
 
 	addcc	%g3,%g2,%g3	! 4
 	st	%g3,[%o0+0]	! 4
 	rd	%y,%g2		! 4
-
 	retl
-	addx	%g0,%g2,%o0
+	 addx	%g0,%g2,%o0
+
+END(__mpn_mul_1)
diff --git a/sysdeps/sparc/sparc32/sparcv8/rem.S b/sysdeps/sparc/sparc32/sparcv8/rem.S
new file mode 100644
index 0000000000..061e80093c
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv8/rem.S
@@ -0,0 +1,18 @@
+/*
+ * Sparc v8 has divide.
+ */
+
+#include <sysdep.h>
+
+ENTRY(.rem)
+
+	sra	%o0, 31, %o2	! %o2 = sign extension of the dividend
+	wr	%o2, 0, %y	! %y:%o0 = 64-bit dividend. NOTE(review): WRY is a delayed write on V8; confirm the sdivcc on the next line sees it
+	sdivcc	%o0, %o1, %o2	! %o2 = quotient, condition codes updated
+	bvs,a	1f		! annulling branch: the delay slot runs only when V (overflow) is set
+	 xnor	%o2, %g0, %o2	! overflow case only: bitwise complement of the quotient
+1:	smul	%o2, %o1, %o2	! %o2 = quotient * divisor
+	retl			! leaf routine: return through %o7
+	 sub	%o0, %o2, %o0	! delay slot: remainder = dividend - quotient * divisor
+
+END(.rem)
diff --git a/sysdeps/sparc/sparc32/sparcv8/sdiv.S b/sysdeps/sparc/sparc32/sparcv8/sdiv.S
new file mode 100644
index 0000000000..81c3ac1c97
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv8/sdiv.S
@@ -0,0 +1,14 @@
+/*
+ * Sparc v8 has divide.
+ */
+
+#include <sysdep.h>
+
+ENTRY(.div)
+
+	sra	%o0, 31, %o2	! %o2 = sign extension of the dividend
+	wr	%o2, 0, %y	! %y:%o0 = 64-bit dividend. NOTE(review): WRY is a delayed write on V8; confirm the sdiv two instructions later sees it
+	retl			! leaf routine (no save): return through %o7, not %i7
+	 sdiv	%o0, %o1, %o0	! delay slot: %o0 = (%y:%o0) / %o1, signed
+
+END(.div)
diff --git a/sysdeps/sparc/sparc32/sparcv8/submul_1.S b/sysdeps/sparc/sparc32/sparcv8/submul_1.S
index e40119d011..c71f228511 100644
--- a/sysdeps/sparc/sparc32/sparcv8/submul_1.S
+++ b/sysdeps/sparc/sparc32/sparcv8/submul_1.S
@@ -27,12 +27,9 @@
 ! size		o2
 ! s2_limb	o3
 
-#include "sysdep.h"
+#include <sysdep.h>
 
-.text
-	.align 4
-	.global	C_SYMBOL_NAME(__mpn_submul_1)
-C_SYMBOL_NAME(__mpn_submul_1):
+ENTRY(__mpn_submul_1)
 	sub	%g0,%o2,%o2		! negate ...
 	sll	%o2,2,%o2		! ... and scale size
 	sub	%o1,%o2,%o1		! o1 is offset s1_ptr
@@ -40,7 +37,8 @@ C_SYMBOL_NAME(__mpn_submul_1):
 
 	mov	0,%o0			! clear cy_limb
 
-Loop:	ld	[%o1+%o2],%o4
+LOC(loop):
+	ld	[%o1+%o2],%o4
 	ld	[%g1+%o2],%g2
 	umul	%o4,%o3,%o5
 	rd	%y,%g3
@@ -51,8 +49,10 @@ Loop:	ld	[%o1+%o2],%o4
 	st	%g2,[%g1+%o2]
 
 	addcc	%o2,4,%o2
-	bne	Loop
+	bne	LOC(loop)
 	 nop
 
 	retl
 	 nop
+
+END(__mpn_submul_1)
diff --git a/sysdeps/sparc/sparc32/sparcv8/udiv.S b/sysdeps/sparc/sparc32/sparcv8/udiv.S
new file mode 100644
index 0000000000..4e3cddc63f
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv8/udiv.S
@@ -0,0 +1,13 @@
+/*
+ * Sparc v8 has divide.
+ */
+
+#include <sysdep.h>
+
+ENTRY(.udiv)
+
+	wr	%g0, 0, %y	! %y = 0: high word of the 64-bit dividend. NOTE(review): WRY is a delayed write on V8; confirm the udiv two instructions later sees it
+	retl			! leaf routine: return through %o7
+	 udiv	%o0, %o1, %o0	! delay slot: %o0 = (%y:%o0) / %o1, unsigned
+
+END(.udiv)
diff --git a/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S b/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S
index 49c2398806..5b0d42923a 100644
--- a/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S
+++ b/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S
@@ -27,66 +27,75 @@
 
 #include "sysdep.h"
 
-	.text
-	.align 4
-	.global	C_SYMBOL_NAME(__udiv_qrnnd)
-C_SYMBOL_NAME(__udiv_qrnnd):
+ENTRY(__udiv_qrnnd)
 	tst	%o3
-	bneg	Largedivisor
+	bneg	LOC(largedivisor)
 	mov	8,%g1
 
-	b	Lp1
+	b	LOC(p1)
 	addxcc	%o2,%o2,%o2
 
-Lplop:	bcc	Ln1
+LOC(plop):
+	bcc	LOC(n1)
 	addxcc	%o2,%o2,%o2
-Lp1:	addx	%o1,%o1,%o1
+LOC(p1):
+	addx	%o1,%o1,%o1
 	subcc	%o1,%o3,%o4
-	bcc	Ln2
+	bcc	LOC(n2)
 	addxcc	%o2,%o2,%o2
-Lp2:	addx	%o1,%o1,%o1
+LOC(p2):
+	addx	%o1,%o1,%o1
 	subcc	%o1,%o3,%o4
-	bcc	Ln3
+	bcc	LOC(n3)
 	addxcc	%o2,%o2,%o2
-Lp3:	addx	%o1,%o1,%o1
+LOC(p3):
+	addx	%o1,%o1,%o1
 	subcc	%o1,%o3,%o4
-	bcc	Ln4
+	bcc	LOC(n4)
 	addxcc	%o2,%o2,%o2
-Lp4:	addx	%o1,%o1,%o1
+LOC(p4):
+	addx	%o1,%o1,%o1
 	addcc	%g1,-1,%g1
-	bne	Lplop
+	bne	LOC(plop)
 	subcc	%o1,%o3,%o4
-	bcc	Ln5
+	bcc	LOC(n5)
 	addxcc	%o2,%o2,%o2
-Lp5:	st	%o1,[%o0]
+LOC(p5):
+	st	%o1,[%o0]
 	retl
 	xnor	%g0,%o2,%o0
 
-Lnlop:	bcc	Lp1
+LOC(nlop):
+	bcc	LOC(p1)
 	addxcc	%o2,%o2,%o2
-Ln1:	addx	%o4,%o4,%o4
+LOC(n1):
+	addx	%o4,%o4,%o4
 	subcc	%o4,%o3,%o1
-	bcc	Lp2
+	bcc	LOC(p2)
 	addxcc	%o2,%o2,%o2
-Ln2:	addx	%o4,%o4,%o4
+LOC(n2):
+	addx	%o4,%o4,%o4
 	subcc	%o4,%o3,%o1
-	bcc	Lp3
+	bcc	LOC(p3)
 	addxcc	%o2,%o2,%o2
-Ln3:	addx	%o4,%o4,%o4
+LOC(n3):
+	addx	%o4,%o4,%o4
 	subcc	%o4,%o3,%o1
-	bcc	Lp4
+	bcc	LOC(p4)
 	addxcc	%o2,%o2,%o2
-Ln4:	addx	%o4,%o4,%o4
+LOC(n4):
+	addx	%o4,%o4,%o4
 	addcc	%g1,-1,%g1
-	bne	Lnlop
+	bne	LOC(nlop)
 	subcc	%o4,%o3,%o1
-	bcc	Lp5
+	bcc	LOC(p5)
 	addxcc	%o2,%o2,%o2
-Ln5:	st	%o4,[%o0]
+LOC(n5):
+	st	%o4,[%o0]
 	retl
 	xnor	%g0,%o2,%o0
 
-Largedivisor:
+LOC(largedivisor):
 	and	%o2,1,%o5	! %o5 = n0 & 1
 
 	srl	%o2,1,%o2
@@ -98,89 +107,109 @@ Largedivisor:
 	srl	%o3,1,%g3	! %g3 = floor(d / 2)
 	add	%g3,%g2,%g3	! %g3 = ceil(d / 2)
 
-	b	LLp1
+	b	LOC(Lp1)
 	addxcc	%o2,%o2,%o2
 
-LLplop:	bcc	LLn1
+LOC(Lplop):
+	bcc	LOC(Ln1)
 	addxcc	%o2,%o2,%o2
-LLp1:	addx	%o1,%o1,%o1
+LOC(Lp1):
+	addx	%o1,%o1,%o1
 	subcc	%o1,%g3,%o4
-	bcc	LLn2
+	bcc	LOC(Ln2)
 	addxcc	%o2,%o2,%o2
-LLp2:	addx	%o1,%o1,%o1
+LOC(Lp2):
+	addx	%o1,%o1,%o1
 	subcc	%o1,%g3,%o4
-	bcc	LLn3
+	bcc	LOC(Ln3)
 	addxcc	%o2,%o2,%o2
-LLp3:	addx	%o1,%o1,%o1
+LOC(Lp3):
+	addx	%o1,%o1,%o1
 	subcc	%o1,%g3,%o4
-	bcc	LLn4
+	bcc	LOC(Ln4)
 	addxcc	%o2,%o2,%o2
-LLp4:	addx	%o1,%o1,%o1
+LOC(Lp4):
+	addx	%o1,%o1,%o1
 	addcc	%g1,-1,%g1
-	bne	LLplop
+	bne	LOC(Lplop)
 	subcc	%o1,%g3,%o4
-	bcc	LLn5
+	bcc	LOC(Ln5)
 	addxcc	%o2,%o2,%o2
-LLp5:	add	%o1,%o1,%o1	! << 1
+LOC(Lp5):
+	add	%o1,%o1,%o1	! << 1
 	tst	%g2
-	bne	Oddp
+	bne	LOC(Oddp)
 	add	%o5,%o1,%o1
 	st	%o1,[%o0]
 	retl
 	xnor	%g0,%o2,%o0
 
-LLnlop:	bcc	LLp1
+LOC(Lnlop):
+	bcc	LOC(Lp1)
 	addxcc	%o2,%o2,%o2
-LLn1:	addx	%o4,%o4,%o4
+LOC(Ln1):
+	addx	%o4,%o4,%o4
 	subcc	%o4,%g3,%o1
-	bcc	LLp2
+	bcc	LOC(Lp2)
 	addxcc	%o2,%o2,%o2
-LLn2:	addx	%o4,%o4,%o4
+LOC(Ln2):
+	addx	%o4,%o4,%o4
 	subcc	%o4,%g3,%o1
-	bcc	LLp3
+	bcc	LOC(Lp3)
 	addxcc	%o2,%o2,%o2
-LLn3:	addx	%o4,%o4,%o4
+LOC(Ln3):
+	addx	%o4,%o4,%o4
 	subcc	%o4,%g3,%o1
-	bcc	LLp4
+	bcc	LOC(Lp4)
 	addxcc	%o2,%o2,%o2
-LLn4:	addx	%o4,%o4,%o4
+LOC(Ln4):
+	addx	%o4,%o4,%o4
 	addcc	%g1,-1,%g1
-	bne	LLnlop
+	bne	LOC(Lnlop)
 	subcc	%o4,%g3,%o1
-	bcc	LLp5
+	bcc	LOC(Lp5)
 	addxcc	%o2,%o2,%o2
-LLn5:	add	%o4,%o4,%o4	! << 1
+LOC(Ln5):
+	add	%o4,%o4,%o4	! << 1
 	tst	%g2
-	bne	Oddn
+	bne	LOC(Oddn)
 	add	%o5,%o4,%o4
 	st	%o4,[%o0]
 	retl
 	xnor	%g0,%o2,%o0
 
-Oddp:	xnor	%g0,%o2,%o2
+LOC(Oddp):
+	xnor	%g0,%o2,%o2
 	! q' in %o2. r' in %o1
 	addcc	%o1,%o2,%o1
-	bcc	LLp6
+	bcc	LOC(Lp6)
 	addx	%o2,0,%o2
 	sub	%o1,%o3,%o1
-LLp6:	subcc	%o1,%o3,%g0
-	bcs	LLp7
+LOC(Lp6):
+	subcc	%o1,%o3,%g0
+	bcs	LOC(Lp7)
 	subx	%o2,-1,%o2
 	sub	%o1,%o3,%o1
-LLp7:	st	%o1,[%o0]
+LOC(Lp7):
+	st	%o1,[%o0]
 	retl
 	mov	%o2,%o0
 
-Oddn:	xnor	%g0,%o2,%o2
+LOC(Oddn):
+	xnor	%g0,%o2,%o2
 	! q' in %o2. r' in %o4
 	addcc	%o4,%o2,%o4
-	bcc	LLn6
+	bcc	LOC(Ln6)
 	addx	%o2,0,%o2
 	sub	%o4,%o3,%o4
-LLn6:	subcc	%o4,%o3,%g0
-	bcs	LLn7
+LOC(Ln6):
+	subcc	%o4,%o3,%g0
+	bcs	LOC(Ln7)
 	subx	%o2,-1,%o2
 	sub	%o4,%o3,%o4
-LLn7:	st	%o4,[%o0]
+LOC(Ln7):
+	st	%o4,[%o0]
 	retl
 	mov	%o2,%o0
+
+END(__udiv_qrnnd)
diff --git a/sysdeps/sparc/sparc32/sparcv8/umul.S b/sysdeps/sparc/sparc32/sparcv8/umul.S
new file mode 100644
index 0000000000..cec454a7dd
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv8/umul.S
@@ -0,0 +1,13 @@
+/*
+ * Sparc v8 has multiply.
+ */
+
+#include <sysdep.h>
+
+ENTRY(.umul)
+
+	umul	%o0, %o1, %o0	! %o0 = low word of the unsigned product %o0 * %o1
+	retl			! leaf routine: return through %o7
+	 rd	%y, %o1		! delay slot: copy %y (high word of the product) into %o1
+
+END(.umul)
diff --git a/sysdeps/sparc/sparc32/sparcv8/urem.S b/sysdeps/sparc/sparc32/sparcv8/urem.S
new file mode 100644
index 0000000000..81e123f3da
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv8/urem.S
@@ -0,0 +1,15 @@
+/*
+ * Sparc v8 has divide.
+ */
+
+#include <sysdep.h>
+
+ENTRY(.urem)
+
+	wr	%g0, 0, %y	! %y = 0: high word of the 64-bit dividend. NOTE(review): WRY is a delayed write on V8; confirm the udiv on the next line sees it
+	udiv	%o0, %o1, %o2	! %o2 = unsigned quotient
+	umul	%o2, %o1, %o2	! %o2 = quotient * divisor
+	retl			! leaf routine: return through %o7
+	 sub	%o0, %o2, %o0	! delay slot: remainder = dividend - quotient * divisor
+
+END(.urem)
diff --git a/sysdeps/sparc/sparc32/sub_n.S b/sysdeps/sparc/sparc32/sub_n.S
index b7a11958e2..74400600bd 100644
--- a/sysdeps/sparc/sparc32/sub_n.S
+++ b/sysdeps/sparc/sparc32/sub_n.S
@@ -1,20 +1,20 @@
 ! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
 ! store difference in a third limb vector.
-
+!
 ! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
-
+!
 ! This file is part of the GNU MP Library.
-
+!
 ! The GNU MP Library is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Library General Public License as published by
 ! the Free Software Foundation; either version 2 of the License, or (at your
 ! option) any later version.
-
+!
 ! The GNU MP Library is distributed in the hope that it will be useful, but
 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 ! License for more details.
-
+!
 ! You should have received a copy of the GNU Library General Public License
 ! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
 ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
@@ -22,290 +22,308 @@
 
 
 ! INPUT PARAMETERS
-#define res_ptr	%o0
-#define s1_ptr	%o1
-#define s2_ptr	%o2
-#define size	%o3
+#define RES_PTR	%o0
+#define S1_PTR	%o1
+#define S2_PTR	%o2
+#define SIZE	%o3
 
-#include "sysdep.h"
+#include <sysdep.h>
 
-	.text
-	.align	4
-	.global	C_SYMBOL_NAME(__mpn_sub_n)
-C_SYMBOL_NAME(__mpn_sub_n):
-	xor	s2_ptr,res_ptr,%g1
+ENTRY(__mpn_sub_n)
+	xor	S2_PTR,RES_PTR,%g1
 	andcc	%g1,4,%g0
-	bne	L1			! branch if alignment differs
+	bne	LOC(1)			! branch if alignment differs
 	nop
 ! **  V1a  **
-	andcc	res_ptr,4,%g0		! res_ptr unaligned? Side effect: cy=0
-	be	L_v1			! if no, branch
+	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
+	be	LOC(v1)			! if no, branch
 	nop
-/* Add least significant limb separately to align res_ptr and s2_ptr */
-	ld	[s1_ptr],%g4
-	add	s1_ptr,4,s1_ptr
-	ld	[s2_ptr],%g2
-	add	s2_ptr,4,s2_ptr
-	add	size,-1,size
+/* Add least significant limb separately to align RES_PTR and S2_PTR */
+	ld	[S1_PTR],%g4
+	add	S1_PTR,4,S1_PTR
+	ld	[S2_PTR],%g2
+	add	S2_PTR,4,S2_PTR
+	add	SIZE,-1,SIZE
 	subcc	%g4,%g2,%o4
-	st	%o4,[res_ptr]
-	add	res_ptr,4,res_ptr
-L_v1:	addx	%g0,%g0,%o4		! save cy in register
-	cmp	size,2			! if size < 2 ...
-	bl	Lend2			! ... branch to tail code
+	st	%o4,[RES_PTR]
+	add	RES_PTR,4,RES_PTR
+LOC(v1):
+	addx	%g0,%g0,%o4		! save cy in register
+	cmp	SIZE,2			! if SIZE < 2 ...
+	bl	LOC(end2)		! ... branch to tail code
 	subcc	%g0,%o4,%g0		! restore cy
 
-	ld	[s1_ptr+0],%g4
-	addcc	size,-10,size
-	ld	[s1_ptr+4],%g1
-	ldd	[s2_ptr+0],%g2
-	blt	Lfin1
+	ld	[S1_PTR+0],%g4
+	addcc	SIZE,-10,SIZE
+	ld	[S1_PTR+4],%g1
+	ldd	[S2_PTR+0],%g2
+	blt	LOC(fin1)
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add blocks of 8 limbs until less than 8 limbs remain */
-Loop1:	subxcc	%g4,%g2,%o4
-	ld	[s1_ptr+8],%g4
+LOC(loop1):
+	subxcc	%g4,%g2,%o4
+	ld	[S1_PTR+8],%g4
 	subxcc	%g1,%g3,%o5
-	ld	[s1_ptr+12],%g1
-	ldd	[s2_ptr+8],%g2
-	std	%o4,[res_ptr+0]
+	ld	[S1_PTR+12],%g1
+	ldd	[S2_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
 	subxcc	%g4,%g2,%o4
-	ld	[s1_ptr+16],%g4
+	ld	[S1_PTR+16],%g4
 	subxcc	%g1,%g3,%o5
-	ld	[s1_ptr+20],%g1
-	ldd	[s2_ptr+16],%g2
-	std	%o4,[res_ptr+8]
+	ld	[S1_PTR+20],%g1
+	ldd	[S2_PTR+16],%g2
+	std	%o4,[RES_PTR+8]
 	subxcc	%g4,%g2,%o4
-	ld	[s1_ptr+24],%g4
+	ld	[S1_PTR+24],%g4
 	subxcc	%g1,%g3,%o5
-	ld	[s1_ptr+28],%g1
-	ldd	[s2_ptr+24],%g2
-	std	%o4,[res_ptr+16]
+	ld	[S1_PTR+28],%g1
+	ldd	[S2_PTR+24],%g2
+	std	%o4,[RES_PTR+16]
 	subxcc	%g4,%g2,%o4
-	ld	[s1_ptr+32],%g4
+	ld	[S1_PTR+32],%g4
 	subxcc	%g1,%g3,%o5
-	ld	[s1_ptr+36],%g1
-	ldd	[s2_ptr+32],%g2
-	std	%o4,[res_ptr+24]
+	ld	[S1_PTR+36],%g1
+	ldd	[S2_PTR+32],%g2
+	std	%o4,[RES_PTR+24]
 	addx	%g0,%g0,%o4		! save cy in register
-	addcc	size,-8,size
-	add	s1_ptr,32,s1_ptr
-	add	s2_ptr,32,s2_ptr
-	add	res_ptr,32,res_ptr
-	bge	Loop1
+	addcc	SIZE,-8,SIZE
+	add	S1_PTR,32,S1_PTR
+	add	S2_PTR,32,S2_PTR
+	add	RES_PTR,32,RES_PTR
+	bge	LOC(loop1)
 	subcc	%g0,%o4,%g0		! restore cy
 
-Lfin1:	addcc	size,8-2,size
-	blt	Lend1
+LOC(fin1):
+	addcc	SIZE,8-2,SIZE
+	blt	LOC(end1)
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add blocks of 2 limbs until less than 2 limbs remain */
-Loope1:	subxcc	%g4,%g2,%o4
-	ld	[s1_ptr+8],%g4
+LOC(loope1):
+	subxcc	%g4,%g2,%o4
+	ld	[S1_PTR+8],%g4
 	subxcc	%g1,%g3,%o5
-	ld	[s1_ptr+12],%g1
-	ldd	[s2_ptr+8],%g2
-	std	%o4,[res_ptr+0]
+	ld	[S1_PTR+12],%g1
+	ldd	[S2_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
 	addx	%g0,%g0,%o4		! save cy in register
-	addcc	size,-2,size
-	add	s1_ptr,8,s1_ptr
-	add	s2_ptr,8,s2_ptr
-	add	res_ptr,8,res_ptr
-	bge	Loope1
+	addcc	SIZE,-2,SIZE
+	add	S1_PTR,8,S1_PTR
+	add	S2_PTR,8,S2_PTR
+	add	RES_PTR,8,RES_PTR
+	bge	LOC(loope1)
 	subcc	%g0,%o4,%g0		! restore cy
-Lend1:	subxcc	%g4,%g2,%o4
+LOC(end1):
+	subxcc	%g4,%g2,%o4
 	subxcc	%g1,%g3,%o5
-	std	%o4,[res_ptr+0]
+	std	%o4,[RES_PTR+0]
 	addx	%g0,%g0,%o4		! save cy in register
 
-	andcc	size,1,%g0
-	be	Lret1
+	andcc	SIZE,1,%g0
+	be	LOC(ret1)
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add last limb */
-	ld	[s1_ptr+8],%g4
-	ld	[s2_ptr+8],%g2
+	ld	[S1_PTR+8],%g4
+	ld	[S2_PTR+8],%g2
 	subxcc	%g4,%g2,%o4
-	st	%o4,[res_ptr+8]
+	st	%o4,[RES_PTR+8]
 
-Lret1:	retl
+LOC(ret1):
+	retl
 	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
 
-L1:	xor	s1_ptr,res_ptr,%g1
+LOC(1):	xor	S1_PTR,RES_PTR,%g1
 	andcc	%g1,4,%g0
-	bne	L2
+	bne	LOC(2)
 	nop
 ! **  V1b  **
-	andcc	res_ptr,4,%g0		! res_ptr unaligned? Side effect: cy=0
-	be	L_v1b			! if no, branch
+	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
+	be	LOC(v1b)		! if no, branch
 	nop
-/* Add least significant limb separately to align res_ptr and s1_ptr */
-	ld	[s2_ptr],%g4
-	add	s2_ptr,4,s2_ptr
-	ld	[s1_ptr],%g2
-	add	s1_ptr,4,s1_ptr
-	add	size,-1,size
+/* Add least significant limb separately to align RES_PTR and S1_PTR */
+	ld	[S2_PTR],%g4
+	add	S2_PTR,4,S2_PTR
+	ld	[S1_PTR],%g2
+	add	S1_PTR,4,S1_PTR
+	add	SIZE,-1,SIZE
 	subcc	%g2,%g4,%o4
-	st	%o4,[res_ptr]
-	add	res_ptr,4,res_ptr
-L_v1b:	addx	%g0,%g0,%o4		! save cy in register
-	cmp	size,2			! if size < 2 ...
-	bl	Lend2			! ... branch to tail code
+	st	%o4,[RES_PTR]
+	add	RES_PTR,4,RES_PTR
+LOC(v1b):
+	addx	%g0,%g0,%o4		! save cy in register
+	cmp	SIZE,2			! if SIZE < 2 ...
+	bl	LOC(end2)		! ... branch to tail code
 	subcc	%g0,%o4,%g0		! restore cy
 
-	ld	[s2_ptr+0],%g4
-	addcc	size,-10,size
-	ld	[s2_ptr+4],%g1
-	ldd	[s1_ptr+0],%g2
-	blt	Lfin1b
+	ld	[S2_PTR+0],%g4
+	addcc	SIZE,-10,SIZE
+	ld	[S2_PTR+4],%g1
+	ldd	[S1_PTR+0],%g2
+	blt	LOC(fin1b)
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add blocks of 8 limbs until less than 8 limbs remain */
-Loop1b:	subxcc	%g2,%g4,%o4
-	ld	[s2_ptr+8],%g4
+LOC(loop1b):
+	subxcc	%g2,%g4,%o4
+	ld	[S2_PTR+8],%g4
 	subxcc	%g3,%g1,%o5
-	ld	[s2_ptr+12],%g1
-	ldd	[s1_ptr+8],%g2
-	std	%o4,[res_ptr+0]
+	ld	[S2_PTR+12],%g1
+	ldd	[S1_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
 	subxcc	%g2,%g4,%o4
-	ld	[s2_ptr+16],%g4
+	ld	[S2_PTR+16],%g4
 	subxcc	%g3,%g1,%o5
-	ld	[s2_ptr+20],%g1
-	ldd	[s1_ptr+16],%g2
-	std	%o4,[res_ptr+8]
+	ld	[S2_PTR+20],%g1
+	ldd	[S1_PTR+16],%g2
+	std	%o4,[RES_PTR+8]
 	subxcc	%g2,%g4,%o4
-	ld	[s2_ptr+24],%g4
+	ld	[S2_PTR+24],%g4
 	subxcc	%g3,%g1,%o5
-	ld	[s2_ptr+28],%g1
-	ldd	[s1_ptr+24],%g2
-	std	%o4,[res_ptr+16]
+	ld	[S2_PTR+28],%g1
+	ldd	[S1_PTR+24],%g2
+	std	%o4,[RES_PTR+16]
 	subxcc	%g2,%g4,%o4
-	ld	[s2_ptr+32],%g4
+	ld	[S2_PTR+32],%g4
 	subxcc	%g3,%g1,%o5
-	ld	[s2_ptr+36],%g1
-	ldd	[s1_ptr+32],%g2
-	std	%o4,[res_ptr+24]
+	ld	[S2_PTR+36],%g1
+	ldd	[S1_PTR+32],%g2
+	std	%o4,[RES_PTR+24]
 	addx	%g0,%g0,%o4		! save cy in register
-	addcc	size,-8,size
-	add	s1_ptr,32,s1_ptr
-	add	s2_ptr,32,s2_ptr
-	add	res_ptr,32,res_ptr
-	bge	Loop1b
+	addcc	SIZE,-8,SIZE
+	add	S1_PTR,32,S1_PTR
+	add	S2_PTR,32,S2_PTR
+	add	RES_PTR,32,RES_PTR
+	bge	LOC(loop1b)
 	subcc	%g0,%o4,%g0		! restore cy
 
-Lfin1b:	addcc	size,8-2,size
-	blt	Lend1b
+LOC(fin1b):
+	addcc	SIZE,8-2,SIZE
+	blt	LOC(end1b)
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add blocks of 2 limbs until less than 2 limbs remain */
-Loope1b:subxcc	%g2,%g4,%o4
-	ld	[s2_ptr+8],%g4
+LOC(loope1b):
+	subxcc	%g2,%g4,%o4
+	ld	[S2_PTR+8],%g4
 	subxcc	%g3,%g1,%o5
-	ld	[s2_ptr+12],%g1
-	ldd	[s1_ptr+8],%g2
-	std	%o4,[res_ptr+0]
+	ld	[S2_PTR+12],%g1
+	ldd	[S1_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
 	addx	%g0,%g0,%o4		! save cy in register
-	addcc	size,-2,size
-	add	s1_ptr,8,s1_ptr
-	add	s2_ptr,8,s2_ptr
-	add	res_ptr,8,res_ptr
-	bge	Loope1b
+	addcc	SIZE,-2,SIZE
+	add	S1_PTR,8,S1_PTR
+	add	S2_PTR,8,S2_PTR
+	add	RES_PTR,8,RES_PTR
+	bge	LOC(loope1b)
 	subcc	%g0,%o4,%g0		! restore cy
-Lend1b:	subxcc	%g2,%g4,%o4
+LOC(end1b):
+	subxcc	%g2,%g4,%o4
 	subxcc	%g3,%g1,%o5
-	std	%o4,[res_ptr+0]
+	std	%o4,[RES_PTR+0]
 	addx	%g0,%g0,%o4		! save cy in register
 
-	andcc	size,1,%g0
-	be	Lret1b
+	andcc	SIZE,1,%g0
+	be	LOC(ret1b)
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add last limb */
-	ld	[s2_ptr+8],%g4
-	ld	[s1_ptr+8],%g2
+	ld	[S2_PTR+8],%g4
+	ld	[S1_PTR+8],%g2
 	subxcc	%g2,%g4,%o4
-	st	%o4,[res_ptr+8]
+	st	%o4,[RES_PTR+8]
 
-Lret1b:	retl
+LOC(ret1b):
+	retl
 	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
 
 ! **  V2  **
-/* If we come here, the alignment of s1_ptr and res_ptr as well as the
-   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
+/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
+   alignment of S2_PTR and RES_PTR differ.  Since there are only two ways
    things can be aligned (that we care about) we now know that the alignment
-   of s1_ptr and s2_ptr are the same.  */
+   of S1_PTR and S2_PTR are the same.  */
 
-L2:	cmp	size,1
-	be	Ljone
+LOC(2):	cmp	SIZE,1
+	be	LOC(jone)
 	nop
-	andcc	s1_ptr,4,%g0		! s1_ptr unaligned? Side effect: cy=0
-	be	L_v2			! if no, branch
+	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
+	be	LOC(v2)			! if no, branch
 	nop
-/* Add least significant limb separately to align s1_ptr and s2_ptr */
-	ld	[s1_ptr],%g4
-	add	s1_ptr,4,s1_ptr
-	ld	[s2_ptr],%g2
-	add	s2_ptr,4,s2_ptr
-	add	size,-1,size
+/* Subtract least significant limb separately to align S1_PTR and S2_PTR */
+	ld	[S1_PTR],%g4
+	add	S1_PTR,4,S1_PTR
+	ld	[S2_PTR],%g2
+	add	S2_PTR,4,S2_PTR
+	add	SIZE,-1,SIZE
 	subcc	%g4,%g2,%o4
-	st	%o4,[res_ptr]
-	add	res_ptr,4,res_ptr
+	st	%o4,[RES_PTR]
+	add	RES_PTR,4,RES_PTR
 
-L_v2:	addx	%g0,%g0,%o4		! save cy in register
-	addcc	size,-8,size
-	blt	Lfin2
+LOC(v2):
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-8,SIZE
+	blt	LOC(fin2)
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add blocks of 8 limbs until less than 8 limbs remain */
-Loop2:	ldd	[s1_ptr+0],%g2
-	ldd	[s2_ptr+0],%o4
+LOC(loop2):
+	ldd	[S1_PTR+0],%g2
+	ldd	[S2_PTR+0],%o4
 	subxcc	%g2,%o4,%g2
-	st	%g2,[res_ptr+0]
+	st	%g2,[RES_PTR+0]
 	subxcc	%g3,%o5,%g3
-	st	%g3,[res_ptr+4]
-	ldd	[s1_ptr+8],%g2
-	ldd	[s2_ptr+8],%o4
+	st	%g3,[RES_PTR+4]
+	ldd	[S1_PTR+8],%g2
+	ldd	[S2_PTR+8],%o4
 	subxcc	%g2,%o4,%g2
-	st	%g2,[res_ptr+8]
+	st	%g2,[RES_PTR+8]
 	subxcc	%g3,%o5,%g3
-	st	%g3,[res_ptr+12]
-	ldd	[s1_ptr+16],%g2
-	ldd	[s2_ptr+16],%o4
+	st	%g3,[RES_PTR+12]
+	ldd	[S1_PTR+16],%g2
+	ldd	[S2_PTR+16],%o4
 	subxcc	%g2,%o4,%g2
-	st	%g2,[res_ptr+16]
+	st	%g2,[RES_PTR+16]
 	subxcc	%g3,%o5,%g3
-	st	%g3,[res_ptr+20]
-	ldd	[s1_ptr+24],%g2
-	ldd	[s2_ptr+24],%o4
+	st	%g3,[RES_PTR+20]
+	ldd	[S1_PTR+24],%g2
+	ldd	[S2_PTR+24],%o4
 	subxcc	%g2,%o4,%g2
-	st	%g2,[res_ptr+24]
+	st	%g2,[RES_PTR+24]
 	subxcc	%g3,%o5,%g3
-	st	%g3,[res_ptr+28]
+	st	%g3,[RES_PTR+28]
 	addx	%g0,%g0,%o4		! save cy in register
-	addcc	size,-8,size
-	add	s1_ptr,32,s1_ptr
-	add	s2_ptr,32,s2_ptr
-	add	res_ptr,32,res_ptr
-	bge	Loop2
+	addcc	SIZE,-8,SIZE
+	add	S1_PTR,32,S1_PTR
+	add	S2_PTR,32,S2_PTR
+	add	RES_PTR,32,RES_PTR
+	bge	LOC(loop2)
 	subcc	%g0,%o4,%g0		! restore cy
 
-Lfin2:	addcc	size,8-2,size
-	blt	Lend2
+LOC(fin2):
+	addcc	SIZE,8-2,SIZE
+	blt	LOC(end2)
 	subcc	%g0,%o4,%g0		! restore cy
-Loope2:	ldd	[s1_ptr+0],%g2
-	ldd	[s2_ptr+0],%o4
+LOC(loope2):
+	ldd	[S1_PTR+0],%g2
+	ldd	[S2_PTR+0],%o4
 	subxcc	%g2,%o4,%g2
-	st	%g2,[res_ptr+0]
+	st	%g2,[RES_PTR+0]
 	subxcc	%g3,%o5,%g3
-	st	%g3,[res_ptr+4]
+	st	%g3,[RES_PTR+4]
 	addx	%g0,%g0,%o4		! save cy in register
-	addcc	size,-2,size
-	add	s1_ptr,8,s1_ptr
-	add	s2_ptr,8,s2_ptr
-	add	res_ptr,8,res_ptr
-	bge	Loope2
+	addcc	SIZE,-2,SIZE
+	add	S1_PTR,8,S1_PTR
+	add	S2_PTR,8,S2_PTR
+	add	RES_PTR,8,RES_PTR
+	bge	LOC(loope2)
 	subcc	%g0,%o4,%g0		! restore cy
-Lend2:	andcc	size,1,%g0
-	be	Lret2
+LOC(end2):
+	andcc	SIZE,1,%g0
+	be	LOC(ret2)
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add last limb */
-Ljone:	ld	[s1_ptr],%g4
-	ld	[s2_ptr],%g2
+LOC(jone):
+	ld	[S1_PTR],%g4
+	ld	[S2_PTR],%g2
 	subxcc	%g4,%g2,%o4
-	st	%o4,[res_ptr]
+	st	%o4,[RES_PTR]
 
-Lret2:	retl
+LOC(ret2):
+	retl
 	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
+
+END(__mpn_sub_n)
diff --git a/sysdeps/sparc/sparc32/submul_1.S b/sysdeps/sparc/sparc32/submul_1.S
index a8ebd501a7..805699b7b2 100644
--- a/sysdeps/sparc/sparc32/submul_1.S
+++ b/sysdeps/sparc/sparc32/submul_1.S
@@ -1,20 +1,20 @@
 ! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
 ! the result from a second limb vector.
-
-! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
-
+!
+! Copyright (C) 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
+!
 ! This file is part of the GNU MP Library.
-
+!
 ! The GNU MP Library is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Library General Public License as published by
 ! the Free Software Foundation; either version 2 of the License, or (at your
 ! option) any later version.
-
+!
 ! The GNU MP Library is distributed in the hope that it will be useful, but
 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 ! License for more details.
-
+!
 ! You should have received a copy of the GNU Library General Public License
 ! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
 ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
@@ -22,17 +22,14 @@
 
 
 ! INPUT PARAMETERS
-! res_ptr	o0
-! s1_ptr	o1
-! size		o2
-! s2_limb	o3
+! RES_PTR	o0
+! S1_PTR	o1
+! SIZE		o2
+! S2_LIMB	o3
 
-#include "sysdep.h"
+#include <sysdep.h>
 
-.text
-	.align 4
-	.global	C_SYMBOL_NAME(__mpn_submul_1)
-C_SYMBOL_NAME(__mpn_submul_1):
+ENTRY(__mpn_submul_1)
 	! Make S1_PTR and RES_PTR point at the end of their blocks
 	! and put (- 4 x SIZE) in index/loop counter.
 	sll	%o2,2,%o2
@@ -41,19 +38,19 @@ C_SYMBOL_NAME(__mpn_submul_1):
 	sub	%g0,%o2,%o2
 
 	cmp	%o3,0xfff
-	bgu	Large
+	bgu	LOC(large)
 	nop
 
 	ld	[%o1+%o2],%o5
 	mov	0,%o0
-	b	L0
+	b	LOC(0)
 	 add	%o4,-4,%o4
-Loop0:
+LOC(loop0):
 	subcc	%o5,%g1,%g1
 	ld	[%o1+%o2],%o5
 	addx	%o0,%g0,%o0
 	st	%g1,[%o4+%o2]
-L0:	wr	%g0,%o3,%y
+LOC(0):	wr	%g0,%o3,%y
 	sra	%o5,31,%g2
 	and	%o3,%g2,%g2
 	andcc	%g1,0,%g1
@@ -79,7 +76,7 @@ L0:	wr	%g0,%o3,%y
 	addcc	%g1,%o0,%g1
 	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
 	addcc	%o2,4,%o2	! loop counter
-	bne	Loop0
+	bne	LOC(loop0)
 	 ld	[%o4+%o2],%o5
 
 	subcc	%o5,%g1,%g1
@@ -88,17 +85,18 @@ L0:	wr	%g0,%o3,%y
 	st	%g1,[%o4+%o2]
 
 
-Large:	ld	[%o1+%o2],%o5
+LOC(large):
+	ld	[%o1+%o2],%o5
 	mov	0,%o0
 	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0
-	b	L1
+	b	LOC(1)
 	 add	%o4,-4,%o4
-Loop:
+LOC(loop):
 	subcc	%o5,%g3,%g3
 	ld	[%o1+%o2],%o5
 	addx	%o0,%g0,%o0
 	st	%g3,[%o4+%o2]
-L1:	wr	%g0,%o5,%y
+LOC(1):	wr	%g0,%o5,%y
 	and	%o5,%g4,%g2
 	andcc	%g0,%g0,%g1
 	mulscc	%g1,%o3,%g1
@@ -138,10 +136,12 @@ L1:	wr	%g0,%o5,%y
 	addcc	%g3,%o0,%g3
 	addx	%g2,%g1,%o0
 	addcc	%o2,4,%o2
-	bne	Loop
+	bne	LOC(loop)
 	 ld	[%o4+%o2],%o5
 
 	subcc	%o5,%g3,%g3
 	addx	%o0,%g0,%o0
 	retl
 	st	%g3,[%o4+%o2]
+
+END(__mpn_submul_1)
diff --git a/sysdeps/sparc/sparc32/udiv.S b/sysdeps/sparc/sparc32/udiv.S
index 4a7d1526b0..0dd48f32b5 100644
--- a/sysdeps/sparc/sparc32/udiv.S
+++ b/sysdeps/sparc/sparc32/udiv.S
@@ -37,16 +37,8 @@
 
 
 
-#include "sysdep.h"
-#ifdef __linux__
-#include <asm/traps.h>
-#else
-#ifdef __svr4__
+#include <sysdep.h>
 #include <sys/trap.h>
-#else
-#include <machine/trap.h>
-#endif
-#endif
 
 ENTRY(.udiv)
 
@@ -63,11 +55,11 @@ ENTRY(.udiv)
 
 1:
 	cmp	%o3, %o5			! if %o1 exceeds %o0, done
-	blu	Lgot_result		! (and algorithm fails otherwise)
+	blu	LOC(got_result)		! (and algorithm fails otherwise)
 	clr	%o2
 	sethi	%hi(1 << (32 - 4 - 1)), %g1
 	cmp	%o3, %g1
-	blu	Lnot_really_big
+	blu	LOC(not_really_big)
 	clr	%o4
 
 	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
@@ -78,15 +70,15 @@ ENTRY(.udiv)
 	1:
 		cmp	%o5, %g1
 		bgeu	3f
-		mov	1, %g7
+		mov	1, %g2
 		sll	%o5, 4, %o5
 		b	1b
 		add	%o4, 1, %o4
 
-	! Now compute %g7.
+	! Now compute %g2.
 	2:	addcc	%o5, %o5, %o5
-		bcc	Lnot_too_big
-		add	%g7, 1, %g7
+		bcc	LOC(not_too_big)
+		add	%g2, 1, %g2
 
 		! We get here if the %o1 overflowed while shifting.
 		! This means that %o3 has the high-order bit set.
@@ -94,20 +86,20 @@ ENTRY(.udiv)
 		sll	%g1, 4, %g1	! high order bit
 		srl	%o5, 1, %o5		! rest of %o5
 		add	%o5, %g1, %o5
-		b	Ldo_single_div
-		sub	%g7, 1, %g7
+		b	LOC(do_single_div)
+		sub	%g2, 1, %g2
 
-	Lnot_too_big:
+	LOC(not_too_big):
 	3:	cmp	%o5, %o3
 		blu	2b
 		nop
-		be	Ldo_single_div
+		be	LOC(do_single_div)
 		nop
 	/* NB: these are commented out in the V8-Sparc manual as well */
 	/* (I do not understand this) */
 	! %o5 > %o3: went too far: back up 1 step
 	!	srl	%o5, 1, %o5
-	!	dec	%g7
+	!	dec	%g2
 	! do single-bit divide steps
 	!
 	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
@@ -116,15 +108,15 @@ ENTRY(.udiv)
 	! order bit set in the first step, just falling into the regular
 	! division loop will mess up the first time around.
 	! So we unroll slightly...
-	Ldo_single_div:
-		subcc	%g7, 1, %g7
-		bl	Lend_regular_divide
+	LOC(do_single_div):
+		subcc	%g2, 1, %g2
+		bl	LOC(end_regular_divide)
 		nop
 		sub	%o3, %o5, %o3
 		mov	1, %o2
-		b	Lend_single_divloop
+		b	LOC(end_single_divloop)
 		nop
-	Lsingle_divloop:
+	LOC(single_divloop):
 		sll	%o2, 1, %o2
 		bl	1f
 		srl	%o5, 1, %o5
@@ -136,217 +128,219 @@ ENTRY(.udiv)
 		add	%o3, %o5, %o3
 		sub	%o2, 1, %o2
 	2:
-	Lend_single_divloop:
-		subcc	%g7, 1, %g7
-		bge	Lsingle_divloop
+	LOC(end_single_divloop):
+		subcc	%g2, 1, %g2
+		bge	LOC(single_divloop)
 		tst	%o3
-		b,a	Lend_regular_divide
+		b,a	LOC(end_regular_divide)
 
-Lnot_really_big:
+LOC(not_really_big):
 1:
 	sll	%o5, 4, %o5
 	cmp	%o5, %o3
 	bleu	1b
 	addcc	%o4, 1, %o4
-	be	Lgot_result
+	be	LOC(got_result)
 	sub	%o4, 1, %o4
 
 	tst	%o3	! set up for initial iteration
-Ldivloop:
+LOC(divloop):
 	sll	%o2, 4, %o2
 		! depth 1, accumulated bits 0
-	bl	L.1.16
+	bl	LOC(1.16)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 2, accumulated bits 1
-	bl	L.2.17
+	bl	LOC(2.17)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 3, accumulated bits 3
-	bl	L.3.19
+	bl	LOC(3.19)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 7
-	bl	L.4.23
+	bl	LOC(4.23)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (7*2+1), %o2
-
-L.4.23:
+	
+LOC(4.23):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (7*2-1), %o2
-
-
-L.3.19:
+	
+	
+LOC(3.19):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 5
-	bl	L.4.21
+	bl	LOC(4.21)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (5*2+1), %o2
-
-L.4.21:
+	
+LOC(4.21):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (5*2-1), %o2
-
-
-
-L.2.17:
+	
+	
+	
+LOC(2.17):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 3, accumulated bits 1
-	bl	L.3.17
+	bl	LOC(3.17)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 3
-	bl	L.4.19
+	bl	LOC(4.19)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (3*2+1), %o2
-
-L.4.19:
+	
+LOC(4.19):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (3*2-1), %o2
-
-
-L.3.17:
+	
+	
+LOC(3.17):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 1
-	bl	L.4.17
+	bl	LOC(4.17)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (1*2+1), %o2
-
-L.4.17:
+	
+LOC(4.17):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (1*2-1), %o2
-
-
-
-
-L.1.16:
+	
+	
+	
+	
+LOC(1.16):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 2, accumulated bits -1
-	bl	L.2.15
+	bl	LOC(2.15)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 3, accumulated bits -1
-	bl	L.3.15
+	bl	LOC(3.15)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -1
-	bl	L.4.15
+	bl	LOC(4.15)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-1*2+1), %o2
-
-L.4.15:
+	
+LOC(4.15):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-1*2-1), %o2
-
-
-L.3.15:
+	
+	
+LOC(3.15):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -3
-	bl	L.4.13
+	bl	LOC(4.13)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-3*2+1), %o2
-
-L.4.13:
+	
+LOC(4.13):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-3*2-1), %o2
-
-
-
-L.2.15:
+	
+	
+	
+LOC(2.15):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 3, accumulated bits -3
-	bl	L.3.13
+	bl	LOC(3.13)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -5
-	bl	L.4.11
+	bl	LOC(4.11)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-5*2+1), %o2
-
-L.4.11:
+	
+LOC(4.11):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-5*2-1), %o2
-
-
-L.3.13:
+	
+	
+LOC(3.13):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -7
-	bl	L.4.9
+	bl	LOC(4.9)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-7*2+1), %o2
-
-L.4.9:
+	
+LOC(4.9):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-7*2-1), %o2
-
-
-
-
+	
+	
+	
+	
 	9:
-Lend_regular_divide:
+LOC(end_regular_divide):
 	subcc	%o4, 1, %o4
-	bge	Ldivloop
+	bge	LOC(divloop)
 	tst	%o3
-	bl,a	Lgot_result
+	bl,a	LOC(got_result)
 	! non-restoring fixup here (one instruction only!)
 	sub	%o2, 1, %o2
 
 
-Lgot_result:
+LOC(got_result):
 
 	retl
 	mov %o2, %o0
+
+END(.udiv)
diff --git a/sysdeps/sparc/sparc32/udiv_qrnnd.S b/sysdeps/sparc/sparc32/udiv_qrnnd.S
index 5b67f874c3..2fbb8a5f54 100644
--- a/sysdeps/sparc/sparc32/udiv_qrnnd.S
+++ b/sysdeps/sparc/sparc32/udiv_qrnnd.S
@@ -1,50 +1,52 @@
 ! SPARC  __udiv_qrnnd division support, used from longlong.h.
-
+!
 ! Copyright (C) 1993, 1994, 1997 Free Software Foundation, Inc.
-
+!
 ! This file is part of the GNU MP Library.
-
+!
 ! The GNU MP Library is free software; you can redistribute it and/or modify
 ! it under the terms of the GNU Library General Public License as published by
 ! the Free Software Foundation; either version 2 of the License, or (at your
 ! option) any later version.
-
+!
 ! The GNU MP Library is distributed in the hope that it will be useful, but
 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 ! License for more details.
-
+!
 ! You should have received a copy of the GNU Library General Public License
 ! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
 ! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
+!
 ! Added PIC support - May/96, Miguel de Icaza
-
+!
 ! INPUT PARAMETERS
 ! rem_ptr	i0
 ! n1		i1
 ! n0		i2
 ! d		i3
 
-#include "sysdep.h"
+#include <sysdep.h>
 #undef ret	/* Kludge for glibc */
 
+#ifdef PIC
 	.text
+#else
+	.section .rodata,#alloc
+#endif
 	.align	8
 
 	.type two_to_32,@object
+	.size two_to_32,8
 two_to_32:
 	.double	0r4294967296
-	.size two_to_32,8
 
 	.type two_to_31,@object
+	.size two_to_31,8
 two_to_31:
 	.double	0r2147483648
-	.size two_to_31,8
 
-	.align	4
-	.global	__udiv_qrnnd
-	.type	__udiv_qrnnd,@function
+	.text
 ENTRY(__udiv_qrnnd)
 	!#PROLOGUE# 0
 	save	%sp,-104,%sp
@@ -52,57 +54,58 @@ ENTRY(__udiv_qrnnd)
 	st	%i1,[%fp-8]
 	ld	[%fp-8],%f10
 #ifdef PIC
-.Lbase:	call	1f
+LOC(base):
+	call	1f
 	fitod	%f10,%f4
-1:	ldd	[%o7-(.Lbase-two_to_32)],%f8
+1:	ldd	[%o7-(LOC(base)-two_to_32)],%f8
 #else
 	sethi	%hi(two_to_32),%o7
 	fitod	%f10,%f4
 	ldd	[%o7+%lo(two_to_32)],%f8
 #endif
 	cmp	%i1,0
-	bge	L248
+	bge	LOC(248)
 	mov	%i0,%i5
 	faddd	%f4,%f8,%f4
-.L248:
+LOC(248):
 	st	%i2,[%fp-8]
 	ld	[%fp-8],%f10
 	fmuld	%f4,%f8,%f6
 	cmp	%i2,0
-	bge	L249
+	bge	LOC(249)
 	fitod	%f10,%f2
 	faddd	%f2,%f8,%f2
-.L249:
+LOC(249):
 	st	%i3,[%fp-8]
 	faddd	%f6,%f2,%f2
 	ld	[%fp-8],%f10
 	cmp	%i3,0
-	bge	L250
+	bge	LOC(250)
 	fitod	%f10,%f4
 	faddd	%f4,%f8,%f4
-.L250:
+LOC(250):
 	fdivd	%f2,%f4,%f2
 #ifdef PIC
-	ldd	[%o7-(.Lbase-two_to_31)],%f4
+	ldd	[%o7-(LOC(base)-two_to_31)],%f4
 #else
 	sethi	%hi(two_to_31),%o7
 	ldd	[%o7+%lo(two_to_31)],%f4
 #endif
 	fcmped	%f2,%f4
 	nop
-	fbge,a	L251
+	fbge,a	LOC(251)
 	fsubd	%f2,%f4,%f2
 	fdtoi	%f2,%f2
 	st	%f2,[%fp-8]
-	b	L252
+	b	LOC(252)
 	ld	[%fp-8],%i4
-.L251:
+LOC(251):
 	fdtoi	%f2,%f2
 	st	%f2,[%fp-8]
 	ld	[%fp-8],%i4
 	sethi	%hi(-2147483648),%g2
 	xor	%i4,%g2,%i4
-.L252:
+LOC(252):
 	wr	%g0,%i4,%y
 	sra	%i3,31,%g2
 	and	%i4,%g2,%g2
@@ -144,7 +147,7 @@ ENTRY(__udiv_qrnnd)
 	rd	%y,%g3
 	subcc	%i2,%g3,%o7
 	subxcc	%i1,%i0,%g0
-	be	L253
+	be	LOC(253)
 	cmp	%o7,%i3
 
 	add	%i4,-1,%i0
@@ -152,14 +155,14 @@ ENTRY(__udiv_qrnnd)
 	st	%o7,[%i5]
 	ret
 	restore
-.L253:
-	blu	L246
+LOC(253):
+	blu	LOC(246)
 	mov	%i4,%i0
 	add	%i4,1,%i0
 	sub	%o7,%i3,%o7
-.L246:
+LOC(246):
 	st	%o7,[%i5]
 	ret
 	restore
 
-	.size	__udiv_qrnnd, .-__udiv_qrnnd
+END(__udiv_qrnnd)
diff --git a/sysdeps/sparc/sparc32/umul.S b/sysdeps/sparc/sparc32/umul.S
index 7a26c295cb..096554a2bc 100644
--- a/sysdeps/sparc/sparc32/umul.S
+++ b/sysdeps/sparc/sparc32/umul.S
@@ -14,13 +14,14 @@
  *	bnz	overflow	(or tnz)
  */
 
-#include "DEFS.h"
-FUNC(.umul)
+#include <sysdep.h>
+
+ENTRY(.umul)
 	or	%o0, %o1, %o4
-	mov	%o0, %y		! multiplier -> Y
-	andncc	%o4, 0xfff, %g0	! test bits 12..31 of *both* args
-	be	Lmul_shortway	! if zero, can do it the short way
-	andcc	%g0, %g0, %o4	! zero the partial product and clear N and V
+	mov	%o0, %y			! multiplier -> Y
+	andncc	%o4, 0xfff, %g0		! test bits 12..31 of *both* args
+	be	LOC(mul_shortway)	! if zero, can do it the short way
+	 andcc	%g0, %g0, %o4		! zero the partial product; clear N & V
 
 	/*
 	 * Long multiply.  32 steps, followed by a final shift step.
@@ -59,7 +60,6 @@ FUNC(.umul)
 	mulscc	%o4, %o1, %o4	! 32
 	mulscc	%o4, %g0, %o4	! final shift
 
-
 	/*
 	 * Normally, with the shift-and-add approach, if both numbers are
 	 * positive you get the correct result.  With 32-bit two's-complement
@@ -97,20 +97,20 @@ FUNC(.umul)
 #if 0
 	tst	%o1
 	bl,a	1f		! if %o1 < 0 (high order bit = 1),
-	add	%o4, %o0, %o4	! %o4 += %o0 (add y to upper half)
+	 add	%o4, %o0, %o4	! %o4 += %o0 (add y to upper half)
 1:	rd	%y, %o0		! get lower half of product
 	retl
-	addcc	%o4, %g0, %o1	! put upper half in place and set Z for %o1==0
+	 addcc	%o4, %g0, %o1	! put upper half in place and set Z for %o1==0
 #else
 	/* Faster code from tege@sics.se.  */
 	sra	%o1, 31, %o2	! make mask from sign bit
 	and	%o0, %o2, %o2	! %o2 = 0 or %o0, depending on sign of %o1
 	rd	%y, %o0		! get lower half of product
 	retl
-	addcc	%o4, %o2, %o1	! add compensation and put upper half in place
+	 addcc	%o4, %o2, %o1	! add compensation and put upper half in place
 #endif
 
-Lmul_shortway:
+LOC(mul_shortway):
 	/*
 	 * Short multiply.  12 steps, followed by a final shift step.
 	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
@@ -150,4 +150,6 @@ Lmul_shortway:
 	srl	%o5, 20, %o5	! shift low bits right 20
 	or	%o5, %o0, %o0
 	retl
-	addcc	%g0, %g0, %o1	! %o1 = zero, and set Z
+	 addcc	%g0, %g0, %o1	! %o1 = zero, and set Z
+
+END(.umul)
diff --git a/sysdeps/sparc/sparc32/urem.S b/sysdeps/sparc/sparc32/urem.S
index e72c33dd63..5644e7a892 100644
--- a/sysdeps/sparc/sparc32/urem.S
+++ b/sysdeps/sparc/sparc32/urem.S
@@ -37,16 +37,8 @@
 
 
 
-#include "sysdep.h"
-#ifdef __linux__
-#include <asm/traps.h>
-#else
-#ifdef __svr4__
+#include <sysdep.h>
 #include <sys/trap.h>
-#else
-#include <machine/trap.h>
-#endif
-#endif
 
 ENTRY(.urem)
 
@@ -63,11 +55,11 @@ ENTRY(.urem)
 
 1:
 	cmp	%o3, %o5			! if %o1 exceeds %o0, done
-	blu	Lgot_result		! (and algorithm fails otherwise)
+	blu	LOC(got_result)		! (and algorithm fails otherwise)
 	clr	%o2
 	sethi	%hi(1 << (32 - 4 - 1)), %g1
 	cmp	%o3, %g1
-	blu	Lnot_really_big
+	blu	LOC(not_really_big)
 	clr	%o4
 
 	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
@@ -78,15 +70,15 @@ ENTRY(.urem)
 	1:
 		cmp	%o5, %g1
 		bgeu	3f
-		mov	1, %g7
+		mov	1, %g2
 		sll	%o5, 4, %o5
 		b	1b
 		add	%o4, 1, %o4
 
-	! Now compute %g7.
+	! Now compute %g2.
 	2:	addcc	%o5, %o5, %o5
-		bcc	Lnot_too_big
-		add	%g7, 1, %g7
+		bcc	LOC(not_too_big)
+		add	%g2, 1, %g2
 
 		! We get here if the %o1 overflowed while shifting.
 		! This means that %o3 has the high-order bit set.
@@ -94,20 +86,20 @@ ENTRY(.urem)
 		sll	%g1, 4, %g1	! high order bit
 		srl	%o5, 1, %o5		! rest of %o5
 		add	%o5, %g1, %o5
-		b	Ldo_single_div
-		sub	%g7, 1, %g7
+		b	LOC(do_single_div)
+		sub	%g2, 1, %g2
 
-	Lnot_too_big:
+	LOC(not_too_big):
 	3:	cmp	%o5, %o3
 		blu	2b
 		nop
-		be	Ldo_single_div
+		be	LOC(do_single_div)
 		nop
 	/* NB: these are commented out in the V8-Sparc manual as well */
 	/* (I do not understand this) */
 	! %o5 > %o3: went too far: back up 1 step
 	!	srl	%o5, 1, %o5
-	!	dec	%g7
+	!	dec	%g2
 	! do single-bit divide steps
 	!
 	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
@@ -116,15 +108,15 @@ ENTRY(.urem)
 	! order bit set in the first step, just falling into the regular
 	! division loop will mess up the first time around.
 	! So we unroll slightly...
-	Ldo_single_div:
-		subcc	%g7, 1, %g7
-		bl	Lend_regular_divide
+	LOC(do_single_div):
+		subcc	%g2, 1, %g2
+		bl	LOC(end_regular_divide)
 		nop
 		sub	%o3, %o5, %o3
 		mov	1, %o2
-		b	Lend_single_divloop
+		b	LOC(end_single_divloop)
 		nop
-	Lsingle_divloop:
+	LOC(single_divloop):
 		sll	%o2, 1, %o2
 		bl	1f
 		srl	%o5, 1, %o5
@@ -136,217 +128,219 @@ ENTRY(.urem)
 		add	%o3, %o5, %o3
 		sub	%o2, 1, %o2
 	2:
-	Lend_single_divloop:
-		subcc	%g7, 1, %g7
-		bge	Lsingle_divloop
+	LOC(end_single_divloop):
+		subcc	%g2, 1, %g2
+		bge	LOC(single_divloop)
 		tst	%o3
-		b,a	Lend_regular_divide
+		b,a	LOC(end_regular_divide)
 
-Lnot_really_big:
+LOC(not_really_big):
 1:
 	sll	%o5, 4, %o5
 	cmp	%o5, %o3
 	bleu	1b
 	addcc	%o4, 1, %o4
-	be	Lgot_result
+	be	LOC(got_result)
 	sub	%o4, 1, %o4
 
 	tst	%o3	! set up for initial iteration
-Ldivloop:
+LOC(divloop):
 	sll	%o2, 4, %o2
 		! depth 1, accumulated bits 0
-	bl	L.1.16
+	bl	LOC(1.16)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 2, accumulated bits 1
-	bl	L.2.17
+	bl	LOC(2.17)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 3, accumulated bits 3
-	bl	L.3.19
+	bl	LOC(3.19)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 7
-	bl	L.4.23
+	bl	LOC(4.23)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (7*2+1), %o2
-
-L.4.23:
+	
+LOC(4.23):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (7*2-1), %o2
-
-
-L.3.19:
+	
+	
+LOC(3.19):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 5
-	bl	L.4.21
+	bl	LOC(4.21)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (5*2+1), %o2
-
-L.4.21:
+	
+LOC(4.21):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (5*2-1), %o2
-
-
-
-L.2.17:
+	
+	
+	
+LOC(2.17):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 3, accumulated bits 1
-	bl	L.3.17
+	bl	LOC(3.17)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 3
-	bl	L.4.19
+	bl	LOC(4.19)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (3*2+1), %o2
-
-L.4.19:
+	
+LOC(4.19):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (3*2-1), %o2
-
-
-L.3.17:
+	
+	
+LOC(3.17):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits 1
-	bl	L.4.17
+	bl	LOC(4.17)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (1*2+1), %o2
-
-L.4.17:
+	
+LOC(4.17):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (1*2-1), %o2
-
-
-
-
-L.1.16:
+	
+	
+	
+	
+LOC(1.16):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 2, accumulated bits -1
-	bl	L.2.15
+	bl	LOC(2.15)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 3, accumulated bits -1
-	bl	L.3.15
+	bl	LOC(3.15)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -1
-	bl	L.4.15
+	bl	LOC(4.15)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-1*2+1), %o2
-
-L.4.15:
+	
+LOC(4.15):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-1*2-1), %o2
-
-
-L.3.15:
+	
+	
+LOC(3.15):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -3
-	bl	L.4.13
+	bl	LOC(4.13)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-3*2+1), %o2
-
-L.4.13:
+	
+LOC(4.13):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-3*2-1), %o2
-
-
-
-L.2.15:
+	
+	
+	
+LOC(2.15):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 3, accumulated bits -3
-	bl	L.3.13
+	bl	LOC(3.13)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -5
-	bl	L.4.11
+	bl	LOC(4.11)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-5*2+1), %o2
-
-L.4.11:
+	
+LOC(4.11):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-5*2-1), %o2
-
-
-L.3.13:
+	
+	
+LOC(3.13):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 			! depth 4, accumulated bits -7
-	bl	L.4.9
+	bl	LOC(4.9)
 	srl	%o5,1,%o5
 	! remainder is positive
 	subcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-7*2+1), %o2
-
-L.4.9:
+	
+LOC(4.9):
 	! remainder is negative
 	addcc	%o3,%o5,%o3
 		b	9f
 		add	%o2, (-7*2-1), %o2
-
-
-
-
+	
+	
+	
+	
 	9:
-Lend_regular_divide:
+LOC(end_regular_divide):
 	subcc	%o4, 1, %o4
-	bge	Ldivloop
+	bge	LOC(divloop)
 	tst	%o3
-	bl,a	Lgot_result
+	bl,a	LOC(got_result)
 	! non-restoring fixup here (one instruction only!)
 	add	%o3, %o1, %o3
 
 
-Lgot_result:
+LOC(got_result):
 
 	retl
 	mov %o3, %o0
+
+END(.urem)
diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h
index eb46ba92be..e302f4a1fe 100644
--- a/sysdeps/sparc/sparc64/dl-machine.h
+++ b/sysdeps/sparc/sparc64/dl-machine.h
@@ -220,6 +220,13 @@ elf_machine_lazy_rel (struct link_map *map, const Elf64_Rela *reloc)
 /* The SPARC overlaps DT_RELA and DT_PLTREL.  */
 #define ELF_MACHINE_PLTREL_OVERLAP 1
 
+/* dl-runtime's fixup returns RESULT, plus the PLTGOT address if PLTFMT == 2.  */
+#define ELF_FIXUP_RETURN_VALUE(map, result)				\
+  ((map)->l_info[DT_SPARC(PLTFMT)]					\
+   && (map)->l_info[DT_SPARC(PLTFMT)]->d_un.d_val == 2			\
+   ? (result) + (map)->l_info[DT_PLTGOT]->d_un.d_ptr + (map)->l_addr	\
+   : (result))
+
 /* Set up the loaded object described by L so its unrelocated PLT
    entries will jump to the on-demand fixup code in dl-runtime.c.  */
 
@@ -232,10 +239,10 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
   if (l->l_info[DT_JMPREL] && lazy)
     {
       got = (Elf64_Addr *) (l->l_addr + l->l_info[DT_PLTGOT]->d_un.d_ptr);
-      got[1] = (Elf64_Addr) l;  /* Identify this shared object.  */
       /* This function will get called to fix up the GOT entry indicated by
          the offset on the stack, and then jump to the resolved address.  */
-      got[2] = (Elf64_Addr) &_dl_runtime_resolve;
+      got[1] = (Elf64_Addr) &_dl_runtime_resolve;
+      got[2] = (Elf64_Addr) l;  /* Identify this shared object.  */
     }
 
   return lazy;
@@ -248,9 +255,9 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 	.type _dl_runtime_resolve, @function
 _dl_runtime_resolve:
 	save %sp, -160, %sp
-	mov %g5, %o0
+	mov %g1, %o0
 	call fixup
-	 mov %g6, %o1
+	 mov %g2, %o1
 	jmp %o0
 	 restore
 	.size _dl_runtime_resolve, .-_dl_runtime_resolve
diff --git a/sysdeps/sparc/sparc64/fpu/fpu_control.h b/sysdeps/sparc/sparc64/fpu/fpu_control.h
index fd8abb19f1..bedc034eaa 100644
--- a/sysdeps/sparc/sparc64/fpu/fpu_control.h
+++ b/sysdeps/sparc/sparc64/fpu/fpu_control.h
@@ -41,13 +41,11 @@
 
 /* Now two recommended cw */
 
-/* Linux default:
+/* Linux and IEEE default:
      - extended precision
      - rounding to nearest
      - no exceptions  */
 #define _FPU_DEFAULT  0x0
-
-/* IEEE:  same as above  */
 #define _FPU_IEEE     0x0
 
 /* Type of the control word.  */