about summary refs log tree commit diff
path: root/stdlib
diff options
context:
space:
mode:
authorRoland McGrath <roland@gnu.org>1995-10-16 01:37:51 +0000
committerRoland McGrath <roland@gnu.org>1995-10-16 01:37:51 +0000
commit8f5ca04bc7fd53741d80117df992995ace8f6d2d (patch)
treee39c13fc198b22ec55647259a8080051988e8c69 /stdlib
parent5d82cf5c55f56ae10d3b0a205d1fcc7de1cf56a0 (diff)
downloadglibc-8f5ca04bc7fd53741d80117df992995ace8f6d2d.tar.gz
glibc-8f5ca04bc7fd53741d80117df992995ace8f6d2d.tar.xz
glibc-8f5ca04bc7fd53741d80117df992995ace8f6d2d.zip
Sat Oct 14 02:52:36 1995 Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>
	* malloc/malloc.c (_malloc_internal): Performance fix.  Move
	if statement out of loop.

	* stdio/_itoa.c, stdio/_itoa.h: Complete rewrite.  Much faster
	implementation using GMP functions.  Contributed by
	Torbjorn Granlund and Ulrich Drepper.

	* stdio/test_rdwr.c: Include <errno.h>.

	* sysdeps/i386/i586/Implies: New file.

	New highly optimized string functions for i[345]86.
	* sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files.
        * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files.
        * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files.
        * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files.
        * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files.
        * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files.
        * sysdeps/i386/i586/strlen.S: New file.
	* sysdeps/i386/memchr.c: Removed.  There is now an assembler version.

	* sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did
	not correspond to used values.

	* sysdeps/unix/sysv/linux/nfs/nfs.h: New file.  Simply a wrapper
        around a kernel header file.
	* sysdeps/unix/sysv/linux/Dist: Add it.
	* sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers):
	Likewise.

	* sysdeps/unix/sysv/linux/local_lim.h: Rewrite.  Instead of
        defining ourself we use a kernel header file.

	* sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system
        call handler for i586.

	* sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up.
Sat Oct 14 02:52:36 1995  Ulrich Drepper  <drepper@ipd.info.uni-karlsruhe.de>

	* malloc/malloc.c (_malloc_internal): Performance fix.  Move
	if statement out of loop.

	* stdio/_itoa.c, stdio/_itoa.h: Complete rewrite.  Much faster
	implementation using GMP functions.  Contributed by
	Torbjorn Granlund and Ulrich Drepper.

	* stdio/test_rdwr.c: Include <errno.h>.

	* sysdeps/i386/i586/Implies: New file.

	New highly optimized string functions for i[345]86.
	* sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files.
        * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files.
        * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files.
        * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files.
        * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files.
        * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files.
        * sysdeps/i386/i586/strlen.S: New file.
	* sysdeps/i386/memchr.c: Removed.  There is now an assembler version.

	* sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did
	not correspond to used values.

	* sysdeps/unix/sysv/linux/nfs/nfs.h: New file.  Simply a wrapper
        around a kernel header file.
	* sysdeps/unix/sysv/linux/Dist: Add it.
	* sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers):
	Likewise.

	* sysdeps/unix/sysv/linux/local_lim.h: Rewrite.  Instead of
        defining ourself we use a kernel header file.

	* sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system
        call handler for i586.

	* sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up.
Diffstat (limited to 'stdlib')
-rw-r--r--stdlib/gmp-impl.h66
-rw-r--r--stdlib/gmp.h40
-rw-r--r--stdlib/longlong.h229
3 files changed, 220 insertions, 115 deletions
diff --git a/stdlib/gmp-impl.h b/stdlib/gmp-impl.h
index ccffe7bb1e..48d3af9761 100644
--- a/stdlib/gmp-impl.h
+++ b/stdlib/gmp-impl.h
@@ -19,11 +19,17 @@ along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
 the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
 
 #if ! defined (alloca)
-#if defined (__GNUC__) || defined (__sparc__) || defined (sparc)
+#if defined (__GNUC__)
 #define alloca __builtin_alloca
 #endif
 #endif
 
+#if ! defined (alloca)
+#if defined (__sparc__) || defined (sparc) || defined (__sgi)
+#include <alloca.h>
+#endif
+#endif
+
 #ifndef NULL
 #define NULL 0L
 #endif
@@ -168,6 +174,7 @@ void _mp_default_free ();
     else								\
       ____mpn_sqr_n (prodp, up, size, tspace);				\
   } while (0);
+#define assert(trueval) do {if (!(trueval)) abort ();} while (0)
 
 /* Structure for conversion between internal binary format and
    strings in base 2..36.  */
@@ -197,9 +204,11 @@ struct bases
 extern const struct bases __mp_bases[];
 extern mp_size_t __gmp_default_fp_limb_precision;
 
-/* Divide the two-limb number in (NH,,NL) by D, with DI being a 32 bit
-   approximation to (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
-   Put the quotient in Q and the remainder in R.  */
+/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
+   limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
+   If this would yield overflow, DI should be the largest possible number
+   (i.e., only ones).  For correct operation, the most significant bit of D
+   has to be set.  Put the quotient in Q and the remainder in R.  */
 #define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
   do {									\
     mp_limb _q, _ql, _r;						\
@@ -226,6 +235,8 @@ extern mp_size_t __gmp_default_fp_limb_precision;
     (r) = _r;								\
     (q) = _q;								\
   } while (0)
+/* Like udiv_qrnnd_preinv, but for for any value D.  DNORM is D shifted left
+   so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
 #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
   do {									\
     mp_limb n2, n10, n1, nadj, q1;					\
@@ -243,6 +254,8 @@ extern mp_size_t __gmp_default_fp_limb_precision;
     (r) = _xl + ((d) & _xh);						\
     (q) = _xh - q1;							\
   } while (0)
+/* Exactly like udiv_qrnnd_preinv, but branch-free.  It is not clear which
+   version to use.  */
 #define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \
   do {									\
     mp_limb n2, n10, n1, nadj, q1;					\
@@ -262,22 +275,49 @@ extern mp_size_t __gmp_default_fp_limb_precision;
   } while (0)
 
 #if defined (__GNUC__)
-/* Define stuff for longlong.h asm macros.  */
-#if __GNUC_NEW_ATTR_MODE_SYNTAX
-typedef unsigned int UQItype	__attribute__ ((mode ("QI")));
-typedef 	 int SItype	__attribute__ ((mode ("SI")));
-typedef unsigned int USItype	__attribute__ ((mode ("SI")));
-typedef		 int DItype	__attribute__ ((mode ("DI")));
-typedef unsigned int UDItype	__attribute__ ((mode ("DI")));
-#else
+/* Define stuff for longlong.h.  */
 typedef unsigned int UQItype	__attribute__ ((mode (QI)));
 typedef 	 int SItype	__attribute__ ((mode (SI)));
 typedef unsigned int USItype	__attribute__ ((mode (SI)));
 typedef		 int DItype	__attribute__ ((mode (DI)));
 typedef unsigned int UDItype	__attribute__ ((mode (DI)));
-#endif
+#else
+typedef unsigned char UQItype;
+typedef 	 long SItype;
+typedef unsigned long USItype;
 #endif
 
 typedef mp_limb UWtype;
 typedef unsigned int UHWtype;
 #define W_TYPE_SIZE BITS_PER_MP_LIMB
+
+
+#ifndef IEEE_DOUBLE_BIG_ENDIAN
+#define IEEE_DOUBLE_BIG_ENDIAN 1
+#endif
+
+#if IEEE_DOUBLE_BIG_ENDIAN
+union ieee_double_extract
+{
+  struct
+    {
+      unsigned long sig:1;
+      unsigned long exp:11;
+      unsigned long manh:20;
+      unsigned long manl:32;
+    } s;
+  double d;
+};
+#else
+union ieee_double_extract
+{
+  struct
+    {
+      unsigned long manl:32;
+      unsigned long manh:20;
+      unsigned long exp:11;
+      unsigned long sig:1;
+    } s;
+  double d;
+};
+#endif
diff --git a/stdlib/gmp.h b/stdlib/gmp.h
index 95c2f1beba..0b2cb29014 100644
--- a/stdlib/gmp.h
+++ b/stdlib/gmp.h
@@ -24,13 +24,13 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
 #define __need_size_t
 #include <stddef.h>
 
-#ifdef __STDC__
+#if defined (__STDC__)
 #define __gmp_const const
 #else
 #define __gmp_const
 #endif
 
-#ifdef __GNUC__
+#if defined (__GNUC__)
 #define __gmp_inline inline
 #else
 #define __gmp_inline
@@ -40,9 +40,14 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
 typedef unsigned int		mp_limb;
 typedef int			mp_limb_signed;
 #else
+#if _LONG_LONG_LIMB
+typedef unsigned long long int	mp_limb;
+typedef long long int		mp_limb_signed;
+#else
 typedef unsigned long int	mp_limb;
 typedef long int		mp_limb_signed;
 #endif
+#endif
 
 typedef mp_limb *		mp_ptr;
 typedef __gmp_const mp_limb *	mp_srcptr;
@@ -52,9 +57,9 @@ typedef long int		mp_exp_t;
 #ifndef __MP_SMALL__
 typedef struct
 {
-  long int alloc;		/* Number of *limbs* allocated and pointed
+  mp_size_t alloc;		/* Number of *limbs* allocated and pointed
 				   to by the D field.  */
-  long int size;		/* abs(SIZE) is the number of limbs
+  mp_size_t size;		/* abs(SIZE) is the number of limbs
 				   the last field points to.  If SIZE
 				   is negative this is a negative
 				   number.  */
@@ -130,12 +135,16 @@ typedef __mpf_struct *mpf_ptr;
 typedef __gmp_const __mpq_struct *mpq_srcptr;
 typedef __mpq_struct *mpq_ptr;
 
-#ifdef __STDC__
+#if defined (__STDC__)
 #define _PROTO(x) x
 #else
 #define _PROTO(x) ()
 #endif
 
+#if defined (FILE) || defined (_STDIO_H_) || defined (__STDIO_H__) || defined (H_STDIO)
+#define _GMP_H_HAVE_FILE 1
+#endif
+
 void mp_set_memory_functions _PROTO((void *(*) (size_t),
 				     void *(*) (void *, size_t, size_t),
 				     void (*) (void *, size_t)));
@@ -165,7 +174,7 @@ unsigned long int mpz_get_ui _PROTO ((mpz_srcptr));
 mp_limb mpz_getlimbn _PROTO ((mpz_srcptr, mp_size_t));
 mp_size_t mpz_hamdist _PROTO ((mpz_srcptr, mpz_srcptr));
 void mpz_init _PROTO ((mpz_ptr));
-#ifdef FILE
+#ifdef _GMP_H_HAVE_FILE
 void mpz_inp_raw _PROTO ((mpz_ptr, FILE *));
 int mpz_inp_str _PROTO ((mpz_ptr, FILE *, int));
 #endif
@@ -180,7 +189,7 @@ void mpz_mul _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
 void mpz_mul_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
 void mpz_mul_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
 void mpz_neg _PROTO ((mpz_ptr, mpz_srcptr));
-#ifdef FILE
+#ifdef _GMP_H_HAVE_FILE
 void mpz_out_raw _PROTO ((FILE *, mpz_srcptr));
 void mpz_out_str _PROTO ((FILE *, int, mpz_srcptr));
 #endif
@@ -218,6 +227,8 @@ void mpz_tdiv_qr_ui _PROTO((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
 void mpz_tdiv_r _PROTO((mpz_ptr, mpz_srcptr, mpz_srcptr));
 void mpz_tdiv_r_ui _PROTO((mpz_ptr, mpz_srcptr, unsigned long int));
 
+void mpz_array_init (mpz_ptr, size_t, mp_size_t);
+
 /**************** Rational (i.e. Q) routines.  ****************/
 
 void mpq_init _PROTO ((mpq_ptr));
@@ -253,7 +264,7 @@ void mpf_dump _PROTO ((mpf_srcptr));
 char *mpf_get_str _PROTO ((char *, mp_exp_t *, int, size_t, mpf_srcptr));
 void mpf_init _PROTO ((mpf_ptr));
 void mpf_init2 _PROTO ((mpf_ptr, mp_size_t));
-#ifdef FILE
+#ifdef _GMP_H_HAVE_FILE
 void mpf_inp_str _PROTO ((mpf_ptr, FILE *, int));
 #endif
 void mpf_init_set _PROTO ((mpf_ptr, mpf_srcptr));
@@ -265,7 +276,7 @@ void mpf_mul _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
 void mpf_mul_2exp _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
 void mpf_mul_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
 void mpf_neg _PROTO ((mpf_ptr, mpf_srcptr));
-#ifdef FILE
+#ifdef _GMP_H_HAVE_FILE
 void mpf_out_str _PROTO ((mpf_ptr, int, size_t, FILE *));
 #endif
 void mpf_set _PROTO ((mpf_ptr, mpf_srcptr));
@@ -335,7 +346,7 @@ mp_limb __mpn_gcd_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb));
 
 
 static __gmp_inline mp_limb
-#if __STDC__
+#if defined (__STDC__)
 __mpn_add_1 (register mp_ptr res_ptr,
 	     register mp_srcptr s1_ptr,
 	     register mp_size_t s1_size,
@@ -377,7 +388,7 @@ __mpn_add_1 (res_ptr, s1_ptr, s1_size, s2_limb)
 }
 
 static __gmp_inline mp_limb
-#if __STDC__
+#if defined (__STDC__)
 __mpn_add (register mp_ptr res_ptr,
 	   register mp_srcptr s1_ptr,
 	   register mp_size_t s1_size,
@@ -406,7 +417,7 @@ __mpn_add (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
 }
 
 static __gmp_inline mp_limb
-#if __STDC__
+#if defined (__STDC__)
 __mpn_sub_1 (register mp_ptr res_ptr,
 	     register mp_srcptr s1_ptr,
 	     register mp_size_t s1_size,
@@ -448,7 +459,7 @@ __mpn_sub_1 (res_ptr, s1_ptr, s1_size, s2_limb)
 }
 
 static __gmp_inline mp_limb
-#if __STDC__
+#if defined (__STDC__)
 __mpn_sub (register mp_ptr res_ptr,
 	   register mp_srcptr s1_ptr,
 	   register mp_size_t s1_size,
@@ -477,7 +488,7 @@ __mpn_sub (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
 }
 
 static __gmp_inline mp_size_t
-#if __STDC__
+#if defined (__STDC__)
 __mpn_normal_size (mp_srcptr ptr, mp_size_t size)
 #else
 __mpn_normal_size (ptr, size)
@@ -512,7 +523,6 @@ __mpn_normal_size (ptr, size)
 /* Useful synonyms, but not quite compatible with GMP 1.  */
 #define mpz_div		mpz_fdiv_q
 #define mpz_divmod	mpz_fdiv_qr
-#define mpz_mod		mpz_fdiv_r
 #define mpz_div_ui	mpz_fdiv_q_ui
 #define mpz_divmod_ui	mpz_fdiv_qr_ui
 #define mpz_mod_ui	mpz_fdiv_r_ui
diff --git a/stdlib/longlong.h b/stdlib/longlong.h
index 97c469d8c0..bbb92e3af8 100644
--- a/stdlib/longlong.h
+++ b/stdlib/longlong.h
@@ -97,7 +97,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
 #define __AND_CLOBBER_CC , "cc"
 #endif /* __GNUC__ < 2 */
 
-#if (defined (__a29k__) || defined (___AM29K__)) && W_TYPE_SIZE == 32
+#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("add %1,%4,%5
 	addc %0,%2,%3"							\
@@ -152,6 +152,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
     (pl) = __m0 * __m1;							\
   } while (0)
 #define UMUL_TIME 46
+#ifndef LONGLONG_STANDALONE
 #define udiv_qrnnd(q, r, n1, n0, d) \
   do { UDItype __r;							\
     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));				\
@@ -159,12 +160,13 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
   } while (0)
 extern UDItype __udiv_qrnnd ();
 #define UDIV_TIME 220
-#endif
+#endif /* LONGLONG_STANDALONE */
+#endif /* __alpha__ */
 
 #if defined (__arm__) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("adds %1,%4,%5
-	adc %0,%2,%3"							\
+  __asm__ ("adds	%1, %4, %5
+	adc	%0, %2, %3"						\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "%r" ((USItype)(ah)),					\
@@ -172,8 +174,8 @@ extern UDItype __udiv_qrnnd ();
 	     "%r" ((USItype)(al)),					\
 	     "rI" ((USItype)(bl)))
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("subs %1,%4,%5
-	sbc %0,%2,%3"							\
+  __asm__ ("subs	%1, %4, %5
+	sbc	%0, %2, %3"						\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "r" ((USItype)(ah)),					\
@@ -181,19 +183,19 @@ extern UDItype __udiv_qrnnd ();
 	     "r" ((USItype)(al)),					\
 	     "rI" ((USItype)(bl)))
 #define umul_ppmm(xh, xl, a, b) \
-  __asm__ ("; Inlined umul_ppmm
-	mov	r0,%2 lsr 16
-	mov	r2,%3 lsr 16
-	bic	r1,%2,r0 lsl 16
-	bic	r2,%3,r2 lsl 16
-	mul	%1,r1,r2
-	mul	r2,r0,r2
-	mul	r1,%0,r1
-	mul	%0,r0,%0
-	adds	r1,r2,r1
-	addcs	%0,%0,0x10000
-	adds	%1,%1,r1 lsl 16
-	adc	%0,%0,r1 lsr 16"					\
+  __asm__ ("%@ Inlined umul_ppmm
+	mov	%|r0, %2, lsr #16
+	mov	%|r2, %3, lsr #16
+	bic	%|r1, %2, %|r0, lsl #16
+	bic	%|r2, %3, %|r2, lsl #16
+	mul	%1, %|r1, %|r2
+	mul	%|r2, %|r0, %|r2
+	mul	%|r1, %0, %|r1
+	mul	%0, %|r0, %0
+	adds	%|r1, %|r2, %|r1
+	addcs	%0, %0, #65536
+	adds	%1, %1, %|r1, lsl #16
+	adc	%0, %0, %|r1, lsr #16"					\
 	   : "=&r" ((USItype)(xh)),					\
 	     "=r" ((USItype)(xl))					\
 	   : "r" ((USItype)(a)),					\
@@ -296,9 +298,9 @@ extern UDItype __udiv_qrnnd ();
 	   struct {USItype __h, __l;} __i;				\
 	  } __xx;							\
     __asm__ ("xmpyu %1,%2,%0"						\
-	     : "=x" (__xx.__ll)						\
-	     : "x" ((USItype)(u)),					\
-	       "x" ((USItype)(v)));					\
+	     : "=fx" (__xx.__ll)					\
+	     : "fx" ((USItype)(u)),					\
+	       "fx" ((USItype)(v)));					\
     (wh) = __xx.__i.__h;						\
     (wl) = __xx.__i.__l;						\
   } while (0)
@@ -308,12 +310,14 @@ extern UDItype __udiv_qrnnd ();
 #define UMUL_TIME 40
 #define UDIV_TIME 80
 #endif
+#ifndef LONGLONG_STANDALONE
 #define udiv_qrnnd(q, r, n1, n0, d) \
   do { USItype __r;							\
     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));				\
     (r) = __r;								\
   } while (0)
 extern USItype __udiv_qrnnd ();
+#endif /* LONGLONG_STANDALONE */
 #define count_leading_zeros(count, x) \
   do {									\
     USItype __tmp;							\
@@ -419,8 +423,12 @@ extern USItype __udiv_qrnnd ();
   } while (0)
 #define count_trailing_zeros(count, x) \
   __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
+#ifndef UMUL_TIME
 #define UMUL_TIME 40
+#endif
+#ifndef UDIV_TIME
 #define UDIV_TIME 40
+#endif
 #endif /* 80x86 */
 
 #if defined (__i960__) && W_TYPE_SIZE == 32
@@ -442,7 +450,7 @@ extern USItype __udiv_qrnnd ();
     __w; })  
 #endif /* __i960__ */
 
-#if defined (__mc68000__) && W_TYPE_SIZE == 32
+#if (defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("add%.l %5,%1
 	addx%.l %3,%0"							\
@@ -489,38 +497,34 @@ extern USItype __udiv_qrnnd ();
 	   : "=d" ((USItype)(count))					\
 	   : "od" ((USItype)(x)), "n" (0))
 #else /* not mc68020 */
-#define umul_ppmm(xh, xl, a, b) \
-  __asm__ ("| Inlined umul_ppmm
-	move%.l	%2,%/d0
-	move%.l	%3,%/d1
-	move%.l	%/d0,%/d2
-	swap	%/d0
-	move%.l	%/d1,%/d3
-	swap	%/d1
-	move%.w	%/d2,%/d4
-	mulu	%/d3,%/d4
-	mulu	%/d1,%/d2
-	mulu	%/d0,%/d3
-	mulu	%/d0,%/d1
-	move%.l	%/d4,%/d0
-	eor%.w	%/d0,%/d0
-	swap	%/d0
-	add%.l	%/d0,%/d2
-	add%.l	%/d3,%/d2
+#define umul_ppmmxx(xh, xl, a, b) \
+  do { USItype __umul_tmp1, __umul_tmp2;				\
+	__asm__ ("| Inlined umul_ppmm
+	move%.l	%5,%3
+	move%.l	%2,%0
+	move%.w	%3,%1
+	swap	%3
+	swap	%0
+	mulu	%2,%1
+	mulu	%3,%0
+	mulu	%2,%3
+	swap	%2
+	mulu	%5,%2
+	add%.l	%3,%2
 	jcc	1f
-	add%.l	#65536,%/d1
-1:	swap	%/d2
-	moveq	#0,%/d0
-	move%.w	%/d2,%/d0
-	move%.w	%/d4,%/d2
-	move%.l	%/d2,%1
-	add%.l	%/d1,%/d0
-	move%.l	%/d0,%0"						\
-	   : "=g" ((USItype)(xh)),					\
-	     "=g" ((USItype)(xl))					\
-	   : "g" ((USItype)(a)),					\
-	     "g" ((USItype)(b))						\
-	   : "d0", "d1", "d2", "d3", "d4")
+	add%.l	%#0x10000,%0
+1:	move%.l	%2,%3
+	clr%.w	%2
+	swap	%2
+	swap	%3
+	clr%.w	%3
+	add%.l	%3,%1
+	addx%.l	%2,%0
+	| End inlined umul_ppmm"					\
+	      : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)),		\
+	        "=d" (__umul_tmp1), "=&d" (__umul_tmp2)			\
+	      : "%2" ((USItype)(a)), "d" ((USItype)(b)));		\
+  } while (0)
 #define UMUL_TIME 100
 #define UDIV_TIME 400
 #endif /* not mc68020 */
@@ -553,7 +557,7 @@ extern USItype __udiv_qrnnd ();
 	     : "r" ((USItype)(x)));					\
     (count) = __cbtmp ^ 31;						\
   } while (0)
-#if defined (__mc88110__)
+#if defined (__m88110__)
 #define umul_ppmm(wh, wl, u, v) \
   do {									\
     union {UDItype __ll;						\
@@ -582,10 +586,18 @@ extern USItype __udiv_qrnnd ();
 #else
 #define UMUL_TIME 17
 #define UDIV_TIME 150
-#endif /* __mc88110__ */
+#endif /* __m88110__ */
 #endif /* __m88000__ */
 
 #if defined (__mips__) && W_TYPE_SIZE == 32
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("multu %2,%3"						\
+	   : "=l" ((USItype)(w0)),					\
+	     "=h" ((USItype)(w1))					\
+	   : "d" ((USItype)(u)),					\
+	     "d" ((USItype)(v)))
+#else
 #define umul_ppmm(w1, w0, u, v) \
   __asm__ ("multu %2,%3
 	mflo %0
@@ -594,11 +606,20 @@ extern USItype __udiv_qrnnd ();
 	     "=d" ((USItype)(w1))					\
 	   : "d" ((USItype)(u)),					\
 	     "d" ((USItype)(v)))
+#endif
 #define UMUL_TIME 10
 #define UDIV_TIME 100
 #endif /* __mips__ */
 
 #if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("dmultu %2,%3"						\
+	   : "=l" ((UDItype)(w0)),					\
+	     "=h" ((UDItype)(w1))					\
+	   : "d" ((UDItype)(u)),					\
+	     "d" ((UDItype)(v)))
+#else
 #define umul_ppmm(w1, w0, u, v) \
   __asm__ ("dmultu %2,%3
 	mflo %0
@@ -607,8 +628,9 @@ extern USItype __udiv_qrnnd ();
 	     "=d" ((UDItype)(w1))					\
 	   : "d" ((UDItype)(u)),					\
 	     "d" ((UDItype)(v)))
-#define UMUL_TIME 10
-#define UDIV_TIME 100
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 140
 #endif /* __mips__ */
 
 #if defined (__ns32000__) && W_TYPE_SIZE == 32
@@ -647,7 +669,7 @@ extern USItype __udiv_qrnnd ();
   } while (0)
 #endif /* __ns32000__ */
 
-#if (defined (__powerpc__) || defined (___IBMR2__)) && W_TYPE_SIZE == 32
+#if (defined (_ARCH_PPC) || defined (_IBMR2)) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (bh) && (bh) == 0)				\
@@ -676,14 +698,14 @@ extern USItype __udiv_qrnnd ();
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (ah) && (ah) == 0)				\
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"		\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"	\
 	       : "=r" ((USItype)(sh)),					\
 		 "=&r" ((USItype)(sl))					\
 	       : "r" ((USItype)(bh)),					\
 		 "rI" ((USItype)(al)),					\
 		 "r" ((USItype)(bl)));					\
     else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0)		\
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"		\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"	\
 	       : "=r" ((USItype)(sh)),					\
 		 "=&r" ((USItype)(sl))					\
 	       : "r" ((USItype)(bh)),					\
@@ -716,7 +738,7 @@ extern USItype __udiv_qrnnd ();
   __asm__ ("{cntlz|cntlzw} %0,%1"					\
 	   : "=r" ((USItype)(count))					\
 	   : "r" ((USItype)(x)))
-#if defined (__powerpc__)
+#if defined (_ARCH_PPC)
 #define umul_ppmm(ph, pl, m0, m1) \
   do {									\
     USItype __m0 = (m0), __m1 = (m1);					\
@@ -785,16 +807,15 @@ extern USItype __udiv_qrnnd ();
 	     "g" ((USItype)(bh)),					\
 	     "1" ((USItype)(al)),					\
 	     "g" ((USItype)(bl)))
-/* This insn doesn't work on ancient pyramids.  */
+/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
 #define umul_ppmm(w1, w0, u, v) \
   ({union {UDItype __ll;						\
 	   struct {USItype __h, __l;} __i;				\
 	  } __xx;							\
-  __xx.__i.__l = u;							\
-  __asm__ ("uemul %3,%0"						\
-	   : "=r" (__xx.__i.__h),					\
-	     "=r" (__xx.__i.__l)					\
-	   : "1" (__xx.__i.__l),					\
+  __asm__ ("movw %1,%R0
+	uemul %2,%0"							\
+	   : "=&r" (__xx.__ll)						\
+	   : "g" ((USItype) (u)),					\
 	     "g" ((USItype)(v)));					\
   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 #endif /* __pyr__ */
@@ -868,6 +889,20 @@ extern USItype __udiv_qrnnd ();
   } while (0)
 #endif
 
+#if defined (__sh2__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ (								\
+       "dmulu.l	%2,%3
+	sts	macl,%1
+	sts	mach,%0"						\
+	   : "=r" ((USItype)(w1)),					\
+	     "=r" ((USItype)(w0))					\
+	   : "r" ((USItype)(u)),					\
+	     "r" ((USItype)(v))						\
+	   : "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
 #if defined (__sparc__) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("addcc %r4,%5,%1
@@ -901,17 +936,21 @@ extern USItype __udiv_qrnnd ();
 	   : "r" ((USItype)(u)),					\
 	     "r" ((USItype)(v)))
 #define UMUL_TIME 5
-/* We might want to leave this undefined for `SuperSPARC (tm)' since
-   its implementation is crippled and often traps.  */
+#ifndef SUPERSPARC	/* SuperSPARC's udiv only handles 53 bit dividends */
 #define udiv_qrnnd(q, r, n1, n0, d) \
-  __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
-	   : "=&r" ((USItype)(q)),					\
-	     "=&r" ((USItype)(r))					\
-	   : "r" ((USItype)(n1)),					\
-	     "r" ((USItype)(n0)),					\
-	     "r" ((USItype)(d)))
+  do {									\
+    USItype __q;							\
+    __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"			\
+	     : "=r" ((USItype)(__q))					\
+	     : "r" ((USItype)(n1)),					\
+	       "r" ((USItype)(n0)),					\
+	       "r" ((USItype)(d)));					\
+    (r) = (n0) - __q * (d);						\
+    (q) = __q;								\
+  } while (0)
 #define UDIV_TIME 25
-#else
+#endif /* SUPERSPARC */
+#else /* ! __sparc_v8__ */
 #if defined (__sparclite__)
 /* This has hardware multiply but not divide.  It also has two additional
    instructions scan (ffs from high bit) and divscc.  */
@@ -973,9 +1012,10 @@ extern USItype __udiv_qrnnd ();
   __asm__ ("scan %1,0,%0"						\
 	   : "=r" ((USItype)(x))					\
 	   : "r" ((USItype)(count)))
-#else
-/* SPARC without integer multiplication and divide instructions.
-   (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
+#ifndef umul_ppmm
 #define umul_ppmm(w1, w0, u, v) \
   __asm__ ("! Inlined umul_ppmm
 	wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr
@@ -1023,6 +1063,9 @@ extern USItype __udiv_qrnnd ();
 	     "r" ((USItype)(v))						\
 	   : "%g1", "%g2" __AND_CLOBBER_CC)
 #define UMUL_TIME 39		/* 39 instructions */
+#endif
+#ifndef udiv_qrnnd
+#ifndef LONGLONG_STANDALONE
 #define udiv_qrnnd(q, r, n1, n0, d) \
   do { USItype __r;							\
     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));				\
@@ -1030,8 +1073,8 @@ extern USItype __udiv_qrnnd ();
   } while (0)
 extern USItype __udiv_qrnnd ();
 #define UDIV_TIME 140
-#endif /* __sparclite__ */
-#endif /* __sparc_v8__ */
+#endif /* LONGLONG_STANDALONE */
+#endif /* udiv_qrnnd */
 #endif /* __sparc__ */
 
 #if defined (__vax__) && W_TYPE_SIZE == 32
@@ -1075,7 +1118,7 @@ extern USItype __udiv_qrnnd ();
     __xx.__i.__h = n1; __xx.__i.__l = n0;				\
     __asm__ ("ediv %3,%2,%0,%1"						\
 	     : "=g" (q), "=g" (r)					\
-	     : "g" (__n1n0.ll), "g" (d));				\
+	     : "g" (__xx.ll), "g" (d));					\
   } while (0)
 #endif /* __vax__ */
 
@@ -1173,11 +1216,12 @@ extern USItype __udiv_qrnnd ();
   do {									\
     UWtype __x0, __x1, __x2, __x3;					\
     UHWtype __ul, __vl, __uh, __vh;					\
+    UWtype __u = (u), __v = (v);					\
 									\
-    __ul = __ll_lowpart (u);						\
-    __uh = __ll_highpart (u);						\
-    __vl = __ll_lowpart (v);						\
-    __vh = __ll_highpart (v);						\
+    __ul = __ll_lowpart (__u);						\
+    __uh = __ll_highpart (__u);						\
+    __vl = __ll_lowpart (__v);						\
+    __vh = __ll_highpart (__v);						\
 									\
     __x0 = (UWtype) __ul * __vl;					\
     __x1 = (UWtype) __ul * __vh;					\
@@ -1194,6 +1238,17 @@ extern USItype __udiv_qrnnd ();
   } while (0)
 #endif
 
+#if !defined (umul_ppmm)
+#define smul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UWtype __w1;							\
+    UWtype __m0 = (u), __m1 = (v);					\
+    umul_ppmm (__w1, w0, __m0, __m1);					\
+    (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1)			\
+		- (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0);		\
+  } while (0)
+#endif
+
 /* Define this unconditionally, so it can be used for debugging.  */
 #define __udiv_qrnnd_c(q, r, n1, n0, d) \
   do {									\