about summary refs log tree commit diff
path: root/sysdeps/powerpc/powerpc32
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/powerpc/powerpc32')
-rw-r--r--sysdeps/powerpc/powerpc32/power4/fpu/mpa.c803
1 files changed, 534 insertions, 269 deletions
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
index 11404ab8b8..16cb57785d 100644
--- a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
@@ -51,91 +51,135 @@ const mp_no mptwo = {1, {1.0, 2.0}};
 
 /* Compare mantissa of two multiple precision numbers regardless of the sign
    and exponent of the numbers.  */
-static int mcr(const mp_no *x, const mp_no *y, int p) {
+static int
+mcr (const mp_no *x, const mp_no *y, int p)
+{
   long i;
   long p2 = p;
-  for (i=1; i<=p2; i++) {
-    if      (X[i] == Y[i])  continue;
-    else if (X[i] >  Y[i])  return  1;
-    else                    return -1; }
+  for (i = 1; i <= p2; i++)
+    {
+      if (X[i] == Y[i])
+	continue;
+      else if (X[i] > Y[i])
+	return 1;
+      else
+	return -1;
+    }
   return 0;
 }
 
 /* Compare the absolute values of two multiple precision numbers.  */
-int __acr(const mp_no *x, const mp_no *y, int p) {
+int
+__acr (const mp_no *x, const mp_no *y, int p)
+{
   long i;
 
-  if      (X[0] == ZERO) {
-    if    (Y[0] == ZERO) i= 0;
-    else                 i=-1;
-  }
-  else if (Y[0] == ZERO) i= 1;
-  else {
-    if      (EX >  EY)   i= 1;
-    else if (EX <  EY)   i=-1;
-    else                 i= mcr(x,y,p);
-  }
+  if (X[0] == ZERO)
+    {
+      if (Y[0] == ZERO)
+	i = 0;
+      else
+	i = -1;
+    }
+  else if (Y[0] == ZERO)
+    i = 1;
+  else
+    {
+      if (EX > EY)
+	i = 1;
+      else if (EX < EY)
+	i = -1;
+      else
+	i = mcr (x, y, p);
+    }
 
   return i;
 }
 
 /* Copy multiple precision number X into Y.  They could be the same
    number.  */
-void __cpy(const mp_no *x, mp_no *y, int p) {
+void
+__cpy (const mp_no *x, mp_no *y, int p)
+{
   long i;
 
   EY = EX;
-  for (i=0; i <= p; i++)    Y[i] = X[i];
+  for (i = 0; i <= p; i++)
+    Y[i] = X[i];
 
   return;
 }
 
 /* Convert a multiple precision number *X into a double precision
    number *Y, normalized case  (|x| >= 2**(-1022))).  */
-static void norm(const mp_no *x, double *y, int p)
+static void
+norm (const mp_no *x, double *y, int p)
 {
-  #define R  RADIXI
+#define R  RADIXI
   long i;
-  double a,c,u,v,z[5];
-  if (p<5) {
-    if      (p==1) c = X[1];
-    else if (p==2) c = X[1] + R* X[2];
-    else if (p==3) c = X[1] + R*(X[2]  +   R* X[3]);
-    else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]);
-  }
-  else {
-    for (a=ONE, z[1]=X[1]; z[1] < TWO23; )
-        {a *= TWO;   z[1] *= TWO; }
-
-    for (i=2; i<5; i++) {
-      z[i] = X[i]*a;
-      u = (z[i] + CUTTER)-CUTTER;
-      if  (u > z[i])  u -= RADIX;
-      z[i] -= u;
-      z[i-1] += u*RADIXI;
-    }
-
-    u = (z[3] + TWO71) - TWO71;
-    if (u > z[3])   u -= TWO19;
-    v = z[3]-u;
-
-    if (v == TWO18) {
-      if (z[4] == ZERO) {
-        for (i=5; i <= p; i++) {
-          if (X[i] == ZERO)   continue;
-          else                {z[3] += ONE;   break; }
-        }
-      }
-      else              z[3] += ONE;
-    }
-
-    c = (z[1] + R *(z[2] + R * z[3]))/a;
-  }
+  double a, c, u, v, z[5];
+  if (p < 5)
+    {
+      if (p == 1)
+	c = X[1];
+      else if (p == 2)
+	c = X[1] + R * X[2];
+      else if (p == 3)
+	c = X[1] + R * (X[2] + R * X[3]);
+      else if (p == 4)
+	c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
+    }
+  else
+    {
+      for (a = ONE, z[1] = X[1]; z[1] < TWO23;)
+	{
+	  a *= TWO;
+	  z[1] *= TWO;
+	}
+
+      for (i = 2; i < 5; i++)
+	{
+	  z[i] = X[i] * a;
+	  u = (z[i] + CUTTER) - CUTTER;
+	  if (u > z[i])
+	    u -= RADIX;
+	  z[i] -= u;
+	  z[i - 1] += u * RADIXI;
+	}
+
+      u = (z[3] + TWO71) - TWO71;
+      if (u > z[3])
+	u -= TWO19;
+      v = z[3] - u;
+
+      if (v == TWO18)
+	{
+	  if (z[4] == ZERO)
+	    {
+	      for (i = 5; i <= p; i++)
+		{
+		  if (X[i] == ZERO)
+		    continue;
+		  else
+		    {
+		      z[3] += ONE;
+		      break;
+		    }
+		}
+	    }
+	  else
+	    z[3] += ONE;
+	}
+
+      c = (z[1] + R * (z[2] + R * z[3])) / a;
+    }
 
   c *= X[0];
 
-  for (i=1; i<EX; i++)   c *= RADIX;
-  for (i=1; i>EX; i--)   c *= RADIXI;
+  for (i = 1; i < EX; i++)
+    c *= RADIX;
+  for (i = 1; i > EX; i--)
+    c *= RADIXI;
 
   *y = c;
   return;
@@ -144,46 +188,112 @@ static void norm(const mp_no *x, double *y, int p)
 
 /* Convert a multiple precision number *X into a double precision
    number *Y, Denormal case  (|x| < 2**(-1022))).  */
-static void denorm(const mp_no *x, double *y, int p)
+static void
+denorm (const mp_no *x, double *y, int p)
 {
-  long i,k;
+  long i, k;
   long p2 = p;
-  double c,u,z[5];
+  double c, u, z[5];
 
 #define R RADIXI
-  if (EX<-44 || (EX==-44 && X[1]<TWO5))
-     { *y=ZERO; return; }
-
-  if      (p2==1) {
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=ZERO;  z[3]=ZERO;  k=3;}
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  z[3]=ZERO;  k=2;}
-    else              {z[1]=     TWO10;  z[2]=ZERO;  z[3]=X[1];  k=1;}
-  }
-  else if (p2==2) {
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=X[2];  z[3]=ZERO;  k=3;}
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  z[3]=X[2];  k=2;}
-    else              {z[1]=     TWO10;  z[2]=ZERO;  z[3]=X[1];  k=1;}
-  }
-  else {
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=X[2];  k=3;}
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  k=2;}
-    else              {z[1]=     TWO10;  z[2]=ZERO;  k=1;}
-    z[3] = X[k];
-  }
+  if (EX < -44 || (EX == -44 && X[1] < TWO5))
+    {
+      *y = ZERO;
+      return;
+    }
+
+  if (p2 == 1)
+    {
+      if (EX == -42)
+	{
+	  z[1] = X[1] + TWO10;
+	  z[2] = ZERO;
+	  z[3] = ZERO;
+	  k = 3;
+	}
+      else if (EX == -43)
+	{
+	  z[1] = TWO10;
+	  z[2] = X[1];
+	  z[3] = ZERO;
+	  k = 2;
+	}
+      else
+	{
+	  z[1] = TWO10;
+	  z[2] = ZERO;
+	  z[3] = X[1];
+	  k = 1;
+	}
+    }
+  else if (p2 == 2)
+    {
+      if (EX == -42)
+	{
+	  z[1] = X[1] + TWO10;
+	  z[2] = X[2];
+	  z[3] = ZERO;
+	  k = 3;
+	}
+      else if (EX == -43)
+	{
+	  z[1] = TWO10;
+	  z[2] = X[1];
+	  z[3] = X[2];
+	  k = 2;
+	}
+      else
+	{
+	  z[1] = TWO10;
+	  z[2] = ZERO;
+	  z[3] = X[1];
+	  k = 1;
+	}
+    }
+  else
+    {
+      if (EX == -42)
+	{
+	  z[1] = X[1] + TWO10;
+	  z[2] = X[2];
+	  k = 3;
+	}
+      else if (EX == -43)
+	{
+	  z[1] = TWO10;
+	  z[2] = X[1];
+	  k = 2;
+	}
+      else
+	{
+	  z[1] = TWO10;
+	  z[2] = ZERO;
+	  k = 1;
+	}
+      z[3] = X[k];
+    }
 
   u = (z[3] + TWO57) - TWO57;
-  if  (u > z[3])   u -= TWO5;
+  if (u > z[3])
+    u -= TWO5;
 
-  if (u==z[3]) {
-    for (i=k+1; i <= p2; i++) {
-      if (X[i] == ZERO)   continue;
-      else {z[3] += ONE;   break; }
+  if (u == z[3])
+    {
+      for (i = k + 1; i <= p2; i++)
+	{
+	  if (X[i] == ZERO)
+	    continue;
+	  else
+	    {
+	      z[3] += ONE;
+	      break;
+	    }
+	}
     }
-  }
 
-  c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10);
+  c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
 
-  *y = c*TWOM1032;
+  *y = c * TWOM1032;
   return;
 
 #undef R
@@ -191,39 +301,65 @@ static void denorm(const mp_no *x, double *y, int p)
 
 /* Convert multiple precision number *X into double precision number *Y.  The
    result is correctly rounded to the nearest/even.  */
-void __mp_dbl(const mp_no *x, double *y, int p) {
-
-  if (X[0] == ZERO)  {*y = ZERO;  return; }
+void
+__mp_dbl (const mp_no *x, double *y, int p)
+{
+  if (X[0] == ZERO)
+    {
+      *y = ZERO;
+      return;
+    }
 
-  if      (EX> -42)                 norm(x,y,p);
-  else if (EX==-42 && X[1]>=TWO10)  norm(x,y,p);
-  else                              denorm(x,y,p);
+  if (EX > -42)
+    norm (x, y, p);
+  else if (EX == -42 && X[1] >= TWO10)
+    norm (x, y, p);
+  else
+    denorm (x, y, p);
 }
 
 /* Get the multiple precision equivalent of X into *Y.  If the precision is too
    small, the result is truncated.  */
-void __dbl_mp(double x, mp_no *y, int p) {
-
-  long i,n;
+void
+__dbl_mp (double x, mp_no *y, int p)
+{
+  long i, n;
   long p2 = p;
   double u;
 
   /* Sign.  */
-  if      (x == ZERO)  {Y[0] = ZERO;  return; }
-  else if (x >  ZERO)   Y[0] = ONE;
-  else                 {Y[0] = MONE;  x=-x;   }
+  if (x == ZERO)
+    {
+      Y[0] = ZERO;
+      return;
+    }
+  else if (x > ZERO)
+    Y[0] = ONE;
+  else
+    {
+      Y[0] = MONE;
+      x = -x;
+    }
 
   /* Exponent.  */
-  for (EY=ONE; x >= RADIX; EY += ONE)   x *= RADIXI;
-  for (      ; x <  ONE;   EY -= ONE)   x *= RADIX;
+  for (EY = ONE; x >= RADIX; EY += ONE)
+    x *= RADIXI;
+  for (; x < ONE; EY -= ONE)
+    x *= RADIX;
 
   /* Digits.  */
-  n=MIN(p2,4);
-  for (i=1; i<=n; i++) {
-    u = (x + TWO52) - TWO52;
-    if (u>x)   u -= ONE;
-    Y[i] = u;     x -= u;    x *= RADIX; }
-  for (   ; i<=p2; i++)     Y[i] = ZERO;
+  n = MIN (p2, 4);
+  for (i = 1; i <= n; i++)
+    {
+      u = (x + TWO52) - TWO52;
+      if (u > x)
+	u -= ONE;
+      Y[i] = u;
+      x -= u;
+      x *= RADIX;
+    }
+  for (; i <= p2; i++)
+    Y[i] = ZERO;
   return;
 }
 
@@ -231,93 +367,132 @@ void __dbl_mp(double x, mp_no *y, int p) {
    sign of the sum *Z is not changed.  X and Y may overlap but not X and Z or
    Y and Z.  No guard digit is used.  The result equals the exact sum,
    truncated.  */
-static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
-
-  long i,j,k;
+static void
+add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
+{
+  long i, j, k;
   long p2 = p;
 
   EZ = EX;
 
-  i=p2;    j=p2+ EY - EX;    k=p2+1;
-
-  if (j<1)
-     {__cpy(x,z,p);  return; }
-  else   Z[k] = ZERO;
-
-  for (; j>0; i--,j--) {
-    Z[k] += X[i] + Y[j];
-    if (Z[k] >= RADIX) {
-      Z[k]  -= RADIX;
-      Z[--k] = ONE; }
-    else
-      Z[--k] = ZERO;
-  }
-
-  for (; i>0; i--) {
-    Z[k] += X[i];
-    if (Z[k] >= RADIX) {
-      Z[k]  -= RADIX;
-      Z[--k] = ONE; }
-    else
-      Z[--k] = ZERO;
-  }
-
-  if (Z[1] == ZERO) {
-    for (i=1; i<=p2; i++)    Z[i] = Z[i+1]; }
-  else   EZ += ONE;
+  i = p2;
+  j = p2 + EY - EX;
+  k = p2 + 1;
+
+  if (j < 1)
+    {
+      __cpy (x, z, p);
+      return;
+    }
+  else
+    Z[k] = ZERO;
+
+  for (; j > 0; i--, j--)
+    {
+      Z[k] += X[i] + Y[j];
+      if (Z[k] >= RADIX)
+	{
+	  Z[k] -= RADIX;
+	  Z[--k] = ONE;
+	}
+      else
+	Z[--k] = ZERO;
+    }
+
+  for (; i > 0; i--)
+    {
+      Z[k] += X[i];
+      if (Z[k] >= RADIX)
+	{
+	  Z[k] -= RADIX;
+	  Z[--k] = ONE;
+	}
+      else
+	Z[--k] = ZERO;
+    }
+
+  if (Z[1] == ZERO)
+    {
+      for (i = 1; i <= p2; i++)
+	Z[i] = Z[i + 1];
+    }
+  else
+    EZ += ONE;
 }
 
 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
    The sign of the difference *Z is not changed.  X and Y may overlap but not X
    and Z or Y and Z.  One guard digit is used.  The error is less than one
    ULP.  */
-static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
-
-  long i,j,k;
+static void
+sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
+{
+  long i, j, k;
   long p2 = p;
 
   EZ = EX;
 
-  if (EX == EY) {
-    i=j=k=p2;
-    Z[k] = Z[k+1] = ZERO; }
-  else {
-    j= EX - EY;
-    if (j > p2)  {__cpy(x,z,p);  return; }
-    else {
-      i=p2;   j=p2+1-j;   k=p2;
-      if (Y[j] > ZERO) {
-        Z[k+1] = RADIX - Y[j--];
-        Z[k]   = MONE; }
-      else {
-        Z[k+1] = ZERO;
-        Z[k]   = ZERO;   j--;}
-    }
-  }
-
-  for (; j>0; i--,j--) {
-    Z[k] += (X[i] - Y[j]);
-    if (Z[k] < ZERO) {
-      Z[k]  += RADIX;
-      Z[--k] = MONE; }
-    else
-      Z[--k] = ZERO;
-  }
-
-  for (; i>0; i--) {
-    Z[k] += X[i];
-    if (Z[k] < ZERO) {
-      Z[k]  += RADIX;
-      Z[--k] = MONE; }
-    else
-      Z[--k] = ZERO;
-  }
-
-  for (i=1; Z[i] == ZERO; i++) ;
+  if (EX == EY)
+    {
+      i = j = k = p2;
+      Z[k] = Z[k + 1] = ZERO;
+    }
+  else
+    {
+      j = EX - EY;
+      if (j > p2)
+	{
+	  __cpy (x, z, p);
+	  return;
+	}
+      else
+	{
+	  i = p2;
+	  j = p2 + 1 - j;
+	  k = p2;
+	  if (Y[j] > ZERO)
+	    {
+	      Z[k + 1] = RADIX - Y[j--];
+	      Z[k] = MONE;
+	    }
+	  else
+	    {
+	      Z[k + 1] = ZERO;
+	      Z[k] = ZERO;
+	      j--;
+	    }
+	}
+    }
+
+  for (; j > 0; i--, j--)
+    {
+      Z[k] += (X[i] - Y[j]);
+      if (Z[k] < ZERO)
+	{
+	  Z[k] += RADIX;
+	  Z[--k] = MONE;
+	}
+      else
+	Z[--k] = ZERO;
+    }
+
+  for (; i > 0; i--)
+    {
+      Z[k] += X[i];
+      if (Z[k] < ZERO)
+	{
+	  Z[k] += RADIX;
+	  Z[--k] = MONE;
+	}
+      else
+	Z[--k] = ZERO;
+    }
+
+  for (i = 1; Z[i] == ZERO; i++);
   EZ = EZ - i + 1;
-  for (k=1; i <= p2+1; )
+  for (k = 1; i <= p2 + 1;)
     Z[k++] = Z[i++];
-  for (; k <= p2; )
+  for (; k <= p2;)
     Z[k++] = ZERO;
 
   return;
@@ -326,111 +501,186 @@ static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
 /* Add *X and *Y and store the result in *Z.  X and Y may overlap, but not X
    and Z or Y and Z.  One guard digit is used.  The error is less than one
    ULP.  */
-void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) {
-
+void
+__add (const mp_no *x, const mp_no *y, mp_no *z, int p)
+{
   int n;
 
-  if      (X[0] == ZERO)     {__cpy(y,z,p);  return; }
-  else if (Y[0] == ZERO)     {__cpy(x,z,p);  return; }
-
-  if (X[0] == Y[0])   {
-    if (__acr(x,y,p) > 0)      {add_magnitudes(x,y,z,p);  Z[0] = X[0]; }
-    else                     {add_magnitudes(y,x,z,p);  Z[0] = Y[0]; }
-  }
-  else                       {
-    if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p);  Z[0] = X[0]; }
-    else if (n == -1)        {sub_magnitudes(y,x,z,p);  Z[0] = Y[0]; }
-    else                      Z[0] = ZERO;
-  }
+  if (X[0] == ZERO)
+    {
+      __cpy (y, z, p);
+      return;
+    }
+  else if (Y[0] == ZERO)
+    {
+      __cpy (x, z, p);
+      return;
+    }
+
+  if (X[0] == Y[0])
+    {
+      if (__acr (x, y, p) > 0)
+	{
+	  add_magnitudes (x, y, z, p);
+	  Z[0] = X[0];
+	}
+      else
+	{
+	  add_magnitudes (y, x, z, p);
+	  Z[0] = Y[0];
+	}
+    }
+  else
+    {
+      if ((n = __acr (x, y, p)) == 1)
+	{
+	  sub_magnitudes (x, y, z, p);
+	  Z[0] = X[0];
+	}
+      else if (n == -1)
+	{
+	  sub_magnitudes (y, x, z, p);
+	  Z[0] = Y[0];
+	}
+      else
+	Z[0] = ZERO;
+    }
   return;
 }
 
 /* Subtract *Y from *X and return the result in *Z.  X and Y may overlap but
    not X and Z or Y and Z.  One guard digit is used.  The error is less than
    one ULP.  */
-void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
-
+void
+__sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
+{
   int n;
 
-  if      (X[0] == ZERO)     {__cpy(y,z,p);  Z[0] = -Z[0];  return; }
-  else if (Y[0] == ZERO)     {__cpy(x,z,p);                 return; }
-
-  if (X[0] != Y[0])    {
-    if (__acr(x,y,p) > 0)      {add_magnitudes(x,y,z,p);  Z[0] =  X[0]; }
-    else                     {add_magnitudes(y,x,z,p);  Z[0] = -Y[0]; }
-  }
-  else                       {
-    if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p);  Z[0] =  X[0]; }
-    else if (n == -1)        {sub_magnitudes(y,x,z,p);  Z[0] = -Y[0]; }
-    else                      Z[0] = ZERO;
-  }
+  if (X[0] == ZERO)
+    {
+      __cpy (y, z, p);
+      Z[0] = -Z[0];
+      return;
+    }
+  else if (Y[0] == ZERO)
+    {
+      __cpy (x, z, p);
+      return;
+    }
+
+  if (X[0] != Y[0])
+    {
+      if (__acr (x, y, p) > 0)
+	{
+	  add_magnitudes (x, y, z, p);
+	  Z[0] = X[0];
+	}
+      else
+	{
+	  add_magnitudes (y, x, z, p);
+	  Z[0] = -Y[0];
+	}
+    }
+  else
+    {
+      if ((n = __acr (x, y, p)) == 1)
+	{
+	  sub_magnitudes (x, y, z, p);
+	  Z[0] = X[0];
+	}
+      else if (n == -1)
+	{
+	  sub_magnitudes (y, x, z, p);
+	  Z[0] = -Y[0];
+	}
+      else
+	Z[0] = ZERO;
+    }
   return;
 }
 
 /* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
    and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
    digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
-void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
-
+void
+__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
+{
   long i, i1, i2, j, k, k2;
   long p2 = p;
   double u, zk, zk2;
 
-                      /* Is z=0? */
-  if (X[0]*Y[0]==ZERO)
-     { Z[0]=ZERO;  return; }
+  /* Is z=0? */
+  if (X[0] * Y[0] == ZERO)
+    {
+      Z[0] = ZERO;
+      return;
+    }
 
-                       /* Multiply, add and carry */
-  k2 = (p2<3) ? p2+p2 : p2+3;
-  zk = Z[k2]=ZERO;
-  for (k=k2; k>1; ) {
-    if (k > p2)  {i1=k-p2; i2=p2+1; }
-    else        {i1=1;   i2=k;   }
-#if 1
-    /* Rearrange this inner loop to allow the fmadd instructions to be
-       independent and execute in parallel on processors that have
-       dual symmetrical FP pipelines.  */
-    if (i1 < (i2-1))
+  /* Multiply, add and carry */
+  k2 = (p2 < 3) ? p2 + p2 : p2 + 3;
+  zk = Z[k2] = ZERO;
+  for (k = k2; k > 1;)
     {
-	/* Make sure we have at least 2 iterations.  */
-	if (((i2 - i1) & 1L) == 1L)
+      if (k > p2)
 	{
-                /* Handle the odd iterations case.  */
-		zk2 = x->d[i2-1]*y->d[i1];
+	  i1 = k - p2;
+	  i2 = p2 + 1;
 	}
-	else
-		zk2 = 0.0;
-	/* Do two multiply/adds per loop iteration, using independent
-           accumulators; zk and zk2.  */
-	for (i=i1,j=i2-1; i<i2-1; i+=2,j-=2) 
+      else
 	{
-		zk += x->d[i]*y->d[j];
-		zk2 += x->d[i+1]*y->d[j-1];
+	  i1 = 1;
+	  i2 = k;
+	}
+#if 1
+      /* Rearrange this inner loop to allow the fmadd instructions to be
+         independent and execute in parallel on processors that have
+         dual symmetrical FP pipelines.  */
+      if (i1 < (i2 - 1))
+	{
+	  /* Make sure we have at least 2 iterations.  */
+	  if (((i2 - i1) & 1L) == 1L)
+	    {
+	      /* Handle the odd iterations case.  */
+	      zk2 = x->d[i2 - 1] * y->d[i1];
+	    }
+	  else
+	    zk2 = 0.0;
+	  /* Do two multiply/adds per loop iteration, using independent
+	     accumulators; zk and zk2.  */
+	  for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2)
+	    {
+	      zk += x->d[i] * y->d[j];
+	      zk2 += x->d[i + 1] * y->d[j - 1];
+	    }
+	  zk += zk2;		/* Final sum.  */
+	}
+      else
+	{
+	  /* Special case when iterations is 1.  */
+	  zk += x->d[i1] * y->d[i1];
 	}
-	zk += zk2; /* Final sum.  */
-    }
-    else
-    {
-        /* Special case when iterations is 1.  */
-	zk += x->d[i1]*y->d[i1];
-    }
 #else
-    /* The original code.  */
-    for (i=i1,j=i2-1; i<i2; i++,j--)  zk += X[i]*Y[j];
+      /* The original code.  */
+      for (i = i1, j = i2 - 1; i < i2; i++, j--)
+	zk += X[i] * Y[j];
 #endif
 
-    u = (zk + CUTTER)-CUTTER;
-    if  (u > zk)  u -= RADIX;
-    Z[k]  = zk - u;
-    zk = u*RADIXI;
-    --k;
-  }
+      u = (zk + CUTTER) - CUTTER;
+      if (u > zk)
+	u -= RADIX;
+      Z[k] = zk - u;
+      zk = u * RADIXI;
+      --k;
+    }
   Z[k] = zk;
 
   /* Is there a carry beyond the most significant digit?  */
-  if (Z[1] == ZERO) {
-    for (i=1; i<=p2; i++)  Z[i]=Z[i+1];
-    EZ = EX + EY - 1; }
+  if (Z[1] == ZERO)
+    {
+      for (i = 1; i <= p2; i++)
+	Z[i] = Z[i + 1];
+      EZ = EX + EY - 1;
+    }
   else
     EZ = EX + EY;
 
@@ -444,22 +694,31 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
    - For P > 3: 2.001 * R ^ (1 - P)
 
    *X = 0 is not permissible.  */
-void __inv(const mp_no *x, mp_no *y, int p) {
+void
+__inv (const mp_no *x, mp_no *y, int p)
+{
   long i;
   double t;
-  mp_no z,w;
-  static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
-                            4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
-
-  __cpy(x,&z,p);  z.e=0;  __mp_dbl(&z,&t,p);
-  t=ONE/t;   __dbl_mp(t,y,p);    EY -= EX;
-
-  for (i=0; i<np1[p]; i++) {
-    __cpy(y,&w,p);
-    __mul(x,&w,y,p);
-    __sub(&mptwo,y,&z,p);
-    __mul(&w,&z,y,p);
-  }
+  mp_no z, w;
+  static const int np1[] =
+    { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
+  };
+
+  __cpy (x, &z, p);
+  z.e = 0;
+  __mp_dbl (&z, &t, p);
+  t = ONE / t;
+  __dbl_mp (t, y, p);
+  EY -= EX;
+
+  for (i = 0; i < np1[p]; i++)
+    {
+      __cpy (y, &w, p);
+      __mul (x, &w, y, p);
+      __sub (&mptwo, y, &z, p);
+      __mul (&w, &z, y, p);
+    }
   return;
 }
 
@@ -470,11 +729,17 @@ void __inv(const mp_no *x, mp_no *y, int p) {
    - For P > 3: 3.001 * R ^ (1 - P)
 
    *X = 0 is not permissible.  */
-void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) {
-
+void
+__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
+{
   mp_no w;
 
-  if (X[0] == ZERO)    Z[0] = ZERO;
-  else                {__inv(y,&w,p);   __mul(x,&w,z,p);}
+  if (X[0] == ZERO)
+    Z[0] = ZERO;
+  else
+    {
+      __inv (y, &w, p);
+      __mul (x, &w, z, p);
+    }
   return;
 }