about summary refs log tree commit diff
path: root/sysdeps/libm-ieee754/e_pow.c
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/libm-ieee754/e_pow.c')
-rw-r--r--sysdeps/libm-ieee754/e_pow.c160
1 files changed, 102 insertions, 58 deletions
diff --git a/sysdeps/libm-ieee754/e_pow.c b/sysdeps/libm-ieee754/e_pow.c
index 4b9ba3d5eb..02b16c5171 100644
--- a/sysdeps/libm-ieee754/e_pow.c
+++ b/sysdeps/libm-ieee754/e_pow.c
@@ -9,6 +9,9 @@
  * is preserved.
  * ====================================================
  */
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
+   for performance improvement on pipelined processors.
+*/
 
 #if defined(LIBM_SCCS) && !defined(lint)
 static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $";
@@ -61,6 +64,33 @@ static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $";
 
 #include "math.h"
 #include "math_private.h"
+#define zero      C[0]
+#define one	  C[1]
+#define two	  C[2]
+#define two53	  C[3]
+#define huge	  C[4]
+#define tiny      C[5]
+#define L1        C[6]
+#define L2        C[7]
+#define L3        C[8]
+#define L4        C[9]
+#define L5        C[10]
+#define L6        C[11]
+#define P1        C[12]
+#define P2        C[13]
+#define P3        C[14]
+#define P4        C[15]
+#define P5        C[16]
+#define lg2       C[17]
+#define lg2_h     C[18]
+#define lg2_l     C[19]
+#define ovt       C[20]
+#define cp        C[21]
+#define cp_h      C[22]
+#define cp_l      C[23]
+#define ivln2     C[24]
+#define ivln2_h   C[25]
+#define ivln2_l   C[26]
 
 #ifdef __STDC__
 static const double
@@ -70,34 +100,34 @@ static double
 bp[] = {1.0, 1.5,},
 dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
 dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */
-zero    =  0.0,
-one	=  1.0,
-two	=  2.0,
-two53	=  9007199254740992.0,	/* 0x43400000, 0x00000000 */
-huge	=  1.0e300,
-tiny    =  1.0e-300,
-	/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
-L1  =  5.99999999999994648725e-01, /* 0x3FE33333, 0x33333303 */
-L2  =  4.28571428578550184252e-01, /* 0x3FDB6DB6, 0xDB6FABFF */
-L3  =  3.33333329818377432918e-01, /* 0x3FD55555, 0x518F264D */
-L4  =  2.72728123808534006489e-01, /* 0x3FD17460, 0xA91D4101 */
-L5  =  2.30660745775561754067e-01, /* 0x3FCD864A, 0x93C9DB65 */
-L6  =  2.06975017800338417784e-01, /* 0x3FCA7E28, 0x4A454EEF */
-P1   =  1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
-P2   = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
-P3   =  6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
-P4   = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
-P5   =  4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */
-lg2  =  6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
-lg2_h  =  6.93147182464599609375e-01, /* 0x3FE62E43, 0x00000000 */
-lg2_l  = -1.90465429995776804525e-09, /* 0xBE205C61, 0x0CA86C39 */
-ovt =  8.0085662595372944372e-0017, /* -(1024-log2(ovfl+.5ulp)) */
-cp    =  9.61796693925975554329e-01, /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */
-cp_h  =  9.61796700954437255859e-01, /* 0x3FEEC709, 0xE0000000 =(float)cp */
-cp_l  = -7.02846165095275826516e-09, /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h*/
-ivln2    =  1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE =1/ln2 */
-ivln2_h  =  1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/
-ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
+C[] = {
+0.0,
+1.0,
+2.0,
+9007199254740992.0	,
+1.0e300,
+1.0e-300,
+5.99999999999994648725e-01 ,
+4.28571428578550184252e-01 ,
+3.33333329818377432918e-01 ,
+2.72728123808534006489e-01 ,
+2.30660745775561754067e-01 ,
+2.06975017800338417784e-01 ,
+1.66666666666666019037e-01 ,
+-2.77777777770155933842e-03 ,
+6.61375632143793436117e-05 ,
+-1.65339022054652515390e-06 ,
+4.13813679705723846039e-08 ,
+6.93147180559945286227e-01 ,
+6.93147182464599609375e-01 ,
+-1.90465429995776804525e-09 ,
+8.0085662595372944372e-0017 ,
+9.61796693925975554329e-01 ,
+9.61796700954437255859e-01 ,
+-7.02846165095275826516e-09 ,
+1.44269504088896338700e+00 ,
+1.44269502162933349609e+00 ,
+1.92596299112661746887e-08 };
 
 #ifdef __STDC__
 	double __ieee754_pow(double x, double y)
@@ -107,7 +137,7 @@ ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
 #endif
 {
 	double z,ax,z_h,z_l,p_h,p_l;
-	double y1,t1,t2,r,s,t,u,v,w;
+	double y1,t1,t2,r,s,t,u,v,w, t12,t14,r_1,r_2,r_3;
 	int32_t i,j,k,yisint,n;
 	int32_t hx,hy,ix,iy;
 	u_int32_t lx,ly;
@@ -117,7 +147,7 @@ ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
 	ix = hx&0x7fffffff;  iy = hy&0x7fffffff;
 
     /* y==zero: x**0 = 1 */
-	if((iy|ly)==0) return one;
+	if((iy|ly)==0) return C[1];
 
     /* +-NaN return x+y */
 	if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) ||
@@ -150,12 +180,12 @@ ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
 	        if(((ix-0x3ff00000)|lx)==0)
 		    return  y - y;	/* inf**+-1 is NaN */
 	        else if (ix >= 0x3ff00000)/* (|x|>1)**+-inf = inf,0 */
-		    return (hy>=0)? y: zero;
+		    return (hy>=0)? y: C[0];
 	        else			/* (|x|<1)**-,+inf = inf,0 */
-		    return (hy<0)?-y: zero;
+		    return (hy<0)?-y: C[0];
 	    }
 	    if(iy==0x3ff00000) {	/* y is  +-1 */
-		if(hy<0) return one/x; else return x;
+		if(hy<0) return C[1]/x; else return x;
 	    }
 	    if(hy==0x40000000) return x*x; /* y is  2 */
 	    if(hy==0x3fe00000) {	/* y is  0.5 */
@@ -169,7 +199,7 @@ ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
 	if(lx==0) {
 	    if(ix==0x7ff00000||ix==0||ix==0x3ff00000){
 		z = ax;			/*x is +-0,+-inf,+-1*/
-		if(hy<0) z = one/z;	/* z = (1/|x|) */
+		if(hy<0) z = C[1]/z;	/* z = (1/|x|) */
 		if(hx<0) {
 		    if(((ix-0x3ff00000)|yisint)==0) {
 			z = (z-z)/(z-z); /* (-1)**non-int is NaN */
@@ -186,27 +216,27 @@ ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
     /* |y| is huge */
 	if(iy>0x41e00000) { /* if |y| > 2**31 */
 	    if(iy>0x43f00000){	/* if |y| > 2**64, must o/uflow */
-		if(ix<=0x3fefffff) return (hy<0)? huge*huge:tiny*tiny;
-		if(ix>=0x3ff00000) return (hy>0)? huge*huge:tiny*tiny;
+		if(ix<=0x3fefffff) return (hy<0)? C[4]*C[4]:C[5]*C[5];
+		if(ix>=0x3ff00000) return (hy>0)? C[4]*C[4]:C[5]*C[5];
 	    }
 	/* over/underflow if x is not close to one */
-	    if(ix<0x3fefffff) return (hy<0)? huge*huge:tiny*tiny;
-	    if(ix>0x3ff00000) return (hy>0)? huge*huge:tiny*tiny;
+	    if(ix<0x3fefffff) return (hy<0)? C[4]*C[4]:C[5]*C[5];
+	    if(ix>0x3ff00000) return (hy>0)? C[4]*C[4]:C[5]*C[5];
 	/* now |1-x| is tiny <= 2**-20, suffice to compute
 	   log(x) by x-x^2/2+x^3/3-x^4/4 */
 	    t = x-1;		/* t has 20 trailing zeros */
 	    w = (t*t)*(0.5-t*(0.3333333333333333333333-t*0.25));
-	    u = ivln2_h*t;	/* ivln2_h has 21 sig. bits */
-	    v = t*ivln2_l-w*ivln2;
+	    u = C[25]*t;	/* ivln2_h has 21 sig. bits */
+	    v = t*C[26]-w*C[24];
 	    t1 = u+v;
 	    SET_LOW_WORD(t1,0);
 	    t2 = v-(t1-u);
 	} else {
-	    double s2,s_h,s_l,t_h,t_l;
+	    double s2,s_h,s_l,t_h,t_l,s22,s24,s26,r1,r2,r3;
 	    n = 0;
 	/* take care subnormal number */
 	    if(ix<0x00100000)
-		{ax *= two53; n -= 53; GET_HIGH_WORD(ix,ax); }
+		{ax *= C[3]; n -= 53; GET_HIGH_WORD(ix,ax); }
 	    n  += ((ix)>>20)-0x3ff;
 	    j  = ix&0x000fffff;
 	/* determine interval */
@@ -218,18 +248,25 @@ ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
 
 	/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
 	    u = ax-bp[k];		/* bp[0]=1.0, bp[1]=1.5 */
-	    v = one/(ax+bp[k]);
+	    v = C[1]/(ax+bp[k]);
 	    s = u*v;
 	    s_h = s;
 	    SET_LOW_WORD(s_h,0);
 	/* t_h=ax+bp[k] High */
-	    t_h = zero;
+	    t_h = C[0];
 	    SET_HIGH_WORD(t_h,((ix>>1)|0x20000000)+0x00080000+(k<<18));
 	    t_l = ax - (t_h-bp[k]);
 	    s_l = v*((u-s_h*t_h)-s_h*t_l);
 	/* compute log(ax) */
 	    s2 = s*s;
+#ifdef DO_NOT_USE_THIS
 	    r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
+#else
+	    r1 = C[10]+s2*C[11]; s22=s2*s2;
+	    r2 = C[8]+s2*C[9]; s24=s22*s22;
+	    r3 = C[6]+s2*C[7]; s26=s24*s22;
+            r = r3*s22 + r2*s24 + r1*s26;
+#endfi
 	    r += s_l*(s_h+s);
 	    s2  = s_h*s_h;
 	    t_h = 3.0+s2+r;
@@ -242,8 +279,8 @@ ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
 	    p_h = u+v;
 	    SET_LOW_WORD(p_h,0);
 	    p_l = v-(p_h-u);
-	    z_h = cp_h*p_h;		/* cp_h+cp_l = 2/(3*log2) */
-	    z_l = cp_l*p_h+p_l*cp+dp_l[k];
+	    z_h = C[22]*p_h;		/* cp_h+cp_l = 2/(3*log2) */
+	    z_l = C[23]*p_h+p_l*C[21]+dp_l[k];
 	/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
 	    t = (double)n;
 	    t1 = (((z_h+z_l)+dp_h[k])+t);
@@ -251,9 +288,9 @@ ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
 	    t2 = z_l-(((t1-t)-dp_h[k])-z_h);
 	}
 
-	s = one; /* s (sign of result -ve**odd) = -1 else = 1 */
+	s = C[1]; /* s (sign of result -ve**odd) = -1 else = 1 */
 	if(((((u_int32_t)hx>>31)-1)|(yisint-1))==0)
-	    s = -one;/* (-ve)**(odd int) */
+	    s = -C[1];/* (-ve)**(odd int) */
 
     /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
 	y1  = y;
@@ -264,15 +301,15 @@ ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
 	EXTRACT_WORDS(j,i,z);
 	if (j>=0x40900000) {				/* z >= 1024 */
 	    if(((j-0x40900000)|i)!=0)			/* if z > 1024 */
-		return s*huge*huge;			/* overflow */
+		return s*C[4]*C[4];			/* overflow */
 	    else {
-		if(p_l+ovt>z-p_h) return s*huge*huge;	/* overflow */
+		if(p_l+C[20]>z-p_h) return s*C[4]*C[4];	/* overflow */
 	    }
 	} else if((j&0x7fffffff)>=0x4090cc00 ) {	/* z <= -1075 */
 	    if(((j-0xc090cc00)|i)!=0) 		/* z < -1075 */
-		return s*tiny*tiny;		/* underflow */
+		return s*C[5]*C[5];		/* underflow */
 	    else {
-		if(p_l<=z-p_h) return s*tiny*tiny;	/* underflow */
+		if(p_l<=z-p_h) return s*C[5]*C[5];	/* underflow */
 	    }
 	}
     /*
@@ -284,7 +321,7 @@ ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
 	if(i>0x3fe00000) {		/* if |z| > 0.5, set n = [z+0.5] */
 	    n = j+(0x00100000>>(k+1));
 	    k = ((n&0x7fffffff)>>20)-0x3ff;	/* new k for n */
-	    t = zero;
+	    t = C[0];
 	    SET_HIGH_WORD(t,n&~(0x000fffff>>k));
 	    n = ((n&0x000fffff)|0x00100000)>>(20-k);
 	    if(j<0) n = -n;
@@ -292,14 +329,21 @@ ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
 	}
 	t = p_l+p_h;
 	SET_LOW_WORD(t,0);
-	u = t*lg2_h;
-	v = (p_l-(t-p_h))*lg2+t*lg2_l;
+	u = t*C[18];
+	v = (p_l-(t-p_h))*C[17]+t*C[19];
 	z = u+v;
 	w = v-(z-u);
 	t  = z*z;
-	t1  = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
-	r  = (z*t1)/(t1-two)-(w+z*w);
-	z  = one-(r-z);
+#ifdef DO_NOT_USE_THIS
+	t1  = z - t*(C[12]+t*(C[13]+t*(C[14]+t*(C[15]+t*C[16]))));
+#else
+	r_1 = C[15]+t*C[16]; t12 = t*t;
+	r_2 = C[13]+t*C[14]; t14 = t12*t12;
+	r_3 = t*C[12];
+	t1 = z - r_3 - t12*r_2 - t14*r_1;
+#endif
+	r  = (z*t1)/(t1-C[2])-(w+z*w);
+	z  = C[1]-(r-z);
 	GET_HIGH_WORD(j,z);
 	j += (n<<20);
 	if((j>>20)<=0) z = __scalbn(z,n);	/* subnormal output */