about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorSzabolcs Nagy <nsz@port70.net>2013-01-01 21:59:46 +0100
committerSzabolcs Nagy <nsz@port70.net>2013-01-01 21:59:46 +0100
commit697acde67e0da4d73b46445ed536fe9923d515c7 (patch)
treebecd1024b588787ad27232af7152a6139baf4a21 /src
parentd18a410bbf259e5fee9fb8b4b0335ec64991d5db (diff)
downloadmusl-697acde67e0da4d73b46445ed536fe9923d515c7.tar.gz
musl-697acde67e0da4d73b46445ed536fe9923d515c7.tar.xz
musl-697acde67e0da4d73b46445ed536fe9923d515c7.zip
math: bessel cleanup (j0.c and j0f.c)
a common code path in j0 and y0 was factored out so the resulting
object code is smaller

unsigned int arithmetics is used for bit manipulation

the logic of j0 got a bit simplified (x < 1 case was handled
separately with a bit higher precision than now, but there are large
errors in other domains anyway so that branch has been removed)

some threshold values were adjusted in j0f and y0f
Diffstat (limited to 'src')
-rw-r--r--src/math/j0.c193
-rw-r--r--src/math/j0f.c171
2 files changed, 161 insertions, 203 deletions
diff --git a/src/math/j0.c b/src/math/j0.c
index 986968e8..b281e136 100644
--- a/src/math/j0.c
+++ b/src/math/j0.c
@@ -59,10 +59,46 @@
 static double pzero(double), qzero(double);
 
 static const double
-huge      = 1e300,
 invsqrtpi = 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
-tpi       = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
+tpi       = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */
+
+/* common method when |x|>=2 */
+static double common(uint32_t ix, double x, int y0)
+{
+	double s,c,ss,cc,z;
+
+	/*
+	 * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x-pi/4)-q0(x)*sin(x-pi/4))
+	 * y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x-pi/4)+q0(x)*cos(x-pi/4))
+	 *
+	 * sin(x-pi/4) = (sin(x) - cos(x))/sqrt(2)
+	 * cos(x-pi/4) = (sin(x) + cos(x))/sqrt(2)
+	 * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+	 */
+	s = sin(x);
+	c = cos(x);
+	if (y0)
+		c = -c;
+	cc = s+c;
+	/* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */
+	if (ix < 0x7fe00000) {
+		ss = s-c;
+		z = -cos(2*x);
+		if (s*c < 0)
+			cc = z/ss;
+		else
+			ss = z/cc;
+		if (ix < 0x48000000) {
+			if (y0)
+				ss = -ss;
+			cc = pzero(x)*cc-qzero(x)*ss;
+		}
+	}
+	return invsqrtpi*cc/sqrt(x);
+}
+
 /* R0/S0 on [0, 2.00] */
+static const double
 R02 =  1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */
 R03 = -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */
 R04 =  1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */
@@ -74,56 +110,37 @@ S04 =  1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */
 
 double j0(double x)
 {
-	double z, s,c,ss,cc,r,u,v;
-	int32_t hx,ix;
+	double z,r,s;
+	uint32_t ix;
 
-	GET_HIGH_WORD(hx, x);
-	ix = hx & 0x7fffffff;
+	GET_HIGH_WORD(ix, x);
+	ix &= 0x7fffffff;
+
+	/* j0(+-inf)=0, j0(nan)=nan */
 	if (ix >= 0x7ff00000)
-		return 1.0/(x*x);
+		return 1/(x*x);
 	x = fabs(x);
-	if (ix >= 0x40000000) {  /* |x| >= 2.0 */
-		s = sin(x);
-		c = cos(x);
-		ss = s-c;
-		cc = s+c;
-		if (ix < 0x7fe00000) {  /* make sure x+x does not overflow */
-			z = -cos(x+x);
-			if (s*c < 0.0)
-				cc = z/ss;
-			else
-				ss = z/cc;
-		}
-		/*
-		 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
-		 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
-		 */
-		if (ix > 0x48000000)
-			z = (invsqrtpi*cc)/sqrt(x);
-		else {
-			u = pzero(x);
-			v = qzero(x);
-			z = invsqrtpi*(u*cc-v*ss)/sqrt(x);
-		}
-		return z;
-	}
-	if (ix < 0x3f200000) {  /* |x| < 2**-13 */
-		/* raise inexact if x != 0 */
-		if (huge+x > 1.0) {
-			if (ix < 0x3e400000)  /* |x| < 2**-27 */
-				return 1.0;
-			return 1.0 - 0.25*x*x;
-		}
+
+	if (ix >= 0x40000000) {  /* |x| >= 2 */
+		/* large ulp error near zeros: 2.4, 5.52, 8.6537,.. */
+		return common(ix,x,0);
 	}
-	z = x*x;
-	r = z*(R02+z*(R03+z*(R04+z*R05)));
-	s = 1.0+z*(S01+z*(S02+z*(S03+z*S04)));
-	if (ix < 0x3FF00000) {   /* |x| < 1.00 */
-		return 1.0 + z*(-0.25+(r/s));
-	} else {
-		u = 0.5*x;
-		return (1.0+u)*(1.0-u) + z*(r/s);
+
+	/* 1 - x*x/4 + x*x*R(x^2)/S(x^2) */
+	if (ix >= 0x3f200000) {  /* |x| >= 2**-13 */
+		/* up to 4ulp error close to 2 */
+		z = x*x;
+		r = z*(R02+z*(R03+z*(R04+z*R05)));
+		s = 1+z*(S01+z*(S02+z*(S03+z*S04)));
+		return (1+x/2)*(1-x/2) + z*(r/s);
 	}
+
+	/* 1 - x*x/4 */
+	/* prevent underflow */
+	/* inexact should be raised when x!=0, this is not done correctly */
+	if (ix >= 0x38000000)  /* |x| >= 2**-127 */
+		x = 0.25*x*x;
+	return 1 - x;
 }
 
 static const double
@@ -141,61 +158,33 @@ v04  =  4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */
 
 double y0(double x)
 {
-	double z,s,c,ss,cc,u,v;
-	int32_t hx,ix,lx;
+	double z,u,v;
+	uint32_t ix,lx;
 
-	EXTRACT_WORDS(hx, lx, x);
-	ix = 0x7fffffff & hx;
-	/* Y0(NaN) is NaN, y0(-inf) is Nan, y0(inf) is 0  */
+	EXTRACT_WORDS(ix, lx, x);
+
+	/* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */
+	if ((ix<<1 | lx) == 0)
+		return -1/0.0;
+	if (ix>>31)
+		return 0/0.0;
 	if (ix >= 0x7ff00000)
-		return 1.0/(x+x*x);
-	if ((ix|lx) == 0)
-		return -1.0/0.0;
-	if (hx < 0)
-		return 0.0/0.0;
-	if (ix >= 0x40000000) {  /* |x| >= 2.0 */
-		/* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
-		 * where x0 = x-pi/4
-		 *      Better formula:
-		 *              cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
-		 *                      =  1/sqrt(2) * (sin(x) + cos(x))
-		 *              sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
-		 *                      =  1/sqrt(2) * (sin(x) - cos(x))
-		 * To avoid cancellation, use
-		 *              sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
-		 * to compute the worse one.
-		 */
-		s = sin(x);
-		c = cos(x);
-		ss = s-c;
-		cc = s+c;
-		/*
-		 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
-		 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
-		 */
-		if (ix < 0x7fe00000) {  /* make sure x+x does not overflow */
-			z = -cos(x+x);
-			if (s*c < 0.0)
-				cc = z/ss;
-			else
-				ss = z/cc;
-		}
-		if (ix > 0x48000000)
-			z = (invsqrtpi*ss)/sqrt(x);
-		else {
-			u = pzero(x);
-			v = qzero(x);
-			z = invsqrtpi*(u*ss+v*cc)/sqrt(x);
-		}
-		return z;
+		return 1/x;
+
+	if (ix >= 0x40000000) {  /* x >= 2 */
+		/* large ulp errors near zeros: 3.958, 7.086,.. */
+		return common(ix,x,1);
 	}
-	if (ix <= 0x3e400000) {  /* x < 2**-27 */
-		return u00 + tpi*log(x);
+
+	/* U(x^2)/V(x^2) + (2/pi)*j0(x)*log(x) */
+	if (ix >= 0x3e400000) {  /* x >= 2**-27 */
+		/* large ulp error near the first zero, x ~= 0.89 */
+		z = x*x;
+		u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06)))));
+		v = 1.0+z*(v01+z*(v02+z*(v03+z*v04)));
+		return u/v + tpi*(j0(x)*log(x));
 	}
-	z = x*x;
-	u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06)))));
-	v = 1.0+z*(v01+z*(v02+z*(v03+z*v04)));
-	return u/v + tpi*(j0(x)*log(x));
+	return u00 + tpi*log(x);
 }
 
 /* The asymptotic expansions of pzero is
@@ -275,14 +264,14 @@ static double pzero(double x)
 {
 	const double *p,*q;
 	double z,r,s;
-	int32_t ix;
+	uint32_t ix;
 
 	GET_HIGH_WORD(ix, x);
 	ix &= 0x7fffffff;
 	if      (ix >= 0x40200000){p = pR8; q = pS8;}
 	else if (ix >= 0x40122E8B){p = pR5; q = pS5;}
 	else if (ix >= 0x4006DB6D){p = pR3; q = pS3;}
-	else if (ix >= 0x40000000){p = pR2; q = pS2;}
+	else /*ix >= 0x40000000*/ {p = pR2; q = pS2;}
 	z = 1.0/(x*x);
 	r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
 	s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
@@ -371,14 +360,14 @@ static double qzero(double x)
 {
 	const double *p,*q;
 	double s,r,z;
-	int32_t ix;
+	uint32_t ix;
 
 	GET_HIGH_WORD(ix, x);
 	ix &= 0x7fffffff;
 	if      (ix >= 0x40200000){p = qR8; q = qS8;}
 	else if (ix >= 0x40122E8B){p = qR5; q = qS5;}
 	else if (ix >= 0x4006DB6D){p = qR3; q = qS3;}
-	else if (ix >= 0x40000000){p = qR2; q = qS2;}
+	else /*ix >= 0x40000000*/ {p = qR2; q = qS2;}
 	z = 1.0/(x*x);
 	r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
 	s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
diff --git a/src/math/j0f.c b/src/math/j0f.c
index 2ee2824b..79bab62a 100644
--- a/src/math/j0f.c
+++ b/src/math/j0f.c
@@ -18,10 +18,39 @@
 static float pzerof(float), qzerof(float);
 
 static const float
-huge      = 1e30,
 invsqrtpi = 5.6418961287e-01, /* 0x3f106ebb */
-tpi       = 6.3661974669e-01, /* 0x3f22f983 */
+tpi       = 6.3661974669e-01; /* 0x3f22f983 */
+
+static float common(uint32_t ix, float x, int y0)
+{
+	float z,s,c,ss,cc;
+	/*
+	 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
+	 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
+	 */
+	s = sinf(x);
+	c = cosf(x);
+	if (y0)
+		c = -c;
+	cc = s+c;
+	if (ix < 0x7f000000) {
+		ss = s-c;
+		z = -cosf(2*x);
+		if (s*c < 0)
+			cc = z/ss;
+		else
+			ss = z/cc;
+		if (ix < 0x58800000) {
+			if (y0)
+				ss = -ss;
+			cc = pzerof(x)*cc-qzerof(x)*ss;
+		}
+	}
+	return invsqrtpi*cc/sqrtf(x);
+}
+
 /* R0/S0 on [0, 2.00] */
+static const float
 R02 =  1.5625000000e-02, /* 0x3c800000 */
 R03 = -1.8997929874e-04, /* 0xb947352e */
 R04 =  1.8295404516e-06, /* 0x35f58e88 */
@@ -33,56 +62,29 @@ S04 =  1.1661400734e-09; /* 0x30a045e8 */
 
 float j0f(float x)
 {
-	float z, s,c,ss,cc,r,u,v;
-	int32_t hx,ix;
+	float z,r,s;
+	uint32_t ix;
 
-	GET_FLOAT_WORD(hx, x);
-	ix = hx & 0x7fffffff;
+	GET_FLOAT_WORD(ix, x);
+	ix &= 0x7fffffff;
 	if (ix >= 0x7f800000)
-		return 1.0f/(x*x);
+		return 1/(x*x);
 	x = fabsf(x);
-	if (ix >= 0x40000000) {  /* |x| >= 2.0 */
-		s = sinf(x);
-		c = cosf(x);
-		ss = s-c;
-		cc = s+c;
-		if (ix < 0x7f000000) {  /* make sure x+x does not overflow */
-			z = -cosf(x+x);
-			if (s*c < 0.0f)
-				cc = z/ss;
-			else
-				ss = z/cc;
-		}
-		/*
-		 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
-		 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
-		 */
-		if (ix > 0x80000000)
-			z = (invsqrtpi*cc)/sqrtf(x);
-		else {
-			u = pzerof(x);
-			v = qzerof(x);
-			z = invsqrtpi*(u*cc-v*ss)/sqrtf(x);
-		}
-		return z;
-	}
-	if (ix < 0x39000000) {  /* |x| < 2**-13 */
-		/* raise inexact if x != 0 */
-		if (huge+x > 1.0f) {
-			if (ix < 0x32000000)  /* |x| < 2**-27 */
-				return 1.0f;
-			return 1.0f - 0.25f*x*x;
-		}
+
+	if (ix >= 0x40000000) {  /* |x| >= 2 */
+		/* large ulp error near zeros */
+		return common(ix, x, 0);
 	}
-	z = x*x;
-	r =  z*(R02+z*(R03+z*(R04+z*R05)));
-	s =  1.0f+z*(S01+z*(S02+z*(S03+z*S04)));
-	if(ix < 0x3F800000) {   /* |x| < 1.00 */
-		return 1.0f + z*(-0.25f + (r/s));
-	} else {
-		u = 0.5f*x;
-		return (1.0f+u)*(1.0f-u) + z*(r/s);
+	if (ix >= 0x3a000000) {  /* |x| >= 2**-11 */
+		/* up to 4ulp error near 2 */
+		z = x*x;
+		r = z*(R02+z*(R03+z*(R04+z*R05)));
+		s = 1+z*(S01+z*(S02+z*(S03+z*S04)));
+		return (1+x/2)*(1-x/2) + z*(r/s);
 	}
+	if (ix >= 0x21800000)  /* |x| >= 2**-60 */
+		x = 0.25f*x*x;
+	return 1 - x;
 }
 
 static const float
@@ -100,61 +102,28 @@ v04  =  4.4111031494e-10; /* 0x2ff280c2 */
 
 float y0f(float x)
 {
-	float z,s,c,ss,cc,u,v;
-	int32_t hx,ix;
+	float z,u,v;
+	uint32_t ix;
 
-	GET_FLOAT_WORD(hx, x);
-	ix = 0x7fffffff & hx;
-	/* Y0(NaN) is NaN, y0(-inf) is Nan, y0(inf) is 0  */
+	GET_FLOAT_WORD(ix, x);
+	if ((ix & 0x7fffffff) == 0)
+		return -1/0.0f;
+	if (ix>>31)
+		return 0/0.0f;
 	if (ix >= 0x7f800000)
-		return 1.0f/(x+x*x);
-	if (ix == 0)
-		return -1.0f/0.0f;
-	if (hx < 0)
-		return 0.0f/0.0f;
+		return 1/x;
 	if (ix >= 0x40000000) {  /* |x| >= 2.0 */
-		/* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
-		 * where x0 = x-pi/4
-		 *      Better formula:
-		 *              cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
-		 *                      =  1/sqrt(2) * (sin(x) + cos(x))
-		 *              sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
-		 *                      =  1/sqrt(2) * (sin(x) - cos(x))
-		 * To avoid cancellation, use
-		 *              sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
-		 * to compute the worse one.
-		 */
-		s = sinf(x);
-		c = cosf(x);
-		ss = s-c;
-		cc = s+c;
-		/*
-		 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
-		 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
-		 */
-		if (ix < 0x7f000000) {  /* make sure x+x not overflow */
-			z = -cosf(x+x);
-			if (s*c < 0.0f)
-				cc = z/ss;
-			else
-				ss = z/cc;
-		}
-		if (ix > 0x80000000)
-			z = (invsqrtpi*ss)/sqrtf(x);
-		else {
-			u = pzerof(x);
-			v = qzerof(x);
-			z = invsqrtpi*(u*ss+v*cc)/sqrtf(x);
-		}
-		return z;
+		/* large ulp error near zeros */
+		return common(ix,x,1);
 	}
-	if (ix <= 0x32000000) {  /* x < 2**-27 */
-		return u00 + tpi*logf(x);
+	if (ix >= 0x39000000) {  /* x >= 2**-13 */
+		/* large ulp error at x ~= 0.89 */
+		z = x*x;
+		u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06)))));
+		v = 1+z*(v01+z*(v02+z*(v03+z*v04)));
+		return u/v + tpi*(j0f(x)*logf(x));
 	}
-	z = x*x;
-	u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06)))));
-	v = 1.0f+z*(v01+z*(v02+z*(v03+z*v04)));
-	return u/v + tpi*(j0f(x)*logf(x));
+	return u00 + tpi*logf(x);
 }
 
 /* The asymptotic expansions of pzero is
@@ -233,14 +202,14 @@ static float pzerof(float x)
 {
 	const float *p,*q;
 	float z,r,s;
-	int32_t ix;
+	uint32_t ix;
 
 	GET_FLOAT_WORD(ix, x);
 	ix &= 0x7fffffff;
 	if      (ix >= 0x41000000){p = pR8; q = pS8;}
 	else if (ix >= 0x40f71c58){p = pR5; q = pS5;}
 	else if (ix >= 0x4036db68){p = pR3; q = pS3;}
-	else if (ix >= 0x40000000){p = pR2; q = pS2;}
+	else /*ix >= 0x40000000*/ {p = pR2; q = pS2;}
 	z = 1.0f/(x*x);
 	r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
 	s = 1.0f+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
@@ -329,14 +298,14 @@ static float qzerof(float x)
 {
 	const float *p,*q;
 	float s,r,z;
-	int32_t ix;
+	uint32_t ix;
 
 	GET_FLOAT_WORD(ix, x);
 	ix &= 0x7fffffff;
 	if      (ix >= 0x41000000){p = qR8; q = qS8;}
 	else if (ix >= 0x40f71c58){p = qR5; q = qS5;}
 	else if (ix >= 0x4036db68){p = qR3; q = qS3;}
-	else if (ix >= 0x40000000){p = qR2; q = qS2;}
+	else /*ix >= 0x40000000*/ {p = qR2; q = qS2;}
 	z = 1.0f/(x*x);
 	r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
 	s = 1.0f+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));