1 files changed, 59 insertions, 40 deletions
diff --git a/src/math/log10.c b/src/math/log10.c
index ed65d9be..81026876 100644
--- a/src/math/log10.c
+++ b/src/math/log10.c
@@ -10,72 +10,91 @@
  * ====================================================
  */
 /*
- * Return the base 10 logarithm of x.  See e_log.c and k_log.h for most
- * comments.
+ * Return the base 10 logarithm of x.  See log.c for most comments.
  *
- *    log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2)
- * in not-quite-routine extra precision.
+ * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2
+ * as in log.c, then combine and scale in extra precision:
+ *    log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2)
  */
 
-#include "libm.h"
-#include "__log1p.h"
+#include <math.h>
+#include <stdint.h>
 
 static const double
-two54     = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
 ivln10hi  = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */
 ivln10lo  = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */
 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
-log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
+log10_2lo = 3.69423907715893078616e-13, /* 0x3D59FEF3, 0x11F12B36 */
+Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
+Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
+Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
+Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
+Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
+Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
+Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
 
 double log10(double x)
 {
-	double f,hfsq,hi,lo,r,val_hi,val_lo,w,y,y2;
-	int32_t i,k,hx;
-	uint32_t lx;
-
-	EXTRACT_WORDS(hx, lx, x);
+	union {double f; uint64_t i;} u = {x};
+	double_t hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo;
+	uint32_t hx;
+	int k;
 
+	hx = u.i>>32;
 	k = 0;
-	if (hx < 0x00100000) {  /* x < 2**-1022  */
-		if (((hx&0x7fffffff)|lx) == 0)
-			return -two54/0.0;  /* log(+-0)=-inf */
-		if (hx<0)
-			return (x-x)/0.0;   /* log(-#) = NaN */
-		/* subnormal number, scale up x */
+	if (hx < 0x00100000 || hx>>31) {
+		if (u.i<<1 == 0)
+			return -1/(x*x);  /* log(+-0)=-inf */
+		if (hx>>31)
+			return (x-x)/0.0; /* log(-#) = NaN */
+		/* subnormal number, scale x up */
 		k -= 54;
-		x *= two54;
-		GET_HIGH_WORD(hx, x);
-	}
-	if (hx >= 0x7ff00000)
-		return x+x;
-	if (hx == 0x3ff00000 && lx == 0)
-		return 0.0;  /* log(1) = +0 */
-	k += (hx>>20) - 1023;
-	hx &= 0x000fffff;
-	i = (hx+0x95f64)&0x100000;
-	SET_HIGH_WORD(x, hx|(i^0x3ff00000));  /* normalize x or x/2 */
-	k += i>>20;
-	y = (double)k;
+		x *= 0x1p54;
+		u.f = x;
+		hx = u.i>>32;
+	} else if (hx >= 0x7ff00000) {
+		return x;
+	} else if (hx == 0x3ff00000 && u.i<<32 == 0)
+		return 0;
+
+	/* reduce x into [sqrt(2)/2, sqrt(2)] */
+	hx += 0x3ff00000 - 0x3fe6a09e;
+	k += (int)(hx>>20) - 0x3ff;
+	hx = (hx&0x000fffff) + 0x3fe6a09e;
+	u.i = (uint64_t)hx<<32 | (u.i&0xffffffff);
+	x = u.f;
+
 	f = x - 1.0;
 	hfsq = 0.5*f*f;
-	r = __log1p(f);
+	s = f/(2.0+f);
+	z = s*s;
+	w = z*z;
+	t1 = w*(Lg2+w*(Lg4+w*Lg6));
+	t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
+	R = t2 + t1;
 
 	/* See log2.c for details. */
+	/* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
 	hi = f - hfsq;
-	SET_LOW_WORD(hi, 0);
-	lo = (f - hi) - hfsq + r;
+	u.f = hi;
+	u.i &= (uint64_t)-1<<32;
+	hi = u.f;
+	lo = f - hi - hfsq + s*(hfsq+R);
+
+	/* val_hi+val_lo ~ log10(1+f) + k*log10(2) */
 	val_hi = hi*ivln10hi;
-	y2 = y*log10_2hi;
-	val_lo = y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi;
+	dk = k;
+	y = dk*log10_2hi;
+	val_lo = dk*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi;
 
 	/*
-	 * Extra precision in for adding y*log10_2hi is not strictly needed
+	 * Extra precision in for adding y is not strictly needed
 	 * since there is no very large cancellation near x = sqrt(2) or
 	 * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
 	 * with some parallelism and it reduces the error for many args.
 	 */
-	w = y2 + val_hi;
-	val_lo += (y2 - w) + val_hi;
+	w = y + val_hi;
+	val_lo += (y - w) + val_hi;
 	val_hi = w;
 
 	return val_lo + val_hi;