about summary refs log tree commit diff
path: root/src/internal/floatscan.c
diff options
context:
space:
mode:
authorRich Felker <dalias@aerifal.cx>2012-04-10 11:52:55 -0400
committerRich Felker <dalias@aerifal.cx>2012-04-10 11:52:55 -0400
commit415c4cd7fdb3e8b7476fbb2be2390f4592cf5165 (patch)
tree8dd9bb181dd259227dd6101633f81880cd0157f3 /src/internal/floatscan.c
parent3be616c1df7ee176b5e00b9f493136ca7385ec46 (diff)
downloadmusl-415c4cd7fdb3e8b7476fbb2be2390f4592cf5165.tar.gz
musl-415c4cd7fdb3e8b7476fbb2be2390f4592cf5165.tar.xz
musl-415c4cd7fdb3e8b7476fbb2be2390f4592cf5165.zip
new floating point parser/converter
this version is intended to be fully conformant to the ISO C, POSIX,
and IEEE standards for conversion of decimal/hex floating point
strings to float, double, and long double (ld64 or ld80 only at
present) values. in particular, all results are intended to be rounded
correctly according to the current rounding mode. further, this
implementation aims to set the floating point underflow, overflow, and
inexact flags to reflect the conversion performed.

a moderate amount of testing has been performed (by nsz and myself)
prior to integration of the code in musl, but it still may have bugs.

so far, only strto(d|ld|f) use the new code. scanf integration will be
done as a separate commit, and i will add implementations of the wide
character functions later.
Diffstat (limited to 'src/internal/floatscan.c')
-rw-r--r--src/internal/floatscan.c438
1 files changed, 438 insertions, 0 deletions
diff --git a/src/internal/floatscan.c b/src/internal/floatscan.c
new file mode 100644
index 00000000..15ad5e12
--- /dev/null
+++ b/src/internal/floatscan.c
@@ -0,0 +1,438 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+
+#include "floatscan.h"
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+
+#define LD_B1B_DIG 2
+#define LD_B1B_MAX 9007199, 254740991
+#define KMAX 128
+
+#else /* LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 */
+
+#define LD_B1B_DIG 3
+#define LD_B1B_MAX 18, 446744073, 709551615
+#define KMAX 2048
+
+#endif
+
+#define MASK (KMAX-1)
+
+
+#if 1
+#include "stdio_impl.h"
+#undef ungetc
+#define ungetc(c,f) ((f)->rpos--,(c))
+#undef getc
+#define getc getc_unlocked
+#endif
+
+
+static long long scanexp(FILE *f, off_t *pcnt)
+{
+	int c;
+	int x;
+	long long y;
+	int neg = 0;
+	
+	*pcnt += (c=getc(f))>=0;
+	if (c=='+' || c=='-') {
+		neg = (c=='-');
+		*pcnt += (c=getc(f))>=0;
+		if (c-'0'>=10U) {
+			if (c>=0) {
+				ungetc(c, f);
+				--*pcnt;
+			}
+			return LLONG_MIN;
+		}
+	}
+	for (x=0; c-'0'<10U && x<INT_MAX/10; *pcnt += (c=getc(f))>=0)
+		x = 10*x + c-'0';
+	for (y=x; c-'0'<10U && x<LLONG_MAX/10; *pcnt += (c=getc(f))>=0)
+		y = 10*y + c-'0';
+	for (; c-'0'<10U; *pcnt += (c=getc(f))>=0);
+	if (c>=0) {
+		ungetc(c, f);
+		--*pcnt;
+	}
+	return neg ? -y : y;
+}
+
+
+static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int pok, off_t *pcnt)
+{
+	uint32_t x[KMAX];
+	static const uint32_t th[] = { LD_B1B_MAX };
+	int i, j, k, a, z;
+	long long lrp=-1, dc=0;
+	int gotdig = 0;
+	int rp;
+	int e10=0;
+	int e2;
+	long double y;
+	long double frac=0;
+	long double bias=0;
+
+	j=0;
+	k=0;
+
+	if (c<0) *pcnt += (c=getc(f))>=0;
+
+	/* Don't let leading zeros consume buffer space */
+	for (; c=='0'; *pcnt += (c=getc(f))>=0) gotdig=1;
+
+	x[0] = 0;
+	for (; c-'0'<10U || c=='.'; *pcnt += (c=getc(f))>=0) {
+		if (c == '.') {
+			if (lrp!=-1) break;
+			lrp = dc;
+		} else if (k < KMAX) {
+			dc++;
+			if (j) x[k] = x[k]*10 + c-'0';
+			else x[k] = c-'0';
+			if (++j==9) {
+				k++;
+				j=0;
+			}
+			gotdig=1;
+		} else {
+			dc++;
+			x[KMAX-1] |= c-'0';
+		}
+	}
+	if (lrp==-1) lrp=dc;
+
+	if (gotdig && (c|32)=='e') {
+		e10 = scanexp(f, pcnt);
+		if (e10 == LLONG_MIN) {
+			if (!pok) {
+				*pcnt = 0;
+				return 0;
+			}
+			e10 = 0;
+		}
+		lrp += e10;
+	} else if (c>=0) {
+		ungetc(c, f);
+		--*pcnt;
+	}
+	if (!gotdig) {
+		*pcnt = 0;
+		return 0;
+	}
+
+	if (!x[0])
+		return sign * 0.0;
+	if (lrp==dc && (!k || (k==1 && !j)) && (bits>30 || x[0]>>bits==0))
+		return sign * (long double)x[0];
+	if (lrp > -emin/2)
+		return sign * LDBL_MAX * LDBL_MAX;
+	if (lrp < emin-2*LDBL_MANT_DIG)
+		return sign * LDBL_MIN * LDBL_MIN;
+
+	if (k<KMAX && j) {
+		for (; j<9; j++) x[k]*=10;
+		k++;
+		j=0;
+	}
+
+	a = 0;
+	z = k;
+	e2 = 0;
+	rp = lrp;
+
+	while (rp < 18+9*LD_B1B_DIG) {
+		uint32_t carry = 0;
+		e2 -= 29;
+		for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
+			uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
+			if (tmp > 1000000000) {
+				carry = tmp / 1000000000;
+				x[k] = tmp % 1000000000;
+			} else {
+				carry = 0;
+				x[k] = tmp;
+			}
+			if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
+			if (k==a) break;
+		}
+		if (carry) {
+			rp += 9;
+			if (a == z) {
+				z = (z-1 & MASK);
+				x[z-1 & MASK] |= x[z];
+			}
+			a = (a-1 & MASK);
+			x[a] = carry;
+		}
+	}
+
+	if (rp % 9) {
+		static const int p10s[] = {
+			100000000, 10000000, 1000000, 100000,
+			10000, 1000, 100, 10
+		};
+		int rpm9 = rp % 9;
+		int p10 = p10s[rpm9-1];
+		uint32_t carry = 0;
+		for (k=a; k!=z; k=(k+1 & MASK)) {
+			uint32_t tmp = x[k] % p10;
+			x[k] = x[k]/p10 + carry;
+			carry = 1000000000/p10 * tmp;
+			if (k==a && !x[k]) {
+				a = (a+1 & MASK);
+				rp -= 9;
+			}
+		}
+		if (carry) {
+			if ((z+1 & MASK) != a) {
+				x[z] = carry;
+				z = (z+1 & MASK);
+			} else x[z-1 & MASK] |= 1;
+		}
+		rp += 9-rpm9;
+	}
+
+	for (;;) {
+		uint32_t carry = 0;
+		int sh = 1;
+		for (i=0; i<LD_B1B_DIG; i++) {
+			k = (a+i & MASK);
+			if (k == z || x[k] < th[i]) {
+				i=LD_B1B_DIG;
+				break;
+			}
+			if (x[a+i & MASK] > th[i]) break;
+		}
+		if (i==LD_B1B_DIG && rp==9*LD_B1B_DIG) break;
+		/* FIXME: find a way to compute optimal sh */
+		if (rp > 9+9*LD_B1B_DIG) sh = 9;
+		e2 += sh;
+		for (k=a; k!=z; k=(k+1 & MASK)) {
+			uint32_t tmp = x[k] & (1<<sh)-1;
+			x[k] = (x[k]>>sh) + carry;
+			carry = (1000000000>>sh) * tmp;
+			if (k==a && !x[k]) {
+				a = (a+1 & MASK);
+				rp -= 9;
+			}
+		}
+		if (carry) {
+			if ((z+1 & MASK) != a) {
+				x[z] = carry;
+				z = (z+1 & MASK);
+			} else x[z-1 & MASK] |= 1;
+		}
+	}
+
+	for (y=i=0; i<LD_B1B_DIG && (a+i & MASK)!=z; i++)
+		y = 1000000000.0L * y + x[a+i & MASK];
+
+	y *= sign;
+
+	if (bits > LDBL_MANT_DIG+e2-emin) {
+		bits = LDBL_MANT_DIG+e2-emin;
+		if (bits<0) bits=0;
+	}
+
+	if (bits < LDBL_MANT_DIG) {
+		bias = copysignl(scalbn(1, 2*LDBL_MANT_DIG-bits-1), y);
+		frac = fmodl(y, scalbn(1, LDBL_MANT_DIG-bits));
+		y -= frac;
+		y += bias;
+	}
+
+	if ((a+i & MASK) != z) {
+		uint32_t t = x[a+i & MASK];
+		if (t < 500000000 && (t || (a+i+1 & MASK) != z))
+			frac += 0.25*sign;
+		else if (t > 500000000)
+			frac += 0.75*sign;
+		else if (t == 500000000) {
+			if ((a+i+1 & MASK) == z)
+				frac += 0.5*sign;
+			else
+				frac += 0.75*sign;
+		}
+		if (LDBL_MANT_DIG-bits >= 2 && !fmodl(frac, 1))
+			frac++;
+	}
+
+	y += frac;
+	y -= bias;
+
+	y = scalbnl(y, e2);
+
+	return y;
+}
+
+static long double hexfloat(FILE *f, int c, int bits, int emin, int sign, int pok, off_t *pcnt)
+{
+	uint32_t x = 0;
+	long double y = 0;
+	long double scale = 1;
+	long double bias = 0;
+	int gottail = 0, gotrad = 0, gotdig = 0;
+	long long rp = 0;
+	long long dc = 0;
+	long long e2 = 0;
+	int d;
+
+	if (c<0) *pcnt += (c=getc(f))>=0;
+
+	/* Skip leading zeros */
+	for (; c=='0'; *pcnt += (c=getc(f))>=0) gotdig = 1;
+
+	if (c=='.') {
+		gotrad = 1;
+		*pcnt += (c=getc(f))>=0;
+		/* Count zeros after the radix point before significand */
+		for (rp=0; c=='0'; *pcnt += (c=getc(f))>=0, rp--) gotdig = 1;
+	}
+
+	for (; c-'0'<10U || (c|32)-'a'<6U || c=='.'; *pcnt += (c=getc(f))>=0) {
+		if (c=='.') {
+			if (gotrad) break;
+			rp = dc;
+			gotrad = 1;
+		} else {
+			gotdig = 1;
+			if (c > '9') d = (c|32)+10-'a';
+			else d = c-'0';
+			if (dc<8) {
+				x = x*16 + d;
+			} else if (dc < LDBL_MANT_DIG/4+1) {
+				y += d*(scale/=16);
+			} else if (d && !gottail) {
+				y += 0.5*scale;
+				gottail = 1;
+			}
+			dc++;
+		}
+	}
+	if (!gotdig) {
+		if (c>=0) {
+			ungetc(c, f);
+			--*pcnt;
+		}
+		if (pok) *pcnt -= 1+gotrad; /* uncount the rp, x of 0x */
+		else *pcnt = 0;
+		return 0;
+	}
+	if (!gotrad) rp = dc;
+	while (dc<8) x *= 16, dc++;
+	if ((c|32)=='p') {
+		e2 = scanexp(f, pcnt);
+		if (e2 == LLONG_MIN) {
+			if (!pok) {
+				*pcnt = 0;
+				return 0;
+			}
+			e2 = 0;
+		}
+	}
+	e2 += 4*rp - 32;
+
+	if (!x) return sign * 0.0;
+	if (e2 > -emin) return sign * LDBL_MAX * LDBL_MAX;
+	if (e2 < emin-2*LDBL_MANT_DIG) return sign * LDBL_MIN * LDBL_MIN;
+
+	while (x < 0x80000000) {
+		if (y>=0.5) {
+			x += x + 1;
+			y += y - 1;
+		} else {
+			x += x;
+			y += y;
+		}
+		e2--;
+	}
+
+	if (bits > 32+e2-emin) {
+		bits = 32+e2-emin;
+		if (bits<0) bits=0;
+	}
+
+	if (bits < LDBL_MANT_DIG)
+		bias = copysignl(scalbn(1, 32+LDBL_MANT_DIG-bits-1), sign);
+
+	if (bits<32 && y && !(x&1)) x++, y=0;
+
+	y = bias + sign*(long double)x + sign*y;
+	y -= bias;
+
+	return scalbnl(y, e2);
+}
+
+long double __floatscan(FILE *f, int c, int prec, int pok, off_t *pcnt)
+{
+	int sign = 1;
+	int i;
+	int bits;
+	int emin;
+
+	*pcnt = 0;
+
+	switch (prec) {
+	case 0:
+		bits = 24;
+		emin = -149;
+		break;
+	case 1:
+		bits = 53;
+		emin = -1074;
+		break;
+	case 2:
+		bits = LDBL_MANT_DIG;
+		emin = -16445;
+		break;
+	default:
+		return 0;
+	}
+
+	if (c<0) *pcnt += (c=getc(f))>=0;
+
+	if (c=='+' || c=='-') {
+		sign -= 2*(c=='-');
+		*pcnt += (c=getc(f))>=0;
+	}
+
+	for (i=0; i<8 && (c|32)=="infinity"[i]; i++)
+		if (i<7) c = getc(f);
+	if (i==3 || i==8 || (i>3 && pok)) {
+		if (i==3 && c>=0) ungetc(c, f);
+		if (i==8) *pcnt += 7;
+		else *pcnt += 2;
+		return sign * INFINITY;
+	}
+	if (!i) for (i=0; i<3 && (c|32)=="nan"[i]; i++)
+		if (i<3) c = getc(f);
+	if (i==3) {
+		*pcnt += 2;
+		return sign>0 ? NAN : -NAN;
+	}
+
+	if (i) {
+		if (c>=0) ungetc(c, f);
+		*pcnt = 0;
+		return 0;
+	}
+
+	if (c=='0') {
+		*pcnt += (c=getc(f))>=0;
+		if ((c|32) == 'x')
+			return hexfloat(f, -1, bits, emin, sign, pok, pcnt);
+		if (c>=0) {
+			ungetc(c, f);
+			--*pcnt;
+		}
+		c = '0';
+	}
+
+	return decfloat(f, c, bits, emin, sign, pok, pcnt);
+}