new restartable integer parsing framework.

this fixes a number of bugs in integer parsing due to lazy haphazard wrapping, as well as some misinterpretations of the standard. the new parser is able to work character-at-a-time or on whole strings, making it easy to support the wide functions without unbounded space for conversion. it will also be possible to update scanf to use the new parser.
author: Rich Felker <dalias@aerifal.cx> 2011-07-14 00:51:45 -0400
committer: Rich Felker <dalias@aerifal.cx> 2011-07-14 00:51:45 -0400
commit: ecc9c5fcfa4831b290cc1a63c0346cbb0c1fcf42 (patch)
tree: 2fb20d623af9622cb8ac9f461e542ca23fc6d791 /src/internal/intparse.c
parent: 0e2331c9b6e0c0b4f24019d4062f4c655d28cbaf (diff)
download: musl-ecc9c5fcfa4831b290cc1a63c0346cbb0c1fcf42.tar.gz
musl-ecc9c5fcfa4831b290cc1a63c0346cbb0c1fcf42.tar.xz
musl-ecc9c5fcfa4831b290cc1a63c0346cbb0c1fcf42.zip
1 files changed, 105 insertions, 0 deletions
diff --git a/src/internal/intparse.c b/src/internal/intparse.c
new file mode 100644
index 00000000..21b07b74
--- /dev/null
+++ b/src/internal/intparse.c
@@ -0,0 +1,105 @@
+#include <stdint.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "intparse.h"
+
+/* Digit value of each byte, indexed by unsigned char. The -1 entries convert to 255, which is >= every base up to 36, so those bytes are never accepted as digits. */
+static const unsigned char digits[] = {
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
+-1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
+25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
+-1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
+25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+};
+
+#define SLIM (UINT_MAX/36-1) /* x<=SLIM implies x*36+35 < UINT_MAX: one more digit fits in any base up to 36 */
+#define LLIM(b) (UINTMAX_MAX/(b)-1) /* x<=LLIM(b) implies x*b+(b-1) < UINTMAX_MAX: one more base-b digit fits */
+/* Feed n bytes of buf to the restartable parser state *v. Returns 1 when all n bytes were consumed and the number may still continue; returns 0 once parsing has stopped, either with v->err = EINVAL (no digit found) or at the first byte past the number, with v->cnt trimmed to exclude the unconsumed bytes (v->err = ERANGE if the value overflowed). */
+int __intparse(struct intparse *v, const void *buf, size_t n)
+{
+	const unsigned char *s = buf;
+	int d, b = v->base; /* b caches v->base; 0 means the base is detected from the prefix */
+
+	v->cnt += n; /* count everything up front; the unconsumed tail is subtracted at finished */
+	for (; n; n--, s++) switch (v->state) {
+	case 0: /* optional sign */
+		v->state++;
+		if (*s=='+' || *s=='-') {
+			v->neg = *s=='-';
+			continue;
+		}
+	case 1: /* a leading '0' may start a hex or octal prefix */
+		v->state++;
+		if (*s=='0' && (!b || b==16)) continue;
+		if (!b) v->base = b = 10;
+		v->state++;
+		goto firstdigit;
+	case 2: /* byte right after a leading '0' */
+		v->state++;
+		if ((!b || b==16) && (*s|32) == 'x') {
+			v->base = b = 16;
+			continue;
+		}
+		if (!b) v->base = b = 8;
+		goto seconddigit;
+	case 3: /* NOTE(review): "0x" with no hex digit after it ends up here and is rejected as EINVAL; ISO C strtol would accept the "0" - confirm against callers */
+	firstdigit:
+		if (digits[*s] >= b) {
+			v->err = EINVAL;
+			return 0;
+		}
+	seconddigit: /* reached after a leading '0', which already counts as a digit */
+		v->state++;
+	case 4: /* accumulate in v->small while it stays <= SLIM */
+		if (b==10) {
+			for (; n && *s-'0'<10U && v->small<=SLIM; n--, s++)
+				v->small = v->small * 10 + (*s-'0');
+		} else if ((b&-b) == b) {
+			int bs = "\0\1\2\4\7\3\6\5"[(0x17*b)>>5&7]; /* shift count log2(b) for b = 2,4,8,16,32 */
+			for (; n && (d=digits[*s])<b && v->small<=SLIM; n--, s++)
+				v->small = (v->small<<bs) + d;
+		} else {
+			for (; n && (d=digits[*s])<b && v->small<=SLIM; n--, s++)
+				v->small = v->small * b + d;
+		}
+		if (!n) return 1;
+		v->state++;
+		v->val = v->small;
+	case 5: /* continue in v->val while another base-b digit is certain to fit */
+		for (; n && (d=digits[*s])<b && v->val<=LLIM(b); n--, s++)
+			v->val = v->val * b + d;
+		if (!n) return 1;
+		if (d >= b) goto finished;
+		if (v->val <= (UINTMAX_MAX-d)/b) /* exact test: val*b+d <= UINTMAX_MAX, so the maximum itself is accepted */
+			v->val = v->val * b + d;
+		else
+			v->err = ERANGE;
+		v->state++;
+		n--; s++;
+	case 6: /* v->val >= b*(UINTMAX_MAX/b) or already ERANGE: any further digit overflows */
+		if (n && digits[*s]<b) {
+			v->err = ERANGE;
+			v->val = UINTMAX_MAX;
+			n--; s++;
+		}
+		for (; n && digits[*s]<b; n--, s++); /* skip the rest of the digit run */
+		if (!n) return 1;
+		goto finished; /* first non-digit ends the number; do not consume it */
+	}
+	return 1;
+finished:
+	v->cnt -= n; /* n bytes, starting with the terminating one, were not consumed */
+	return 0;
+}
author	Rich Felker <dalias@aerifal.cx>	2011-07-14 00:51:45 -0400
committer	Rich Felker <dalias@aerifal.cx>	2011-07-14 00:51:45 -0400
commit	ecc9c5fcfa4831b290cc1a63c0346cbb0c1fcf42 (patch)
tree	2fb20d623af9622cb8ac9f461e542ca23fc6d791 /src/internal/intparse.c
parent	0e2331c9b6e0c0b4f24019d4062f4c655d28cbaf (diff)
download	musl-ecc9c5fcfa4831b290cc1a63c0346cbb0c1fcf42.tar.gz musl-ecc9c5fcfa4831b290cc1a63c0346cbb0c1fcf42.tar.xz musl-ecc9c5fcfa4831b290cc1a63c0346cbb0c1fcf42.zip