about summary refs log tree commit diff
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2012-04-16 16:03:45 -0400
committer Rich Felker <dalias@aerifal.cx> 2012-04-16 16:03:45 -0400
commit 18efeb320b763e541a7dbf61a7da1cbe13ab2be9 (patch)
tree f8eb42a87b9c07ad8d9de0380356af3541b425aa
parent cc762434d91a2f441a1d2f44962ab1d4854b607b (diff)
download musl-18efeb320b763e541a7dbf61a7da1cbe13ab2be9.tar.gz
musl-18efeb320b763e541a7dbf61a7da1cbe13ab2be9.tar.xz
musl-18efeb320b763e541a7dbf61a7da1cbe13ab2be9.zip
new scanf implementation and corresponding integer parser/converter
advantages over the old code:
- correct results for floating point (old code was bogus)
- wide/regular scanf separated so scanf does not pull in wide code
- well-defined behavior on integers that overflow dest type
- support for %[a-b] ranges with %[ (impl-defined but widely used)
- no intermediate conversion of fmt string to wide string
- cleaner, easier to share code with strto* functions
- better standards conformance for corner cases

the old code remains in the source tree, as the wide versions of the
scanf-family functions are still using it. it will be removed when no
longer needed.
-rw-r--r-- src/internal/intscan.c 97
-rw-r--r-- src/internal/intscan.h 8
-rw-r--r-- src/internal/stdio_impl.h 2
-rw-r--r-- src/stdio/__string_read.c 13
-rw-r--r-- src/stdio/vfscanf.c 338
-rw-r--r-- src/stdio/vsscanf.c 22
6 files changed, 450 insertions, 30 deletions
diff --git a/src/internal/intscan.c b/src/internal/intscan.c
new file mode 100644
index 00000000..a00f2ccc
--- /dev/null
+++ b/src/internal/intscan.c
@@ -0,0 +1,97 @@
+#include <limits.h>
+#include <errno.h>
+#include "shgetc.h"
+
+/* Lookup table for digit values. -1==255>=36 -> invalid */
+static const unsigned char table[] = { -1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
+-1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
+25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
+-1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
+25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+};
+
+unsigned long long __intscan(FILE *f, unsigned base, int pok, unsigned long long lim)
+{
+	const unsigned char *val = table+1;
+	int c, neg=0;
+	unsigned x;
+	unsigned long long y;
+	if (base > 36) {
+		errno = EINVAL;
+		return 0;
+	}
+	c = shgetc(f);
+	if (c=='+' || c=='-') {
+		neg = -(c=='-');
+		c = shgetc(f);
+	}
+	if ((base == 0 || base == 16) && c=='0') {
+		c = shgetc(f);
+		if ((c|32)=='x') {
+			c = shgetc(f);
+			if (val[c]>=16) {
+				shunget(f);
+				if (pok) shunget(f);
+				else shlim(f, 0);
+				return 0;
+			}
+			base = 16;
+		} else if (base == 0) {
+			base = 8;
+		}
+	} else {
+		if (base == 0) base = 10;
+		if (val[c] >= base) {
+			shlim(f, 0);
+			errno = EINVAL;
+			return 0;
+		}
+	}
+	if (base == 10) {
+		for (x=0; c-'0'<10U && x<=UINT_MAX/10-1; c=shgetc(f))
+			x = x*10 + (c-'0');
+		for (y=x; c-'0'<10U && y<=ULLONG_MAX/10 && 10*y<=ULLONG_MAX-(c-'0'); c=shgetc(f))
+			y = y*10 + (c-'0');
+		if (c-'0'>=10U) goto done;
+	} else if (!(base & base-1)) {
+		int bs = "\0\1\2\4\7\3\6\5"[(0x17*base)>>5&7];
+		for (x=0; val[c]<base && x<=UINT_MAX/32; c=shgetc(f))
+			x = x<<bs | val[c];
+		for (y=x; val[c]<base && y<=ULLONG_MAX>>bs; c=shgetc(f))
+			y = y<<bs | val[c];
+	} else {
+		for (x=0; val[c]<base && x<=UINT_MAX/36-1; c=shgetc(f))
+			x = x*base + val[c];
+		for (y=x; val[c]<base && y<=ULLONG_MAX/base && base*y<=ULLONG_MAX-val[c]; c=shgetc(f))
+			y = y*base + val[c];
+	}
+	if (val[c]<base) {
+		for (; val[c]<base; c=shgetc(f));
+		errno = ERANGE;
+		y = lim;
+	}
+done:
+	shunget(f);
+	if (y>=lim) {
+		if (!(lim&1) && !neg) {
+			errno = ERANGE;
+			return lim-1;
+		} else if (y>lim) {
+			errno = ERANGE;
+			return lim;
+		}
+	}
+	return (y^neg)-neg;
+}
diff --git a/src/internal/intscan.h b/src/internal/intscan.h
new file mode 100644
index 00000000..994c5e7d
--- /dev/null
+++ b/src/internal/intscan.h
@@ -0,0 +1,8 @@
+#ifndef INTSCAN_H
+#define INTSCAN_H
+
+#include <stdio.h>
+
+unsigned long long __intscan(FILE *, unsigned, int, unsigned long long);
+
+#endif
diff --git a/src/internal/stdio_impl.h b/src/internal/stdio_impl.h
index 5ec296f3..af7aacc8 100644
--- a/src/internal/stdio_impl.h
+++ b/src/internal/stdio_impl.h
@@ -69,6 +69,8 @@ size_t __stdout_write(FILE *, const unsigned char *, size_t);
 off_t __stdio_seek(FILE *, off_t, int);
 int __stdio_close(FILE *);
 
+size_t __string_read(FILE *, unsigned char *, size_t);
+
 int __toread(FILE *);
 int __towrite(FILE *);
 
diff --git a/src/stdio/__string_read.c b/src/stdio/__string_read.c
new file mode 100644
index 00000000..5c3728d7
--- /dev/null
+++ b/src/stdio/__string_read.c
@@ -0,0 +1,13 @@
+#include "stdio_impl.h"
+
+size_t __string_read(FILE *f, unsigned char *buf, size_t len)
+{
+	char *src = f->cookie;
+	size_t k = strnlen(src, len+256);
+	if (k < len) len = k;
+	memcpy(buf, src, len);
+	f->rpos = (void *)(src+len);
+	f->rend = (void *)(src+k);
+	f->cookie = src+k;
+	return len;
+}
diff --git a/src/stdio/vfscanf.c b/src/stdio/vfscanf.c
index 414c2a3d..5c1e49b1 100644
--- a/src/stdio/vfscanf.c
+++ b/src/stdio/vfscanf.c
@@ -1,36 +1,342 @@
 #include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <limits.h>
 #include <string.h>
 #include <errno.h>
-#include <ctype.h>
+#include <math.h>
+#include <float.h>
 
 #include "stdio_impl.h"
-#include "__scanf.h"
+#include "shgetc.h"
+#include "intscan.h"
+#include "floatscan.h"
 
-static void f_read(rctx_t *r)
+#define SIZE_hh -2
+#define SIZE_h  -1
+#define SIZE_def 0
+#define SIZE_l   1
+#define SIZE_L   2
+#define SIZE_ll  3
+
+static void store_int(void *dest, int size, unsigned long long i)
 {
-	FILE *f = r->opaque;
-	if ((r->c = getc_unlocked(f)) >= 0) r->l++;
+	if (!dest) return;
+	switch (size) {
+	case SIZE_hh:
+		*(char *)dest = i;
+		break;
+	case SIZE_h:
+		*(short *)dest = i;
+		break;
+	case SIZE_def:
+		*(int *)dest = i;
+		break;
+	case SIZE_l:
+		*(long *)dest = i;
+		break;
+	case SIZE_ll:
+		*(long long *)dest = i;
+		break;
+	}
 }
 
-int vfscanf(FILE *f, const char *fmt, va_list ap)
+static void *arg_n(va_list ap, unsigned int n)
 {
-	size_t l = strlen(fmt), i, result;
-	rctx_t r = { f_read, (void *)f, 0, isspace };
-	wchar_t fmt2[l+1];
+	void *p;
+	unsigned int i;
+	va_list ap2;
+	va_copy(ap2, ap);
+	for (i=n; i>1; i--) va_arg(ap2, void *);
+	p = va_arg(ap2, void *);
+	va_end(ap2);
+	return p;
+}
 
-	if (l > 0x100000) {
-		errno = ENOMEM;
+static int readwc(int c, wchar_t **wcs, mbstate_t *st)
+{
+	char ch = c;
+	wchar_t wc;
+	switch (mbrtowc(&wc, &ch, 1, st)) {
+	case -1:
 		return -1;
+	case -2:
+		break;
+	default:
+		if (*wcs) *(*wcs)++ = wc;
 	}
-	for (i=0; i<=l; i++) fmt2[i] = (unsigned char)fmt[i];
+	return 0;
+}
+
+int vfscanf(FILE *f, const char *fmt, va_list ap)
+{
+	int width;
+	int size;
+	int alloc;
+	int base;
+	const unsigned char *p;
+	int c, t;
+	char *s;
+	wchar_t *wcs;
+	mbstate_t st;
+	void *dest=NULL;
+	int invert;
+	int matches=0;
+	unsigned long long x;
+	long double y;
+	off_t pos = 0;
 
 	FLOCK(f);
 
-	result = __scanf(&r, fmt2, ap);
+	for (p=(const unsigned char *)fmt; *p; p++) {
+
+		if (isspace(*p)) {
+			while (isspace(p[1])) p++;
+			shlim(f, 0);
+			while (isspace(shgetc(f)));
+			shunget(f);
+			pos += shcnt(f);
+			continue;
+		}
+		if (*p != '%' || p[1] == '%') {
+			p += *p=='%';
+			c = shgetc(f);
+			if (c!=*p) {
+				shunget(f);
+				if (c<0) goto input_fail;
+				goto match_fail;
+			}
+			pos++;
+			continue;
+		}
+
+		p++;
+		if (*p=='*') {
+			dest = 0; p++;
+		} else if (isdigit(*p) && p[1]=='$') {
+			dest = arg_n(ap, *p-'0'); p+=2;
+		} else {
+			dest = va_arg(ap, void *);
+		}
+
+		for (width=0; isdigit(*p); p++) {
+			width = 10*width + *p - '0';
+		}
 
-	if (r.u && r.c >= 0)
-		ungetc(r.c, f);
+		if (*p=='m') {
+			alloc = 1;
+			p++;
+		} else {
+			alloc = 0;
+		}
 
+		size = SIZE_def;
+		switch (*p++) {
+		case 'h':
+			if (*p == 'h') p++, size = SIZE_hh;
+			else size = SIZE_h;
+			break;
+		case 'l':
+			if (*p == 'l') p++, size = SIZE_ll;
+			else size = SIZE_l;
+			break;
+		case 'j':
+			size = SIZE_ll;
+			break;
+		case 'z':
+		case 't':
+			size = SIZE_l;
+			break;
+		case 'L':
+			size = SIZE_L;
+			break;
+		case 'd': case 'i': case 'o': case 'u': case 'x':
+		case 'a': case 'e': case 'f': case 'g':
+		case 'A': case 'E': case 'F': case 'G': case 'X':
+		case 's': case 'c': case '[':
+		case 'S': case 'C':
+		case 'p': case 'n':
+			p--;
+			break;
+		default:
+			goto fmt_fail;
+		}
+
+		t = *p;
+
+		switch (t) {
+		case 'C':
+		case 'c':
+			if (width < 1) width = 1;
+		case 's':
+			if (size == SIZE_l) t &= ~0x20;
+		case 'd': case 'i': case 'o': case 'u': case 'x':
+		case 'a': case 'e': case 'f': case 'g':
+		case 'A': case 'E': case 'F': case 'G': case 'X':
+		case '[': case 'S':
+		case 'p': case 'n':
+			if (width < 1) width = 0;
+			break;
+		default:
+			goto fmt_fail;
+		}
+
+		shlim(f, width);
+
+		if (t != 'n') {
+			if (shgetc(f) < 0) goto input_fail;
+			shunget(f);
+		}
+
+		switch (t) {
+		case 'n':
+			store_int(dest, size, pos);
+			/* do not increment match count, etc! */
+			continue;
+		case 'C':
+			wcs = dest;
+			st = (mbstate_t){ 0 };
+			while ((c=shgetc(f)) >= 0) {
+				if (readwc(c, &wcs, &st) < 0)
+					goto input_fail;
+			}
+			if (!mbsinit(&st)) goto input_fail;
+			if (shcnt(f) != width) goto match_fail;
+			break;
+		case 'c':
+			if (dest) {
+				s = dest;
+				while ((c=shgetc(f)) >= 0) *s++ = c;
+			} else {
+				while (shgetc(f)>=0);
+			}
+			if (shcnt(f) < width) goto match_fail;
+			break;
+		case '[':
+			s = dest;
+			wcs = dest;
+
+			if (*++p == '^') p++, invert = 1;
+			else invert = 0;
+
+			unsigned char scanset[257];
+			memset(scanset, invert, sizeof scanset);
+
+			scanset[0] = 0;
+			if (*p == '-') p++, scanset[1+'-'] = 1-invert;
+			if (*p == ']') p++, scanset[1+']'] = 1-invert;
+			for (; *p && *p != ']'; p++) {
+				if (*p=='-' && p[1] != ']')
+					for (c=p++[-1]; c<*p; c++)
+						scanset[1+c] = 1-invert;
+				scanset[1+*p] = 1-invert;
+			}
+			if (!*p) goto fmt_fail;
+
+			if (size == SIZE_l) {
+				st = (mbstate_t){0};
+				while (scanset[(c=shgetc(f))+1]) {
+					if (readwc(c, &wcs, &st) < 0)
+						goto input_fail;
+				}
+				if (!mbsinit(&st)) goto input_fail;
+				s = 0;
+			} else if (s) {
+				while (scanset[(c=shgetc(f))+1])
+					*s++ = c;
+				wcs = 0;
+			} else {
+				while (scanset[(c=shgetc(f))+1]);
+			}
+			shunget(f);
+			if (!shcnt(f)) goto match_fail;
+			if (s) *s = 0;
+			if (wcs) *wcs = 0;
+			break;
+		default:
+			shlim(f, 0);
+			while (isspace(shgetc(f)));
+			shunget(f);
+			pos += shcnt(f);
+			shlim(f, width);
+			if (shgetc(f) < 0) goto input_fail;
+			shunget(f);
+		}
+
+		switch (t) {
+		case 'p':
+		case 'X':
+		case 'x':
+			base = 16;
+			goto int_common;
+		case 'o':
+			base = 8;
+			goto int_common;
+		case 'd':
+		case 'u':
+			base = 10;
+			goto int_common;
+		case 'i':
+			base = 0;
+		int_common:
+			x = __intscan(f, base, 0, ULLONG_MAX);
+			if (!shcnt(f)) goto match_fail;
+			if (t=='p') *(void **)dest = (void *)(uintptr_t)x;
+			else store_int(dest, size, x);
+			break;
+		case 'a': case 'A':
+		case 'e': case 'E':
+		case 'f': case 'F':
+		case 'g': case 'G':
+			y = __floatscan(f, -1, size, 0);
+			if (!shcnt(f)) goto match_fail;
+			if (dest) switch (size) {
+			case SIZE_def:
+				*(float *)dest = y;
+				break;
+			case SIZE_l:
+				*(double *)dest = y;
+				break;
+			case SIZE_L:
+				*(long double *)dest = y;
+				break;
+			}
+			break;
+		case 'S':
+			wcs = dest;
+			st = (mbstate_t){ 0 };
+			while (!isspace(c=shgetc(f)) && c!=EOF) {
+				if (readwc(c, &wcs, &st) < 0)
+					goto input_fail;
+			}
+			if (!mbsinit(&st)) goto input_fail;
+			if (dest) *wcs++ = 0;
+			break;
+		case 's':
+			if (dest) {
+				s = dest;
+				while (!isspace(c=shgetc(f)) && c!=EOF)
+					*s++ = c;
+				*s = 0;
+			} else {
+				while (!isspace(c=shgetc(f)) && c!=EOF);
+			}
+			shunget(f);
+			break;
+		}
+
+		pos += shcnt(f);
+		if (dest) matches++;
+	}
+	if (0) {
+fmt_fail:
+input_fail:
+		if (!matches) matches--;
+	}
+match_fail:
 	FUNLOCK(f);
-	return result;
+	return matches;
 }
diff --git a/src/stdio/vsscanf.c b/src/stdio/vsscanf.c
index fd48f709..fbc15e69 100644
--- a/src/stdio/vsscanf.c
+++ b/src/stdio/vsscanf.c
@@ -1,21 +1,15 @@
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
+#include "stdio_impl.h"
 
-#include "__scanf.h"
-
-static void s_read(rctx_t *r)
+static size_t do_read(FILE *f, unsigned char *buf, size_t len)
 {
-	unsigned char *s = r->opaque;
-	if (!s[r->l]) r->c = -1;
-	else r->c = s[r->l++];
+	return __string_read(f, buf, len);
 }
 
 int vsscanf(const char *s, const char *fmt, va_list ap)
 {
-	size_t l = strlen(fmt), i;
-	wchar_t fmt2[l+1];
-	rctx_t r = { s_read, (void *)s, 0, isspace };
-	for (i=0; i<=l; i++) fmt2[i] = (unsigned char)fmt[i];
-	return __scanf(&r, fmt2, ap);
+	FILE f = {
+		.buf = (void *)s, .cookie = (void *)s,
+		.read = do_read, .lock = -1
+	};
+	return vfscanf(&f, fmt, ap);
 }