about summary refs log tree commit diff
path: root/src/stdio/__scanf.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/stdio/__scanf.c')
-rw-r--r--src/stdio/__scanf.c487
1 files changed, 487 insertions, 0 deletions
diff --git a/src/stdio/__scanf.c b/src/stdio/__scanf.c
new file mode 100644
index 00000000..185615d3
--- /dev/null
+++ b/src/stdio/__scanf.c
@@ -0,0 +1,487 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <limits.h>
+#include <string.h>
+#include <errno.h>
+#include <math.h>
+#include <float.h>
+
+#include "__scanf.h"
+
+/*
+ * Fetch the next input character, honoring the field-width budget in r->w.
+ * Once the budget is used up, r->w is pinned at -1 and that value is
+ * returned. If a character was pushed back (r->u set), it is re-delivered
+ * from r->c without invoking the r->read callback.
+ */
+static int read(rctx_t *r)
+{
+	r->w--;
+	if (r->w < 0) {
+		/* width exhausted: clamp so repeated calls cannot underflow */
+		r->w = -1;
+		return r->w;
+	}
+	if (!r->u) {
+		r->read(r);
+	} else {
+		/* consume the pushed-back character already sitting in r->c */
+		r->u = 0;
+	}
+	return r->c;
+}
+
+/*
+ * Push the most recently read character back so that the next read()
+ * delivers it again. Does nothing once the field width has been
+ * exhausted (r->w < 0). An already-pending pushback is not treated
+ * specially: the width is refunded again and the flag stays set.
+ */
+static void unread(rctx_t *r)
+{
+	if (r->w >= 0) {
+		/* refund the width unit that read() charged */
+		r->w++;
+		r->u = 1;
+	}
+}
+
+/*
+ * Length-modifier codes recorded while parsing a conversion specification
+ * (hh, h, none, l, ll, L). SIZE_def (0) is the value used when no
+ * modifier is present.
+ */
+#define SIZE_hh -2
+#define SIZE_h  -1
+#define SIZE_def 0
+#define SIZE_l   1
+#define SIZE_ll  2
+#define SIZE_L   3
+
+/*
+ * Write an integer conversion result through dest, using the object type
+ * selected by the length-modifier code.
+ *
+ * dest - target object, or NULL to discard the value
+ * size - one of SIZE_hh, SIZE_h, SIZE_def, SIZE_l, SIZE_ll; any other
+ *        code stores nothing
+ * neg  - nonzero if the magnitude must be negated before storing
+ * i    - magnitude accumulated by the caller
+ */
+static void store_int(void *dest, int size, int neg, unsigned long long i)
+{
+	unsigned long long val;
+
+	if (dest == NULL) return;
+
+	/* negation of an unsigned value wraps, so this is well defined */
+	val = neg ? -i : i;
+
+	if (size == SIZE_hh)
+		*(char *)dest = val;
+	else if (size == SIZE_h)
+		*(short *)dest = val;
+	else if (size == SIZE_def)
+		*(int *)dest = val;
+	else if (size == SIZE_l)
+		*(long *)dest = val;
+	else if (size == SIZE_ll)
+		*(long long *)dest = val;
+}
+
+/*
+ * Return the n-th (1-based) pointer argument of ap without disturbing ap
+ * itself: the list is copied and the copy is advanced. Every argument is
+ * fetched as a void pointer. n == 0 yields the same result as n == 1.
+ */
+static void *arg_n(va_list ap, unsigned int n)
+{
+	va_list walker;
+	void *result;
+
+	va_copy(walker, ap);
+	/* skip the n-1 arguments preceding the requested one */
+	while (n > 1) {
+		va_arg(walker, void *);
+		n--;
+	}
+	result = va_arg(walker, void *);
+	va_end(walker);
+	return result;
+}
+
+/*
+ * Core scanf engine: walks the wide-character format string fmt, pulls
+ * input through the reader context r, and stores converted values through
+ * the pointer arguments in ap.
+ *
+ * r   - reader context. This function uses r->w as the remaining field
+ *       width consumed by read(), r->wide to choose between wide and byte
+ *       input handling, r->is_space as the whitespace predicate, and
+ *       r->l - r->u as the value stored for %n.
+ * fmt - format string (wide characters)
+ * ap  - pointer arguments, taken sequentially ("%d") or by position
+ *       ("%1$d"); mixing the two forms is a format failure
+ *
+ * Returns the number of conversions that stored a value (%n and
+ * assignment-suppressed conversions are not counted). A matching failure
+ * returns the count reached so far. An encoding, format or input failure
+ * returns -1 if nothing was stored yet, otherwise the count so far.
+ * Encoding failures additionally set errno to EILSEQ.
+ */
+int __scanf(rctx_t *r, const wchar_t *fmt, va_list ap)
+{
+	/* 0 = undecided, 1 = sequential arguments, 2 = positional (%n$) */
+	int mode=0;
+	int width;
+	int size;
+	const wchar_t *p, *z;
+	int c, l, t, m;
+	/* scratch target used when assignment is suppressed */
+	long long dummy;
+	char *s;
+	wchar_t *wcs;
+	mbstate_t st;
+	int wide = r->wide;
+	void *dest=NULL;
+	int invert;
+	unsigned long long i=0;
+	int neg=0;
+	int matches=0;
+	long double f;
+	int (*is_space)(int) = r->is_space;
+
+	for (p=fmt; *p; ) {
+		if (is_space(*p)) {
+			/* whitespace in the format matches any run of input whitespace */
+			do p++; while (is_space(*p));
+			do r->w=1; while (is_space(read(r)));
+			unread(r);
+			continue;
+		} else if (*p != '%' || p[1] == '%') {
+			/* literal character (or "%%"): must match the input exactly */
+			if (*p == '%') p++;
+			r->w = 1;
+			if (*p++ != read(r))
+				goto match_fail;
+			continue;
+		}
+		p++;
+		/* NOTE(review): isdigit() is applied to wchar_t format characters
+		 * here and below; confirm the format never holds values outside
+		 * the range isdigit() accepts. */
+		if (mode != 1) {
+			for (z=p; isdigit(*z); z++);
+			if (*z != '$' && *z != '*') {
+				if (mode == 0) mode = 1;
+				else goto fmt_fail;
+			} else if (*z != '*') {
+				/* "%<pos>$": fetch the destination by position */
+				int pos = 0;
+				mode = 2;
+				for (; p<z; p++) {
+					pos = 10*pos + *p - '0';
+				}
+				p++;
+				if (!pos) goto fmt_fail;
+				dest = arg_n(ap, pos);
+			}
+		}
+		if (*p == '*') {
+			/* assignment suppression */
+			dest = NULL;
+			p++;
+		} else if (mode == 1) {
+			dest = va_arg(ap, void *);
+		}
+
+		if (!*p) goto fmt_fail;
+
+		/* optional maximum field width; 0 means "not specified" */
+		width = 0;
+		for (; isdigit(*p); p++) {
+			width = 10*width + *p - '0';
+		}
+
+		/* optional length modifier; a conversion letter is pushed back */
+		size = 0;
+		switch (*p++) {
+		case 0:
+			goto fmt_fail;
+		case 'h':
+			if (*p == 'h') p++, size = SIZE_hh;
+			else size = SIZE_h;
+			break;
+		case 'l':
+			if (*p == 'l') p++, size = SIZE_ll;
+			else size = SIZE_l;
+			break;
+		case 'j':
+			size = SIZE_ll;
+			break;
+		case 'z':
+		case 't':
+			size = SIZE_l;
+			break;
+		case 'L':
+			size = SIZE_L;
+			break;
+		case 'd': case 'i': case 'o': case 'u': case 'x':
+		case 'a': case 'e': case 'f': case 'g':
+		case 'A': case 'E': case 'F': case 'G': case 'X':
+		case 's': case 'c': case '[':
+		case 'S': case 'C':
+		case 'p': case 'n':
+			p--;
+			break;
+		default:
+			goto fmt_fail;
+		}
+
+		t = *p++;
+
+		/* validate the conversion and establish the default width */
+		switch (t) {
+		case 'C':
+		case 'c':
+			if (width < 1) width = 1;
+			/* fallthrough */
+		case 's':
+			/* "%lc" / "%ls" are handled as 'C' / 'S' */
+			if (size == SIZE_l) t &= ~0x20;
+			/* fallthrough */
+		case 'd': case 'i': case 'o': case 'u': case 'x':
+		case 'a': case 'e': case 'f': case 'g':
+		case 'A': case 'E': case 'F': case 'G': case 'X':
+		case '[': case 'S':
+		case 'p': case 'n':
+			if (width < 1) width = INT_MAX;
+			break;
+		default:
+			goto fmt_fail;
+		}
+
+		r->w = width;
+
+		/* every conversion except %n needs at least one input character */
+		if (t != 'n') {
+			if (read(r) < 0) goto input_fail;
+			unread(r);
+		}
+
+		/* conversions that do not skip leading whitespace are handled
+		 * completely here; all others only skip whitespace (default) */
+		switch (t) {
+		case 'n':
+			store_int(dest, size, 0, r->l - r->u);
+			/* do not increment match count, etc! */
+			continue;
+		case 'C':
+			wcs = dest ? dest : (void *)&dummy;
+			st = (mbstate_t){ 0 };
+			while ((c=read(r)) >= 0) {
+				if (wide) {
+					if (dest) *wcs++ = c;
+				} else {
+					/* byte input: assemble wide characters one byte at a time */
+					char ch = c;
+					switch (mbrtowc(wcs, &ch, 1, &st)) {
+					case -1:
+						goto enc_fail;
+					case -2:
+						break;
+					default:
+						if (dest) wcs++;
+					}
+				}
+			}
+			if (r->w > 0) goto match_fail;
+			break;
+		case 'c':
+			s = dest ? dest : (void *)&dummy;
+			while ((c=read(r)) >= 0) {
+				if (wide) {
+					if ((l=wctomb(s, c)) < 0)
+						goto enc_fail;
+					if (dest) s += l;
+				} else {
+					if (dest) *s++ = c;
+				}
+			}
+			if (r->w > 0) goto match_fail;
+			break;
+		case '[':
+			wcs = dest ? dest : (void *)&dummy;
+			s = dest ? dest : (void *)&dummy;
+			if (!wide && size == SIZE_l) st = (mbstate_t){ 0 };
+
+			if (*p == '^') p++, invert = 1;
+			else invert = 0;
+
+			if (wide) {
+				/* m records whether at least one character was accepted */
+				for (m=0; (c=read(r)) >= 0; m=1) {
+					/* search the set for c; a ']' in first position is a
+					 * member rather than the terminator */
+					for (z=p; *z && *z != c && (*z != ']' || z==p); z++);
+					if (!*z) goto fmt_fail;
+					if (*z == c && (*z != ']' || z==p)) {
+						if (invert) break;
+					} else {
+						if (!invert) break;
+					}
+					if (size == SIZE_l) {
+						if (dest) *wcs++ = c;
+					} else {
+						if ((l=wctomb(s, c)) < 0)
+							goto enc_fail;
+						if (dest) s += l;
+					}
+				}
+				/* advance p past the closing ']' */
+				for (p++; *p && *p != ']'; p++);
+				p++;
+			} else {
+				/* membership table indexed by c+1; slot 0 covers a
+				 * negative return from read() and always rejects */
+				unsigned char scanset[257];
+				memset(scanset, invert, sizeof scanset);
+				scanset[0] = 0;
+				/* NOTE(review): *z is a wchar_t used as an index; confirm
+				 * the format cannot hold values outside 0..255 here. */
+				for (z=p; *z && (*z != ']' || z==p); z++)
+					scanset[1+*z] = 1-invert;
+				if (!*z) goto fmt_fail;
+				p=z+1;
+				c=0;
+				for (m=0; scanset[(c=read(r))+1]; m=1) {
+					if (size == SIZE_l) {
+						char ch = c;
+						switch (mbrtowc(wcs, &ch, 1, &st)) {
+						case -1:
+							goto enc_fail;
+						case -2:
+							break;
+						default:
+							if (dest) wcs++;
+						}
+					} else {
+						if (dest) *s++ = c;
+					}
+				}
+			}
+			if (!m) goto match_fail;
+			if (dest) {
+				if (size == SIZE_l) *wcs++ = 0;
+				else *s++ = 0;
+			}
+			break;
+		default:
+			/* read unlimited number of spaces, then reset width */
+			do r->w = 1; while (is_space(c = read(r)));
+			if (c < 0) goto input_fail;
+			unread(r);
+			r->w = width;
+		}
+
+		/* integer conversions: parse sign and base prefix, and reduce t
+		 * to one of 'd', 'u', 'o', 'x' for the digit loops below */
+		switch (t) {
+		case 'p':
+		case 'X':
+			t = 'x';
+			/* fallthrough */
+		case 'd':
+		case 'i':
+		case 'o':
+		case 'u':
+		case 'x':
+			i = m = neg = 0;
+			if ((c=read(r)) == '-') neg=1;
+			else if (c != '+') unread(r);
+			switch (t) {
+			case 'i':
+			case 'x':
+				if ((c=read(r)) != '0') {
+					if (t == 'i') t = 'd';
+					unread(r);
+					break;
+				}
+				if (((c=read(r))|0x20) != 'x') {
+					/* The leading 0 has been consumed and is itself a
+					 * valid digit, so the conversion already has a match
+					 * even if no further digits follow (e.g. "%x" on "0"). */
+					m = 1;
+					if (t == 'i') {
+						t = 'o';
+						/* lone 0 is valid octal */
+						if ((unsigned)(c-'0') >= 8)
+							goto int_finish;
+					}
+					unread(r);
+					break;
+				}
+				t = 'x';
+			}
+		}
+
+		/* perform the conversion proper */
+		switch (t) {
+		case 'd':
+		case 'u':
+			for (m=0; isdigit(c=read(r)); m=1)
+				i = 10*i + c-'0';
+			goto int_finish;
+		case 'o':
+			for (m=0; (unsigned)(c=read(r))-'0' < 8; m=1)
+				i = (i<<3) + c-'0';
+			goto int_finish;
+		case 'x':
+			/* m is not reset here: it was zeroed with i and neg above and
+			 * may already be 1 when a leading 0 was consumed */
+			for (; ; m=1) {
+				if (isdigit(c=read(r))) {
+					i = (i<<4) + c-'0';
+				} else if ((unsigned)(c|0x20)-'a' < 6) {
+					i = (i<<4) + (c|0x20)-'a'+10;
+				} else break;
+			}
+		int_finish:
+			if (!m) goto match_fail;
+			store_int(dest, size, neg, i);
+			break;
+		/* the uppercase float conversions are accepted by the validation
+		 * above and must be converted exactly like the lowercase ones */
+		case 'a': case 'A':
+		case 'e': case 'E':
+		case 'f': case 'F':
+		case 'g': case 'G':
+			f = 0.0;
+			neg = m = 0;
+			if ((c=read(r)) == '-') neg=1;
+			else if (c != '+') unread(r);
+			/* FIXME: check for INF/NAN strings here */
+			if (read(r)=='0' && (m=1, (read(r)|0x20) == 'x'))
+				goto hexfloat;
+			else unread(r);
+			/* decimal integer part */
+			for (; isdigit(c=read(r)); m=1)
+				f = 10.0 * f + (c-'0');
+			/* decimal fraction */
+			if (c=='.') {
+				double mag = 10.0;
+				for (; isdigit(c=read(r)); mag*=10.0)
+					f += (c-'0')/mag;
+			}
+			/* decimal exponent; at least one exponent digit is required */
+			if ((c|0x20)=='e') {
+				int ex=0, en=0;
+				m = 0;
+				if ((c=read(r))=='-') en=1;
+				else if (c!='+') unread(r);
+				for (; isdigit(c=read(r)); m=1)
+					if (ex < LDBL_MAX_10_EXP)
+						ex = 10 * ex + (c-'0');
+				if (ex > LDBL_MAX_10_EXP)
+					f = en ? 0 : INFINITY;
+				else {
+					if (en) while (ex--) f/=10.0;
+					else while (ex--) f*=10.0;
+				}
+			}
+			goto writefloat;
+hexfloat:
+			/* hexadecimal mantissa following the "0x" prefix */
+			m = 0;
+			for (; isxdigit(c=read(r)); m=1)
+				if (isdigit(c)) f = 16.0*f + (c-'0');
+				else f = 16.0*f + ((c|32)-'a'+10);
+			if (c=='.') {
+				double mag = 1/16.0;
+				for (; isxdigit(c=read(r)); mag*=1/16.0)
+					if (isdigit(c)) f += (c-'0')*mag;
+					else f += ((c|32)-'a'+10)*mag;
+			}
+			/* binary exponent, written in decimal digits */
+			if ((c|0x20)=='p') {
+				int ex=0, en=0;
+				m = 0;
+				if ((c=read(r))=='-') en=1;
+				else if (c!='+') unread(r);
+				for (; isdigit(c=read(r)); m=1)
+					if (ex < LDBL_MAX_EXP)
+						ex = 10 * ex + (c-'0');
+				if (ex > LDBL_MAX_EXP)
+					f = en ? 0 : INFINITY;
+				else {
+					if (en) while (ex--) f*=0.5;
+					else while (ex--) f*=2.0;
+				}
+			}
+writefloat:
+			if (!m) goto match_fail;
+			if (neg) f *= -1.0;
+			if (dest) switch (size) {
+			case SIZE_def:
+				*(float *)dest = f;
+				break;
+			case SIZE_l:
+				*(double *)dest = f;
+				break;
+			case SIZE_L:
+				*(long double *)dest = f;
+				break;
+			}
+			break;
+		case 'S':
+			wcs = dest ? dest : (void *)&dummy;
+			st = (mbstate_t){ 0 };
+			while((c=read(r)) >= 0) {
+				if (wide) {
+					if (is_space(c)) break;
+					if (dest) *wcs++ = c;
+				} else {
+					char ch = c;
+					if (is_space(c)) break;
+					switch (mbrtowc(wcs, &ch, 1, &st)) {
+					case -1:
+						goto enc_fail;
+					case -2:
+						break;
+					default:
+						if (dest) wcs++;
+					}
+				}
+			}
+			if (dest) *wcs++ = 0;
+			break;
+		case 's':
+			s = dest ? dest : (void *)&dummy;
+			while((c=read(r)) >= 0) {
+				if (wide) {
+					if (is_space(c)) break;
+					if ((l=wctomb(s, c)) < 0)
+						goto enc_fail;
+					if (dest) s += l;
+				} else {
+					if (is_space(c)) break;
+					if (dest) *s++ = c;
+				}
+			}
+			if (dest) *s++ = 0;
+			break;
+		}
+
+		/* unread will do nothing if field width was exhausted */
+		unread(r);
+		if (dest) matches++;
+	}
+	return matches;
+enc_fail:
+	errno = EILSEQ;
+fmt_fail:
+input_fail:
+	/* report -1 only when nothing had been stored before the failure */
+	if (!matches) matches--;
+match_fail:
+	unread(r);
+	return matches;
+}