about summary refs log tree commit diff
diff options
context:
space:
mode:
authorChristian Neukirchen <chneukirchen@gmail.com>2016-07-11 23:40:00 +0200
committerChristian Neukirchen <chneukirchen@gmail.com>2016-07-11 23:40:00 +0200
commit3bb545296414c7bc4795a206ab3eb7d06e736047 (patch)
treec45af6ab0325433b89519eaa95f33e00b89b822c
parentd548547d99c5cd72fa205ac2ba6347e672185db7 (diff)
downloadmblaze-3bb545296414c7bc4795a206ab3eb7d06e736047.tar.gz
mblaze-3bb545296414c7bc4795a206ab3eb7d06e736047.tar.xz
mblaze-3bb545296414c7bc4795a206ab3eb7d06e736047.zip
add rfc2047 decoder
-rw-r--r--Makefile2
-rw-r--r--blaze822.h5
-rw-r--r--rfc2047.c216
-rw-r--r--scan.c14
4 files changed, 226 insertions, 11 deletions
diff --git a/Makefile b/Makefile
index b80b616..db0c9cb 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ ALL = scan thread hdr show
 
 all: $(ALL)
 
-scan: blaze822.o scan.o fmt_rfc2047.o
+scan: blaze822.o scan.o rfc2047.o
 thread: blaze822.o thread.o
 hdr: blaze822.o hdr.o
 show: blaze822.o show.o
diff --git a/blaze822.h b/blaze822.h
index 5c77de0..7785b75 100644
--- a/blaze822.h
+++ b/blaze822.h
@@ -12,4 +12,7 @@ int blaze822_loop(int, char **, void (*)(char *));
 
 time_t blaze822_date(char *);
 char *blaze822_addr(char *, char **, char **);
-int decode_rfc2047 (char *, char *, size_t);
+
+
+int blaze822_decode_rfc2047(char *, char *, size_t, char *);
+
diff --git a/rfc2047.c b/rfc2047.c
new file mode 100644
index 0000000..40f750e
--- /dev/null
+++ b/rfc2047.c
@@ -0,0 +1,216 @@
+#include <ctype.h>
+#include <errno.h>
+#include <iconv.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define iswsp(c)  (((c) == ' ' || (c) == '\t'))
+
+// XXX error detection on decode
+// XXX keep trying bytewise on invalid iconv
+
+// Value of the hex digit c, or -1 if c is not a hex digit.
+static int
+qp_hexval(unsigned char c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	if (c >= 'A' && c <= 'F')
+		return c - 'A' + 10;
+	if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+	return -1;
+}
+
+// Decode the quoted-printable ("Q" encoded) text in [start, stop).
+//
+// "=XX" with two hex digits becomes the byte 0xXX, "_" becomes a space and
+// the soft line break "=\n" is dropped.  A "=" that is not followed by two
+// hex digits inside the range is copied through unchanged, so malformed
+// input never causes a read outside [start, stop).
+//
+// On success, stores a malloc'ed, NUL-terminated buffer in *deco (the caller
+// frees it) and its length, not counting the terminator, in *decleno, and
+// returns 1.  Returns 0 if the buffer could not be allocated; *deco and
+// *decleno are left untouched in that case.
+int
+decode_qp(char *start, char *stop, char **deco, size_t *decleno)
+{
+	// The decoded text is never longer than the input; +1 for the NUL.
+	char *buf = malloc((size_t)(stop - start) + 1);
+	if (!buf)
+		return 0;
+
+	char *out = buf;
+	char *s = start;
+
+	while (s < stop) {
+		if (*s == '=' && s + 1 < stop && s[1] == '\n') {
+			s += 2;
+		} else if (*s == '=' && s + 2 < stop &&
+		    qp_hexval((unsigned char)s[1]) >= 0 &&
+		    qp_hexval((unsigned char)s[2]) >= 0) {
+			int hi = qp_hexval((unsigned char)s[1]);
+			int lo = qp_hexval((unsigned char)s[2]);
+			*out++ = (char)((hi << 4) | lo);
+			s += 3;
+		} else if (*s == '_') {
+			*out++ = ' ';
+			s++;
+		} else {
+			*out++ = *s++;
+		}
+	}
+
+	*out = 0;
+
+	*deco = buf;
+	*decleno = (size_t)(out - buf);
+	return 1;
+}
+
+// Value of the base64 digit c, or -1 if c is not in the base64 alphabet.
+static int
+b64_val(unsigned char c)
+{
+	if (c >= 'A' && c <= 'Z')
+		return c - 'A';
+	if (c >= 'a' && c <= 'z')
+		return c - 'a' + 26;
+	if (c >= '0' && c <= '9')
+		return c - '0' + 52;
+	if (c == '+')
+		return 62;
+	if (c == '/')
+		return 63;
+	return -1;
+}
+
+// Write the bytes held by an incomplete group of n sextets (n < 4) in acc
+// to out and return the advanced output pointer.
+static char *
+b64_flush(char *out, uint32_t acc, int n)
+{
+	if (n == 2) {           // 12 bits collected -> 1 byte
+		*out++ = (char)((acc >> 4) & 0xff);
+	} else if (n == 3) {    // 18 bits collected -> 2 bytes
+		*out++ = (char)((acc >> 10) & 0xff);
+		*out++ = (char)((acc >> 2) & 0xff);
+	}
+	return out;
+}
+
+// Decode the base64 text in [s, e).
+//
+// Whitespace and any other byte outside the base64 alphabet is skipped.
+// A "=" ends the current group, so padded pieces that were concatenated
+// ("eW8=dSA=") decode as expected; a trailing group without padding is
+// decoded as well.  No byte outside [s, e) is read.
+//
+// On success, stores a malloc'ed, NUL-terminated buffer in *deco (the caller
+// frees it) and its length, not counting the terminator, in *decleno, and
+// returns 1.  Returns 0 if the buffer could not be allocated; *deco and
+// *decleno are left untouched in that case.
+int
+decode_b64(char *s, char *e, char **deco, size_t *decleno)
+{
+	// Output is at most 3/4 of the input, so the input length is a safe
+	// bound; +1 for the NUL (and so that empty input never is malloc(0)).
+	char *buf = malloc((size_t)(e - s) + 1);
+	if (!buf)
+		return 0;
+
+	char *out = buf;
+	uint32_t acc = 0;   // sextets collected so far, most recent in low bits
+	int n = 0;          // number of sextets in acc
+
+	for (; s < e; s++) {
+		int v = b64_val((unsigned char)*s);
+		if (v >= 0) {
+			acc = (acc << 6) | (uint32_t)v;
+			if (++n == 4) {
+				*out++ = (char)((acc >> 16) & 0xff);
+				*out++ = (char)((acc >> 8) & 0xff);
+				*out++ = (char)(acc & 0xff);
+				acc = 0;
+				n = 0;
+			}
+		} else if (*s == '=') {
+			out = b64_flush(out, acc, n);
+			acc = 0;
+			n = 0;
+		}
+	}
+	out = b64_flush(out, acc, n);
+
+	*out = 0;
+
+	*deco = buf;
+	*decleno = (size_t)(out - buf);
+	return 1;
+}
+
+// Decode the RFC 2047 encoded words ("=?charset?Q?text?=" and
+// "=?charset?B?text?=") found in src and write the result, converted to the
+// character set tgtenc, to dst.
+//
+// dst must have room for dlen bytes plus a terminating NUL (callers pass
+// "sizeof buf - 1"); output that does not fit is truncated.  src is only
+// read; a NULL src is treated as the empty string.
+//
+// Text in front of an encoded word is copied as is, unless it consists of
+// whitespace only, in which case it is dropped.  If any encoded word is
+// malformed, names an unknown charset or cannot be converted, a message is
+// printed to stderr and dst receives a plain (truncated) copy of src.
+//
+// Always returns 1.
+int
+blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
+{
+	if (!src)
+		src = "";
+
+	char *out = dst;        // write position in dst
+	size_t left = dlen;     // bytes still free before the terminator
+	char *b = src;          // start of the text not yet handled
+
+	char *s = strstr(src, "=?");
+
+	while (s) {
+		// Plain text before the encoded word; whitespace-only runs
+		// (as between two encoded words) are stripped.
+		int blank = 1;
+		for (char *t = b; t < s; t++) {
+			if (!isspace((unsigned char)*t)) {
+				blank = 0;
+				break;
+			}
+		}
+		if (!blank) {
+			while (b < s && left > 0) {
+				*out++ = *b++;
+				left--;
+			}
+		}
+
+		// "=?" charset "?" encoding "?" text "?="
+		char *cs = s + 2;
+		char *q = strchr(cs, '?');
+		if (!q)
+			goto nocode;
+
+		// Copy the charset name instead of patching src in place.
+		char charset[64];
+		size_t cslen = (size_t)(q - cs);
+		if (cslen == 0 || cslen >= sizeof charset)
+			goto nocode;
+		memcpy(charset, cs, cslen);
+		charset[cslen] = 0;
+
+		// Short-circuit keeps q[2] from being read past a NUL at q[1].
+		char enc = (char)tolower((unsigned char)q[1]);
+		if ((enc != 'q' && enc != 'b') || q[2] != '?')
+			goto nocode;
+
+		char *start = q + 3;
+		char *stop = strstr(start, "?=");
+		if (!stop)
+			goto nocode;
+
+		char *dec = NULL;
+		size_t declen = 0;
+		int ok;
+		if (enc == 'q')
+			ok = decode_qp(start, stop, &dec, &declen);
+		else
+			ok = decode_b64(start, stop, &dec, &declen);
+		if (!ok)
+			goto nocode;
+
+		iconv_t ic = iconv_open(tgtenc, charset);
+		if (ic == (iconv_t)-1) {
+			perror("iconv_open");
+			free(dec);
+			goto nocode;
+		}
+
+		// iconv advances its input pointer, so keep dec for free().
+		char *in = dec;
+		size_t inleft = declen;
+		size_t r = iconv(ic, &in, &inleft, &out, &left);
+		int err = errno;
+
+		iconv_close(ic);
+		free(dec);
+
+		if (r == (size_t)-1) {
+			if (err == E2BIG)   // dst is full: keep what fits
+				goto done;
+			errno = err;
+			perror("iconv");
+			goto nocode;
+		}
+
+		b = stop + 2;
+		s = strstr(b, "=?");
+	}
+
+	// Plain text after the last encoded word (or all of src if none).
+	while (*b && left > 0) {
+		*out++ = *b++;
+		left--;
+	}
+
+done:
+	*out = 0;
+	return 1;
+
+nocode:
+	fprintf(stderr, "error decoding rfc2047\n");
+
+	// Discard partial output and fall back to a plain copy of src.
+	out = dst;
+	left = dlen;
+	while (*src && left > 0) {
+		*out++ = *src++;
+		left--;
+	}
+	*out = 0;
+
+	return 1;
+}
+
+#ifdef TEST
+// Self-test driver, only built with -DTEST: runs the two decoders and the
+// RFC 2047 entry point on sample strings and prints the results.
+int
+main(void)
+{
+	char *r;
+	size_t l;
+	char test[] = "Keld_J=F8rn_Simonsen";
+	// sizeof - 1: do not hand the terminating NUL to the decoder.
+	if (decode_qp(test, test + sizeof test - 1, &r, &l)) {
+		// Print by length; size_t needs %zu.
+		printf("%.*s %zu\n", (int)l, r, l);
+		free(r);
+	}
+
+	char *r2;
+	size_t l2;
+	char test2[] = "SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==";
+	if (decode_b64(test2, test2 + sizeof test2 - 1, &r2, &l2)) {
+		printf("%.*s %zu\n", (int)l2, r2, l2);
+		free(r2);
+	}
+
+	char test3[] = "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>";
+	char test3dec[255];
+	// Leave one byte for the terminating NUL the decoder appends.
+	blaze822_decode_rfc2047(test3dec, test3, sizeof test3dec - 1, "UTF-8");
+	printf("%s\n", test3dec);
+
+	char test4[] = "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= "
+	    "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= z "
+	    "=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=";
+	char test4dec[255];
+	blaze822_decode_rfc2047(test4dec, test4, sizeof test4dec - 1, "UTF-8");
+	printf("%s\n", test4dec);
+
+	return 0;
+}
+#endif
diff --git a/scan.c b/scan.c
index 74e0b14..19b1b49 100644
--- a/scan.c
+++ b/scan.c
@@ -92,20 +92,16 @@ oneline(char *file)
 	}
 
 	char fromdec[17];
-	if (!decode_rfc2047(from, fromdec, sizeof fromdec))
-		memcpy(fromdec, from, sizeof fromdec);
+	blaze822_decode_rfc2047(fromdec, from, sizeof fromdec - 1, "UTF-8");
 	fromdec[sizeof fromdec - 1] = 0;
 
 
 	char *subj = "(no subject)";
-	char subjdec[1000];   // XXX rewrite decode_rfc2047, it overflows!
+	char subjdec[100];
         if ((v = blaze822_hdr(msg, "subject"))) {
-		if (decode_rfc2047(v, subjdec, sizeof subjdec - 1))
-			subj = subjdec;
-		else
-			subj = v;
-		
+		subj = v;
 	}
+	blaze822_decode_rfc2047(subjdec, v, sizeof subjdec - 1, "UTF-8");
 
 	printf("%c%c%9s  ", flag1, flag2, date);
 	u8putstr(stdout, fromdec, 17, 1);
@@ -113,7 +109,7 @@ oneline(char *file)
 	int z;
 	for (z = 0; z < indent; z++)
 		printf(" ");
-	u8putstr(stdout, subj, 80-33-indent, 0);
+	u8putstr(stdout, subjdec, 80-33-indent, 0);
 	printf("\n");
 }