about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 3
-rw-r--r-- blaze822.h 5
-rw-r--r-- rfc2045.c 131
-rw-r--r-- unmime.c 61
4 files changed, 199 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index 90b14ac..2d2e89b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 CFLAGS=-g -O1 -Wall -Wno-switch -Wextra -Wwrite-strings -fstack-protector-strong -D_FORTIFY_SOURCE=2
 
-ALL = scan thread hdr show list next
+ALL = scan thread hdr show list next unmime
 
 all: $(ALL)
 
@@ -10,6 +10,7 @@ hdr: blaze822.o hdr.o
 show: blaze822.o show.o
 list: list.o
 next: next.o
+unmime: blaze822.o unmime.o rfc2045.o rfc2047.o
 
 clean: FRC
 	-rm -f $(ALL) *.o
diff --git a/blaze822.h b/blaze822.h
index d3d7d17..f39cf78 100644
--- a/blaze822.h
+++ b/blaze822.h
@@ -27,3 +27,8 @@ int blaze822_decode_rfc2047(char *, char *, size_t, char *);
 int blaze822_decode_qp(char *start, char *stop, char **deco, size_t *decleno);
 int blaze822_decode_b64(char *start, char *stop, char **deco, size_t *decleno);
 
+// rfc2045.c: MIME version check, body decoding, multipart iteration
+
+int blaze822_check_mime(struct message *msg);	// 1 if Mime-Version begins with "1.0", else 0
+int blaze822_mime_body(struct message *msg, char **cto, char **bodyo, size_t *bodyleno);	// -1 if no body, else 1
+int blaze822_multipart(struct message *msg, struct message **imsg);	// 1 per part stored in *imsg, else 0
diff --git a/rfc2045.c b/rfc2045.c
new file mode 100644
index 0000000..cb6d317
--- /dev/null
+++ b/rfc2045.c
@@ -0,0 +1,131 @@
+#define _GNU_SOURCE
+#include <string.h>
+
+#include "blaze822.h"
+#include "blaze822_priv.h"
+
+// Return 1 if the "mime-version" header of msg starts with "1.0" followed by
+// end of string or a character accepted by iswsp; 0 otherwise or if missing.
+int
+blaze822_check_mime(struct message *msg)
+{
+	char *ver = blaze822_hdr(msg, "mime-version");
+	if (!ver || strncmp(ver, "1.0", 3) != 0)
+		return 0;
+	return !ver[3] || iswsp(ver[3]);
+}
+
+// Fetch the content type and the transfer-decoded body of msg.
+//   *cto      <- Content-Type header value, or a text/plain default if absent
+//   *bodyo    <- output of the quoted-printable/base64 decoder, or msg->body
+//                itself for any other (or missing) transfer encoding
+//   *bodyleno <- length of *bodyo in bytes
+// Returns -1 without touching the outputs if msg has no body, else 1.
+int
+blaze822_mime_body(struct message *msg, char **cto, char **bodyo, size_t *bodyleno)
+{
+	if (!msg->body || !msg->bodyend)
+		return -1;
+
+	char *ct = blaze822_hdr(msg, "content-type");
+	char *cte = blaze822_hdr(msg, "content-transfer-encoding");
+
+	if (!ct)
+		ct = "text/plain; charset=US-ASCII";
+	*cto = ct;
+
+	// NOTE(review): decoder return values are ignored; their convention is
+	// not visible here, so a failed decode is still reported as success.
+	if (cte && strncasecmp(cte, "quoted-printable", 16) == 0) {
+		blaze822_decode_qp(msg->body, msg->bodyend, bodyo, bodyleno);
+	} else if (cte && strncasecmp(cte, "base64", 6) == 0) {
+		blaze822_decode_b64(msg->body, msg->bodyend, bodyo, bodyleno);
+	} else {
+		// No (or unrecognized) encoding: hand back the raw body.
+		*bodyo = msg->body;
+		*bodyleno = msg->bodyend - msg->body;
+	}
+
+	return 1;
+}
+
+// Iterate over the parts of a multipart message.  Pass *imsg == NULL first;
+// each call returning 1 stores the next part (from blaze822_mem) in *imsg.
+// Returns 0 if there is no usable boundary parameter, on malformed input, or
+// when no part is left.  NOTE(review): old *imsg is not freed here -- confirm.
+int
+blaze822_multipart(struct message *msg, struct message **imsg)
+{
+	char *s = blaze822_hdr(msg, "content-type");
+	if (!s)
+		return 0;
+	while (*s && *s != ';')
+		s++;
+	if (!*s)
+		return 0;
+
+	// XXX scan boundary only once
+	char *boundary = s+1;
+	while (*boundary) {
+		while (iswsp(*boundary))
+			boundary++;
+		if (strncasecmp(boundary, "boundary=", 9) == 0) {
+			boundary += 9;
+			break;
+		}
+		// Search from boundary itself; boundary+1 could step over the NUL.
+		boundary = strchr(boundary, ';');
+		if (!boundary)
+			break;
+		boundary++;
+	}
+	if (!boundary || !*boundary)
+		return 0;
+	char *e;
+	if (*boundary == '"') {
+		boundary++;
+		e = strchr(boundary, '"');
+		if (!e)
+			return 0;
+	} else {
+		e = boundary;
+		// XXX    bchars := bcharsnospace / " "
+		// bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" /
+		//              "+" / "_" / "," / "-" / "." /
+		//              "/" / ":" / "=" / "?"
+		// Stop at the NUL as well, and keep the delimiter out of the token.
+		while (*e && !iswsp(*e) && *e != ';')
+			e++;
+	}
+
+	// Build the delimiter "--" + boundary; reject empty or oversized values
+	// instead of overflowing mboundary.
+	char mboundary[256];
+	size_t boundarylen = e-boundary+2;
+	if (e == boundary || boundarylen >= sizeof mboundary)
+		return 0;
+	mboundary[0] = '-';
+	mboundary[1] = '-';
+	memcpy(mboundary+2, boundary, e-boundary);
+	mboundary[boundarylen] = 0;
+
+	char *prevpart = *imsg ? (*imsg)->bodyend : msg->body;
+	char *part = memmem(prevpart, msg->bodyend - prevpart, mboundary, boundarylen);
+	if (!part)
+		return 0;
+	/// XXX access to stuff before first boundary?
+	part += boundarylen;
+	// A line break (LF or CRLF) must follow; "--" (closing delimiter) or
+	// anything else ends the iteration.  Never read at or past bodyend.
+	if (part < msg->bodyend && *part == '\r')
+		part++;
+	if (part >= msg->bodyend || *part != '\n')
+		return 0;   // XXX error condition?
+	part++;
+
+	char *nextpart = memmem(part, msg->bodyend - part, mboundary, boundarylen);
+	if (!nextpart)
+		return 0;   // XXX error condition
+	*imsg = blaze822_mem(part, nextpart-part);
+	return 1;
+}
diff --git a/unmime.c b/unmime.c
new file mode 100644
index 0000000..68ee514
--- /dev/null
+++ b/unmime.c
@@ -0,0 +1,61 @@
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+#include <wchar.h>
+
+#include "blaze822.h"
+
+// Print a summary of msg indented by depth; recurse into multipart parts.
+void
+recmime(struct message *msg, int depth)
+{
+	struct message *imsg = 0;
+	char *ct, *body;
+	size_t bodylen;
+	if (blaze822_mime_body(msg, &ct, &body, &bodylen) <= 0)
+		return;   // -1 means no body: ct/body/bodylen were never set
+	printf("%*.sbody %s len %zu\n", depth*2, "", ct, bodylen);
+	if (strncmp(ct, "multipart/", 10) == 0)   // NOTE: case-sensitive match
+		while (blaze822_multipart(msg, &imsg))
+			recmime(imsg, depth+1);
+	else if (strncmp(ct, "text/", 5) == 0) {
+		printf("---\n");
+		fwrite(body, bodylen, 1, stdout);
+		printf("---\n");
+	}
+}
+
+// Handle one file: load it with blaze822_file and, if blaze822_check_mime
+// accepts it, print a notice and walk its MIME structure via recmime().
+// Files that fail to load or are not MIME are skipped by this function.
+void
+unmime(char *file)
+{
+	struct message *m = blaze822_file(file);
+
+	if (!m || !blaze822_check_mime(m))
+		return;
+
+	printf("a mime message\n");
+	recmime(m, 0);
+	// NOTE(review): m is never released here; confirm whether blaze822
+	// offers a way to free a message.
+}
+
+int
+main(int argc, char *argv[])
+{
+	// Drop argv[0]; pass the remaining arguments and unmime to blaze822_loop.
+	blaze822_loop(argc - 1, &argv[1], unmime);
+	return 0;
+}