From 01c1c0d9707d2ac859a77d0b95c66fbfac1c07bd Mon Sep 17 00:00:00 2001
From: Christian Neukirchen
Date: Wed, 13 Jul 2016 15:53:27 +0200
Subject: add rfc2045 and multipart decoding

---
Review notes (not part of the commit message):
- rfc2045.c: blaze822_multipart rejects a boundary parameter that is
  empty or does not fit into mboundary instead of overflowing the stack
  buffer, stops the unquoted-boundary scan at the end of the header value,
  no longer appends the trailing delimiter to the boundary, and no longer
  reads past the terminator in the parameter scan or past bodyend after
  the delimiter line.
- rfc2045.c: dropped the unused scan over the content type in
  blaze822_mime_body and the commented-out debug printf.
- unmime.c: recmime treats the -1 result of blaze822_mime_body as a
  failure and prints the size_t length with %zu.
- The names of the angle-bracket #include lines were missing from this
  copy of the patch.  <string.h> (rfc2045.c) and <stdio.h>, <string.h>
  (unmime.c) are the headers the code needs; unmime.c carried ten more
  such lines whose names cannot be recovered here.
- Line counts in the diffstat and hunk headers are updated; the blob ids
  on the "index" lines still refer to the original commit.

 Makefile   |   3 +-
 blaze822.h |   5 ++
 rfc2045.c  | 147 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 unmime.c   |  56 +++++++++++++++++++++++
 4 files changed, 210 insertions(+), 1 deletion(-)
 create mode 100644 rfc2045.c
 create mode 100644 unmime.c

diff --git a/Makefile b/Makefile
index 90b14ac..2d2e89b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 CFLAGS=-g -O1 -Wall -Wno-switch -Wextra -Wwrite-strings -fstack-protector-strong -D_FORTIFY_SOURCE=2
 
-ALL = scan thread hdr show list next
+ALL = scan thread hdr show list next unmime
 
 all: $(ALL)
 
@@ -10,6 +10,7 @@ hdr: blaze822.o hdr.o
 show: blaze822.o show.o
 list: list.o
 next: next.o
+unmime: blaze822.o unmime.o rfc2045.o rfc2047.o
 
 clean: FRC
 	-rm -f $(ALL) *.o
diff --git a/blaze822.h b/blaze822.h
index d3d7d17..f39cf78 100644
--- a/blaze822.h
+++ b/blaze822.h
@@ -27,3 +27,8 @@ int blaze822_decode_rfc2047(char *, char *, size_t, char *);
 int blaze822_decode_qp(char *start, char *stop, char **deco, size_t *decleno);
 int blaze822_decode_b64(char *start, char *stop, char **deco, size_t *decleno);
 
+// rfc2045.c
+
+int blaze822_check_mime(struct message *msg);
+int blaze822_mime_body(struct message *msg, char **cto, char **bodyo, size_t *bodyleno);
+int blaze822_multipart(struct message *msg, struct message **imsg);
diff --git a/rfc2045.c b/rfc2045.c
new file mode 100644
index 0000000..cb6d317
--- /dev/null
+++ b/rfc2045.c
@@ -0,0 +1,147 @@
+#define _GNU_SOURCE
+#include <string.h>
+
+#include "blaze822.h"
+#include "blaze822_priv.h"
+
+// Return nonzero iff the Mime-Version header of MSG starts with "1.0"
+// followed by end of string or whitespace.
+int
+blaze822_check_mime(struct message *msg)
+{
+	char *v = blaze822_hdr(msg, "mime-version");
+
+	// && short-circuits at the first mismatch, so a terminator inside
+	// the first three bytes stops the comparison before reading on.
+	return (v &&
+	    v[0] == '1' &&
+	    v[1] == '.' &&
+	    v[2] == '0' &&
+	    (!v[3] || iswsp(v[3])));
+}
+
+// Store the Content-Type of MSG in *cto (a text/plain default when the
+// header is absent) and its body in *bodyo / *bodyleno.  The body is run
+// through the quoted-printable or base64 decoder when
+// Content-Transfer-Encoding asks for it and handed back as-is otherwise.
+// Returns 1 on success and -1 when MSG has no body.
+int
+blaze822_mime_body(struct message *msg, char **cto, char **bodyo, size_t *bodyleno)
+{
+	if (!msg->body || !msg->bodyend)
+		return -1;
+
+	char *ct = blaze822_hdr(msg, "content-type");
+	char *cte = blaze822_hdr(msg, "content-transfer-encoding");
+
+	if (!ct)
+		ct = "text/plain; charset=US-ASCII";
+
+	*cto = ct;
+
+	if (cte) {
+		// XXX decoder return values are not checked
+		if (strncasecmp(cte, "quoted-printable", 16) == 0)
+			blaze822_decode_qp(msg->body, msg->bodyend, bodyo, bodyleno);
+		else if (strncasecmp(cte, "base64", 6) == 0)
+			blaze822_decode_b64(msg->body, msg->bodyend, bodyo, bodyleno);
+		else
+			cte = 0;  // any other encoding: use the raw body
+	}
+	if (!cte) {
+		*bodyo = msg->body;
+		*bodyleno = msg->bodyend - msg->body;
+	}
+
+	return 1;
+}
+
+// Iterate over the parts of the multipart message MSG.  With *imsg == 0
+// the search starts at the body of MSG, otherwise after the part in
+// *imsg.  Returns 1 and stores the next part in *imsg, or returns 0
+// when no further part is found or the boundary cannot be determined.
+int
+blaze822_multipart(struct message *msg, struct message **imsg)
+{
+	char *s = blaze822_hdr(msg, "content-type");
+	if (!s)
+		return 0;
+	while (*s && *s != ';')
+		s++;
+	if (!*s)
+		return 0;
+
+	// XXX scan boundary only once
+	char *boundary = s+1;
+	while (*boundary) {
+		while (iswsp(*boundary))
+			boundary++;
+		if (strncasecmp(boundary, "boundary=", 9) == 0) {
+			boundary += 9;
+			break;
+		}
+		// search from the current byte: it may be the terminator,
+		// and boundary+1 would then point past the string
+		boundary = strchr(boundary, ';');
+		if (!boundary)
+			break;
+		boundary++;
+	}
+	if (!boundary || !*boundary)
+		return 0;
+	char *e;
+	if (*boundary == '"') {
+		boundary++;
+		e = strchr(boundary, '"');
+		if (!e)
+			return 0;
+	} else {
+		e = boundary;
+		// XXX bchars := bcharsnospace / " "
+		// bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" /
+		//                  "+" / "_" / "," / "-" / "." /
+		//                  "/" / ":" / "=" / "?"
+		// stop at the end of the value too, and keep the
+		// delimiter out of the boundary
+		while (*e && !iswsp(*e) && *e != ';')
+			e++;
+	}
+
+	char mboundary[256];
+	size_t boundarylen = e-boundary;
+	// "--" and the terminator take 3 bytes of mboundary; an empty
+	// boundary would make every "--" in the body a delimiter
+	if (boundarylen == 0 || boundarylen > sizeof mboundary - 3)
+		return 0;
+	mboundary[0] = '-';
+	mboundary[1] = '-';
+	memcpy(mboundary+2, boundary, boundarylen);
+	boundarylen += 2;
+	mboundary[boundarylen] = 0;
+
+	char *prevpart;
+	if (*imsg)
+		prevpart = (*imsg)->bodyend;
+	else
+		prevpart = msg->body;
+
+	char *part = memmem(prevpart, msg->bodyend - prevpart, mboundary, boundarylen);
+	if (!part)
+		return 0;
+	/// XXX access to stuff before first boundary?
+	part += boundarylen;
+	if (part >= msg->bodyend)
+		return 0;  // nothing after the delimiter
+	if (*part == '\n')  // XXX crlf
+		part++;
+	else
+		return 0;  // closing "--" delimiter, or XXX error condition?
+
+	char *nextpart = memmem(part, msg->bodyend - part, mboundary, boundarylen);
+	if (!nextpart)
+		return 0;  // XXX error condition
+
+	*imsg = blaze822_mem(part, nextpart-part);
+
+	return 1;
+}
diff --git a/unmime.c b/unmime.c
new file mode 100644
index 0000000..68ee514
--- /dev/null
+++ b/unmime.c
@@ -0,0 +1,56 @@
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+
+#include "blaze822.h"
+
+// Print the content type and body length of MSG, indented by two columns
+// per DEPTH level, then descend into multipart parts or dump text bodies.
+void
+recmime(struct message *msg, int depth)
+{
+	struct message *imsg = 0;
+	char *ct, *body;
+	size_t bodylen;
+
+	// blaze822_mime_body returns -1 without setting its outputs when
+	// there is no body, so only a positive result may be used
+	if (blaze822_mime_body(msg, &ct, &body, &bodylen) > 0) {
+		printf("%*.sbody %s len %zu\n", depth*2, "", ct, bodylen);
+		if (strncmp(ct, "multipart/", 10) == 0)
+			while (blaze822_multipart(msg, &imsg))
+				recmime(imsg, depth+1);
+		else if (strncmp(ct, "text/", 5) == 0) {
+			printf("---\n");
+			fwrite(body, bodylen, 1, stdout);
+			printf("---\n");
+		}
+	}
+}
+
+// Walk the MIME structure of FILE; files that cannot be loaded or that
+// are not MIME messages are skipped silently.
+void
+unmime(char *file)
+{
+	struct message *msg;
+
+	msg = blaze822_file(file);
+	if (!msg)
+		return;
+
+	if (blaze822_check_mime(msg))
+		printf("a mime message\n");
+	else
+		return;
+
+	recmime(msg, 0);
+}
+
+int
+main(int argc, char *argv[])
+{
+	blaze822_loop(argc-1, argv+1, unmime);
+
+	return 0;
+}
-- 
cgit 1.4.1