diff options
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | blaze822.h | 5 | ||||
-rw-r--r-- | rfc2047.c | 216 | ||||
-rw-r--r-- | scan.c | 14 |
4 files changed, 226 insertions, 11 deletions
// rfc2047.c - decoding of RFC 2047 encoded-words ("=?charset?Q|B?text?=")
// as found in mail header fields such as From: and Subject:.
//
// The commit that introduced this file also made these changes:
//   Makefile:   "scan: blaze822.o scan.o fmt_rfc2047.o" becomes
//               "scan: blaze822.o scan.o rfc2047.o".
//   blaze822.h: "int decode_rfc2047 (char *, char *, size_t);" is replaced by
//               "int blaze822_decode_rfc2047(char *, char *, size_t, char *);".
//   scan.c:     oneline() fills fromdec[17] with
//               blaze822_decode_rfc2047(fromdec, from, sizeof fromdec - 1, "UTF-8"),
//               shrinks subjdec from 1000 to 100 bytes, fills it with
//               blaze822_decode_rfc2047(subjdec, v, sizeof subjdec - 1, "UTF-8")
//               and prints subjdec instead of subj via u8putstr().
//
// NOTE(review): in the scan.c hunk the subject is decoded *after* the
// "if ((v = blaze822_hdr(msg, "subject")))" block, so v is presumably NULL
// when the header is missing, and the "(no subject)" default kept in subj is
// no longer printed.  blaze822_decode_rfc2047() below tolerates a NULL src
// (it yields an empty string), but the caller should be revisited.

#include <ctype.h>
#include <errno.h>
#include <iconv.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define iswsp(c)  (((c) == ' ' || (c) == '\t'))

// XXX keep trying bytewise on invalid iconv

// Value of one hexadecimal digit, or -1 if c is not a hex digit.
static int
hexval(unsigned char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	return -1;
}

// Value of one base64 alphabet character, or -1 if c is not in the alphabet
// ('=' padding is handled by the caller).  Assumes an ASCII execution
// character set, like the lookup table this replaces.
static int
b64val(unsigned char c)
{
	if (c >= 'A' && c <= 'Z')
		return c - 'A';
	if (c >= 'a' && c <= 'z')
		return c - 'a' + 26;
	if (c >= '0' && c <= '9')
		return c - '0' + 52;
	if (c == '+')
		return 62;
	if (c == '/')
		return 63;
	return -1;
}

// Decode the "Q"/quoted-printable text in [start, stop).
//
// "=XX" becomes the byte with hex value XX, '_' becomes a space, and a soft
// line break ("=\n" or "=\r\n") is dropped.  A '=' that is not followed by
// two hex digits is copied through literally instead of producing garbage.
//
// On success returns 1, stores a malloc()ed, NUL-terminated buffer in *deco
// (the caller must free() it) and its length, excluding the NUL, in
// *decleno.  Returns 0 if the arguments are invalid or allocation fails; the
// outputs are then not usable.
int
decode_qp(char *start, char *stop, char **deco, size_t *decleno)
{
	if (!start || stop < start)
		return 0;

	// Decoding never grows the data; +1 leaves room for the terminator
	// even when the input is empty.
	char *buf = malloc((size_t)(stop - start) + 1);
	if (!buf)
		return 0;

	*deco = buf;

	char *s = start;
	while (s < stop) {
		size_t avail = (size_t)(stop - s);

		if (*s == '=' && avail >= 2 && s[1] == '\n') {
			s += 2;
		} else if (*s == '=' && avail >= 3 &&
		    s[1] == '\r' && s[2] == '\n') {
			s += 3;
		} else if (*s == '=' && avail >= 3 &&
		    hexval((unsigned char)s[1]) >= 0 &&
		    hexval((unsigned char)s[2]) >= 0) {
			*buf++ = (char)((hexval((unsigned char)s[1]) << 4) |
			    hexval((unsigned char)s[2]));
			s += 3;
		} else if (*s == '_') {
			*buf++ = ' ';
			s++;
		} else {
			*buf++ = *s++;
		}
	}

	*buf = 0;

	*decleno = (size_t)(buf - *deco);
	return 1;
}

// Decode the base64 text in [s, e).
//
// Characters outside the base64 alphabet (whitespace, line breaks, stray
// bytes) are ignored.  Input is consumed in groups of four alphabet/padding
// characters; '=' ends the data of its group, so concatenated padded chunks
// decode correctly.  An incomplete trailing group is dropped.
//
// On success returns 1, stores a malloc()ed, NUL-terminated buffer in *deco
// (the caller must free() it) and its length, excluding the NUL, in
// *decleno.  Returns 0 if the arguments are invalid or allocation fails.
int
decode_b64(char *s, char *e, char **deco, size_t *decleno)
{
	if (!s || e < s)
		return 0;

	// Every complete group of 4 input bytes yields at most 3 output bytes.
	size_t inlen = (size_t)(e - s);
	char *buf = malloc(inlen / 4 * 3 + 1);
	if (!buf)
		return 0;

	*deco = buf;

	uint32_t v = 0;   // accumulated 24 bits of the current group
	int nchars = 0;   // alphabet/padding characters seen in this group
	int ndata = 0;    // data characters seen before the first '='
	int padded = 0;   // a '=' was seen in this group

	for (; s < e; s++) {
		unsigned char c = (unsigned char)*s;
		int d = b64val(c);

		if (c == '=') {
			d = 0;
			padded = 1;
		} else if (d < 0) {
			continue;
		} else if (!padded) {
			ndata++;
		}

		v = (v << 6) | (uint32_t)d;
		if (++nchars < 4)
			continue;

		if (ndata >= 2)
			*buf++ = (char)((v >> 16) & 0xff);
		if (ndata >= 3)
			*buf++ = (char)((v >> 8) & 0xff);
		if (ndata == 4)
			*buf++ = (char)(v & 0xff);

		v = 0;
		nchars = ndata = padded = 0;
	}

	*buf = 0;

	*decleno = (size_t)(buf - *deco);
	return 1;
}

// Return 1 if [p, end) is empty or consists only of whitespace.
static int
is_blank_span(const char *p, const char *end)
{
	for (; p < end; p++) {
		if (!isspace((unsigned char)*p))
			return 0;
	}
	return 1;
}

// Append [p, end) to *out, writing at most *left bytes; updates both.
static void
put_bytes(char **out, size_t *left, const char *p, const char *end)
{
	while (p < end && *left > 0) {
		*(*out)++ = *p++;
		(*left)--;
	}
}

// Decode all RFC 2047 encoded-words in the header value src into dst,
// converting their payload to the character set tgtenc (an iconv name such
// as "UTF-8").  Text outside encoded-words is copied unchanged; whitespace
// that only separates two adjacent encoded-words is dropped.
//
// dlen is the maximum number of bytes stored *excluding* the terminating
// NUL, so dst must have room for dlen + 1 bytes (callers pass
// "sizeof buf - 1").  Output that does not fit is truncated.  src is not
// modified; a NULL src is treated as an empty string.
//
// If any encoded-word is malformed, uses an unknown charset or encoding, or
// cannot be converted, a diagnostic is printed to stderr and dst receives
// the raw, undecoded src instead (truncated to dlen).
//
// Always returns 1.
int
blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
{
	char *out = dst;
	size_t left = dlen;
	int prev_encoded = 0;   // the token before b was an encoded-word

	if (!src)
		src = "";

	char *b = src;          // start of the text not yet consumed
	char *s = strstr(src, "=?");
	if (!s)
		goto nocodeok;

	do {
		// Keep the plain text in front of this encoded-word, unless
		// it is only whitespace separating two encoded-words.
		if (!(prev_encoded && is_blank_span(b, s)))
			put_bytes(&out, &left, b, s);

		// =?charset?enc?text?=
		char *cs = s + 2;
		char *csend = strchr(cs, '?');
		if (!csend)
			goto nocode;

		char charset[64];
		size_t cslen = (size_t)(csend - cs);
		if (cslen == 0 || cslen >= sizeof charset)
			goto nocode;
		memcpy(charset, cs, cslen);
		charset[cslen] = 0;

		// RFC 2231 allows "charset*language"; iconv wants the charset.
		char *star = strchr(charset, '*');
		if (star)
			*star = 0;
		if (!charset[0])
			goto nocode;

		// The second test is only evaluated when csend[1] is not NUL.
		char enc = (char)tolower((unsigned char)csend[1]);
		if ((enc != 'q' && enc != 'b') || csend[2] != '?')
			goto nocode;

		char *start = csend + 3;
		char *stop = strstr(start, "?=");
		if (!stop)
			goto nocode;

		char *dec = NULL;
		size_t declen = 0;
		int ok = (enc == 'q')
		    ? decode_qp(start, stop, &dec, &declen)
		    : decode_b64(start, stop, &dec, &declen);
		if (!ok)
			goto nocode;

		iconv_t ic = iconv_open(tgtenc, charset);
		if (ic == (iconv_t)-1) {
			perror("iconv_open");
			free(dec);
			goto nocode;
		}

		// iconv() advances its input pointer, so convert through a
		// copy and keep dec for free().
		char *in = dec;
		size_t inleft = declen;
		size_t r = iconv(ic, &in, &inleft, &out, &left);
		int err = errno;
		if (r != (size_t)-1) {
			// Emit any pending shift sequence of a stateful target.
			(void)iconv(ic, NULL, NULL, &out, &left);
		}
		iconv_close(ic);
		free(dec);

		if (r == (size_t)-1) {
			if (err == E2BIG) {
				// dst is full: keep what was converted.
				*out = 0;
				return 1;
			}
			errno = err;
			perror("iconv");
			goto nocode;
		}

		prev_encoded = 1;
		b = stop + 2;
	} while ((s = strstr(b, "=?")));

	// Plain text after the last encoded-word.
	put_bytes(&out, &left, b, b + strlen(b));
	*out = 0;

	return 1;

nocode:
	fprintf(stderr, "error decoding rfc2047\n");
	// Discard the partial output and fall back to the raw header.
	out = dst;
	left = dlen;
nocodeok:
	put_bytes(&out, &left, src, src + strlen(src));
	*out = 0;

	return 1;
}

#ifdef TEST
// Self-test driver: build with -DTEST and compare the output by eye.
int
main(void)
{
	char *r;
	size_t l;
	char test[] = "Keld_J=F8rn_Simonsen";
	if (decode_qp(test, test + sizeof test - 1, &r, &l)) {
		printf("%s %zu\n", r, l);
		free(r);
	}

	char *r2;
	size_t l2;
	char test2[] = "SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==";
	if (decode_b64(test2, test2 + sizeof test2 - 1, &r2, &l2)) {
		printf("%s %zu\n", r2, l2);
		free(r2);
	}

	char test3[] = "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>";
	char test3dec[255];
	blaze822_decode_rfc2047(test3dec, test3, sizeof test3dec - 1, "UTF-8");
	printf("%s\n", test3dec);

	char test4[] = "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= "
	    "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= z "
	    "=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=";
	char test4dec[255];
	blaze822_decode_rfc2047(test4dec, test4, sizeof test4dec - 1, "UTF-8");
	printf("%s\n", test4dec);

	return 0;
}
#endif