about summary refs log tree commit diff
path: root/rfc2047.c
diff options
context:
space:
mode:
author Christian Neukirchen <chneukirchen@gmail.com> 2016-10-14 22:20:11 +0200
committer Christian Neukirchen <chneukirchen@gmail.com> 2016-10-14 22:20:11 +0200
commit 27915af60dd2cde02068268bbb32574527dc715c (patch)
tree 4426bfaab3a61c87845721b9a0db41b0a81b94ec /rfc2047.c
parent 631b6c7c826477ebc3b8b59a2a77df8592e0c627 (diff)
download mblaze-27915af60dd2cde02068268bbb32574527dc715c.tar.gz
mblaze-27915af60dd2cde02068268bbb32574527dc715c.tar.xz
mblaze-27915af60dd2cde02068268bbb32574527dc715c.zip
rfc2047: detect partial multibyte sequences and decode them correctly
Diffstat (limited to 'rfc2047.c')
-rw-r--r-- rfc2047.c 58
1 files changed, 49 insertions, 9 deletions
diff --git a/rfc2047.c b/rfc2047.c
index 8462d25..b9c41ab 100644
--- a/rfc2047.c
+++ b/rfc2047.c
@@ -126,6 +126,7 @@ int
 blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
 {
 	iconv_t ic = (iconv_t)-1;
+	char *srcenc = 0;
 
 	char *b = src;
 
@@ -134,11 +135,17 @@ blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
 	if (!s)
 		goto nocodeok;
 
+	// keep track of partial multibyte sequences
+	char *partial = 0;
+	size_t partiallen = 0;
+
 	do {
 		char *t;
 		t = b;
 		while (t < s)  // strip space-only inbetween encoded words
 			if (!isfws(*t++)) {
+				if (partial)  // mixed up encodings
+					goto nocode;
 				while (b < s && dlen) {
 					*dst++ = *b++;
 					dlen--;
@@ -156,7 +163,17 @@ blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
 			goto nocode;
 
 		*e = 0;
-		ic = iconv_open(tgtenc, s);
+		if (!srcenc || strcmp(srcenc, s) != 0) {
+			if (partial)  // mixed up encodings
+				goto nocode;
+			free(srcenc);
+			srcenc = strdup(s);
+			if (!srcenc)
+				goto nocode;
+			if (ic != (iconv_t)-1)
+				iconv_close(ic);
+			ic = iconv_open(tgtenc, srcenc);
+		}
 		*e = '?';
 		e++;
 
@@ -180,23 +197,38 @@ blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
 		else
 			goto nocode;
 
+		if (partial) {
+			dec = realloc(dec, declen + partiallen);
+			if (!dec)
+				goto nocode;
+			memmove(dec + partiallen, dec, declen);
+			memcpy(dec, partial, partiallen);
+			declen += partiallen;
+			free(partial);
+			partial = 0;
+			partiallen = 0;
+		}
+
 		decchunk = dec;
 		int r = iconv(ic, &dec, &declen, &dst, &dlen);
 		if (r < 0) {
 			if (errno == E2BIG) {
-				iconv_close(ic);
 				break;
-			} else if (errno == EILSEQ || errno == EINVAL) {
+			} else if (errno == EILSEQ) {
 				goto nocode;
+			} else if (errno == EINVAL) {
+				partial = malloc(declen);
+				if (!partial)
+					goto nocode;
+				memcpy(partial, dec, declen);
+				partiallen = declen;
 			} else {
 				perror("iconv");
 				goto nocode;
 			}
 		}
 
-		iconv_close(ic);
-
-		while (declen && dlen) {
+		while (!partial && declen && dlen) {
 			*dst++ = *dec++;
 			declen--;
 			dlen--;
@@ -214,13 +246,17 @@ blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
 
 	*dst = 0;
 
+	if (ic != (iconv_t)-1)
+		iconv_close(ic);
+	free(srcenc);
+
 	return 1;
 
 nocode:
+	fprintf(stderr, "error decoding rfc2047\n");
 	if (ic != (iconv_t)-1)
 		iconv_close(ic);
-
-	fprintf(stderr, "error decoding rfc2047\n");
+	free(srcenc);
 nocodeok:
 	while (*src && dlen) {
 		*dst++ = *src++;
@@ -257,6 +293,10 @@ main() {
 	char test4dec[255];
 	blaze822_decode_rfc2047(test4dec, test4, sizeof test4dec, "UTF-8");
 	printf("%s\n", test4dec);
-	
+
+	char test5[] = "=?UTF-8?Q?z=E2=80?= =?UTF-8?Q?=99z?=";
+	char test5dec[255];
+	blaze822_decode_rfc2047(test5dec, test5, sizeof test5dec, "UTF-8");
+	printf("%s\n", test5dec);
 }
 #endif