From 613a1182ff9e8b5ca598e3d233a0e36386559e30 Mon Sep 17 00:00:00 2001 From: Leah Neukirchen Date: Thu, 29 Jun 2017 16:43:47 +0200 Subject: mmime: gen_qp: ensure not to split utf8 multibyte sequences into encoded words RFC2047, 5.(3): > Each 'encoded-word' MUST represent an integral number of characters. > A multi-octet character may not be split across adjacent 'encoded- > word's. --- mmime.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'mmime.c') diff --git a/mmime.c b/mmime.c index 0aee314..5d667b9 100644 --- a/mmime.c +++ b/mmime.c @@ -64,6 +64,26 @@ int gen_qp(uint8_t *s, off_t size, int maxlinelen, int linelen) char prev = 0; for (i = 0; i < size; i++) { + // inspect utf8 sequence to not wrap in between multibyte + int mb; + if ((s[i] & 0x80) == 0) mb = 3; + else if ((s[i] & 0xc0) == 0x80) mb = 3; + else if ((s[i] & 0xe0) == 0xc0) mb = 6; + else if ((s[i] & 0xf0) == 0xe0) mb = 9; + else if ((s[i] & 0xf8) == 0xf0) mb = 12; + else mb = 3; + + if (linelen >= maxlinelen-mb-!!header) { + linelen = 0; + prev = '\n'; + if (header) { + printf("?=\n =?UTF-8?Q?"); + linelen += 11; + } else { + puts("="); + } + } + if ((s[i] > 126) || (s[i] < 32 && s[i] != '\n' && s[i] != '\t') || (s[i] == '=')) { @@ -90,17 +110,6 @@ int gen_qp(uint8_t *s, off_t size, int maxlinelen, int linelen) linelen++; prev = s[i]; } - - if (linelen >= maxlinelen-3-!!header) { - linelen = 0; - prev = '\n'; - if (header) { - printf("?=\n =?UTF-8?Q?"); - linelen += 11; - } else { - puts("="); - } - } } if (linelen > 0 && !header) puts("="); -- cgit 1.4.1