about summary refs log tree commit diff
path: root/safe_u8putstr.c
diff options
context:
space:
mode:
author Leah Neukirchen <leah@vuxu.org> 2017-03-13 15:50:41 +0100
committer Leah Neukirchen <leah@vuxu.org> 2017-03-13 15:54:52 +0100
commit 564bb223eff77c087f06d4048b109cbf4bf16612 (patch)
tree f15edbed03ac47a8e9afcaa2288b178133e85c9a /safe_u8putstr.c
parent 14a12635bdfdf2ecd3c4609eb3527593e12de4a0 (diff)
download mblaze-564bb223eff77c087f06d4048b109cbf4bf16612.tar.gz
mblaze-564bb223eff77c087f06d4048b109cbf4bf16612.tar.xz
mblaze-564bb223eff77c087f06d4048b109cbf4bf16612.zip
mshow: print plain text safely
Diffstat (limited to 'safe_u8putstr.c')
-rw-r--r-- safe_u8putstr.c 68
1 files changed, 68 insertions, 0 deletions
diff --git a/safe_u8putstr.c b/safe_u8putstr.c
new file mode 100644
index 0000000..758c5be
--- /dev/null
+++ b/safe_u8putstr.c
@@ -0,0 +1,68 @@
+#include <stdio.h>
+#include <stdint.h>
+
+/*
+ * safe_u8putstr - write the l bytes at s0 to stream in a tty-safe form.
+ * Relaxed UTF-8 semantics:
+ * - C0 controls (except TAB, LF, CR) and DEL become U+24xx control pictures
+ * - stray bytes 0x80..0xbf become U+241B followed by U+2400 + (byte - 0x80)
+ * - 2..4 byte sequences with a well-formed bit pattern are printed as is
+ * - any other high byte is printed bytewise as is (probably latin1)
+ * - CRLF is translated to LF; a lone CR is printed as is
+ */
+void
+safe_u8putstr(char *s0, size_t l, FILE *stream)
+{
+	unsigned char *s = (unsigned char *)s0;
+	unsigned char *e = s + l;
+
+	while (s < e) {
+		if ((*s & 0x80) == 0) {
+			if (*s < 32 && *s != '\t' && *s != '\n' && *s != '\r') {
+				// C0: U+2400 + *s, encoded as UTF-8
+				fputc(0xe2, stream);
+				fputc(0x90, stream);
+				fputc(0x80+*s, stream);
+			} else if (*s == 127) {
+				// DEL: U+2421
+				fputc(0xe2, stream);
+				fputc(0x90, stream);
+				fputc(0xa1, stream);
+			} else if (*s == '\r') {
+				// skip the CR of a CRLF pair so only the LF is printed
+				if (e - s > 1 && s[1] == '\n')
+					s++;
+				fputc(*s, stream);
+			} else {
+				// safe ASCII
+				fputc(*s, stream);
+			}
+		} else if ((*s & 0xc0) == 0x80) {
+			// stray continuation byte, shown like C1: U+241B (ESC picture) ...
+			fputc(0xe2, stream);
+			fputc(0x90, stream);
+			fputc(0x80+0x1b, stream);
+			// ... followed by U+2400 + (*s - 0x80)
+			fputc(0xe2, stream);
+			fputc(0x90, stream);
+			fputc(*s, stream);
+		} else {
+			// Pack up to four bytes big-endian, zero-padded past the end
+			// of input.  f is built in uint32_t because shifting the
+			// int-promoted lead byte (>= 0xc0) left by 24 overflows int.
+			uint32_t f = 0;
+			for (int i = 0; i < 4; i++)
+				f = (f << 8) | (i < e - s ? s[i] : 0);
+
+			if      ((f & 0xe0c00000) == 0xc0800000) goto u2;
+			else if ((f & 0xf0c0c000) == 0xe0808000) goto u3;
+			else if ((f & 0xf8c0c0c0) == 0xf0808080) goto u4;
+			else    /* invalid utf8 */               goto u1;
+u4:				fputc(*s++, stream);
+u3:				fputc(*s++, stream);
+u2:				fputc(*s++, stream);
+u1:				fputc(*s, stream);
+		}
+		s++;
+	}
+}