about summary refs log tree commit diff
path: root/scan.c
diff options
context:
space:
mode:
author Christian Neukirchen <chneukirchen@gmail.com> 2016-07-19 16:24:47 +0200
committer Christian Neukirchen <chneukirchen@gmail.com> 2016-07-19 16:24:47 +0200
commit 606b7d1cdfb6245f3d6f26af39f83825cde6ec6f (patch)
tree ada27ac04b564c192a6bf383e4a592fbee7a30cd /scan.c
parent b319493901b988be6a5c457f023be6dbf5356283 (diff)
download mblaze-606b7d1cdfb6245f3d6f26af39f83825cde6ec6f.tar.gz
mblaze-606b7d1cdfb6245f3d6f26af39f83825cde6ec6f.tar.xz
mblaze-606b7d1cdfb6245f3d6f26af39f83825cde6ec6f.zip
scan: use wide char functions
Diffstat (limited to 'scan.c')
-rw-r--r-- scan.c 32
1 files changed, 25 insertions, 7 deletions
diff --git a/scan.c b/scan.c
index a2c58fa..d962d33 100644
--- a/scan.c
+++ b/scan.c
@@ -1,3 +1,5 @@
+#define _GNU_SOURCE
+
 #include <sys/stat.h>
 #include <sys/types.h>
 
@@ -8,20 +10,32 @@
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
+#include <wchar.h>
+#include <locale.h>
 
 #include "blaze822.h"
 
+wchar_t replacement = '?';
+
 void
 u8putstr(FILE *out, char *s, size_t l, int pad)
 {
 	while (*s && l) {
-		putc(*s, out);
-		// elongate by utf8 overhead
-		if      ((*s & 0xf0) == 0xf0) l += 3;
-		else if ((*s & 0xe0) == 0xe0) l += 2;
-		else if ((*s & 0xc0) == 0xc0) l += 1;
-		l--;
-		s++;
+		if (*s >= 32 && *s < 127) {
+			putc(*s, out);
+			s++;
+			l--;
+		} else {
+			wchar_t wc;
+			int r = mbtowc(&wc, s, 4);
+			if (r < 0) {
+				r = 1;
+				wc = replacement;
+			}
+			s += r;
+			fprintf(out, "%lc", wc);
+			l -= wcwidth(wc);
+		}
 	}
 	if (pad)
 		while (l-- > 0)
@@ -134,6 +148,10 @@ oneline(char *file)
 int
 main(int argc, char *argv[])
 {
+	setlocale(LC_ALL, "");  // for wcwidth later
+	if (wcwidth(0xFFFD) > 0)
+		replacement = 0xFFFD;
+
 	char *seqmap = blaze822_seq_open(0);
 	blaze822_seq_load(seqmap);
 	cur = blaze822_seq_cur();