about summary refs log tree commit diff
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2013-04-08 23:01:32 -0400
committer Rich Felker <dalias@aerifal.cx> 2013-04-08 23:01:32 -0400
commit ea34b1b90ca5ba3b87725662f6a1ff03b7a88a1f (patch)
tree f9289a7c3a981755b54860555a65a6afde26ba72
parent a49e038bab7b3927b6a9c7d0c52f9e1a9cb82629 (diff)
download musl-ea34b1b90ca5ba3b87725662f6a1ff03b7a88a1f.tar.gz
musl-ea34b1b90ca5ba3b87725662f6a1ff03b7a88a1f.tar.xz
musl-ea34b1b90ca5ba3b87725662f6a1ff03b7a88a1f.zip
implement mbtowc directly, not as a wrapper for mbrtowc
the interface contract for mbtowc admits a much faster implementation
than mbrtowc can achieve; wrapping mbrtowc with an extra call frame
only made the situation worse.

since the regex implementation uses mbtowc already, this change should
improve regex performance too. it may be possible to improve
performance in other places internally by switching from mbrtowc to
mbtowc.
-rw-r--r-- src/multibyte/mbtowc.c 44
1 files changed, 39 insertions, 5 deletions
diff --git a/src/multibyte/mbtowc.c b/src/multibyte/mbtowc.c
index b5dd7e3c..ec9e54ad 100644
--- a/src/multibyte/mbtowc.c
+++ b/src/multibyte/mbtowc.c
@@ -10,10 +10,44 @@
 #include <errno.h>
 
 #include "internal.h"
-
-int mbtowc(wchar_t *restrict wc, const char *restrict s, size_t n)
+#include <stdio.h>
+int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n)
 {
-	mbstate_t st = { 0 };
-	n = mbrtowc(wc, s, n, &st);
-	return n+2 ? n : -1;
+	unsigned c;
+	const unsigned char *s = (const void *)src;
+
+	if (!s) return 0;
+	if (!n) goto ilseq;
+	if (!wc) wc = (void *)&wc;
+
+	if (*s < 0x80) return !!(*wc = *s);
+	if (*s-SA > SB-SA) goto ilseq;
+	c = bittab[*s++-SA];
+
+	/* Avoid excessive checks against n: If shifting the state n-1
+	 * times does not clear the high bit, then the value of n is
+	 * insufficient to read a character */
+	if (n<4 && ((c<<(6*n-6)) & (1U<<31))) goto ilseq;
+
+	if (OOB(c,*s)) goto ilseq;
+	c = c<<6 | *s++-0x80;
+	if (!(c&(1U<<31))) {
+		*wc = c;
+		return 2;
+	}
+
+	if (*s-0x80u >= 0x40) goto ilseq;
+	c = c<<6 | *s++-0x80;
+	if (!(c&(1U<<31))) {
+		*wc = c;
+		return 3;
+	}
+
+	if (*s-0x80u >= 0x40) goto ilseq;
+	*wc = c<<6 | *s++-0x80;
+	return 4;
+
+ilseq:
+	errno = EILSEQ;
+	return -1;
 }