about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Peter Stephenson <pws@zsh.org>, 2016-11-04 09:56:03 +0000
committer: Peter Stephenson <pws@zsh.org>, 2016-11-04 09:56:51 +0000
commit: 9c68ef083b9ddd94c3ca7027214b2e8a31a1bc47 (patch)
tree: 4ce960ea3dec72a4775bd00c291e859d59fe798f
parent: 6ea4e708bae26dbf3957df68e1db70a60b67c105 (diff)
download: zsh-9c68ef083b9ddd94c3ca7027214b2e8a31a1bc47.tar.gz
zsh-9c68ef083b9ddd94c3ca7027214b2e8a31a1bc47.tar.xz
zsh-9c68ef083b9ddd94c3ca7027214b2e8a31a1bc47.zip
39825: optimise mb_metastrlenend() for 7-bit character.
As the shell relies intimately on US-ASCII as a subset we can
skip the multibyte functions if we are dealing with a complete
7-bit character.
-rw-r--r-- ChangeLog 5
-rw-r--r-- Src/utils.c 17
2 files changed, 21 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index e83dccbe6..1b1ecd75d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2016-11-04  Peter Stephenson  <p.stephenson@samsung.com>
+
+	* Sebastian: 39825: Src/utils.c: Optimise mb_metastrlenend() for
+	the common case of 7-bit character.
+
 2016-11-04  Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
 
 	* 39818 (Oliver Kiddle): Completion/Unix/Command/_awk: add nawk
diff --git a/Src/utils.c b/Src/utils.c
index db4352908..733f57088 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -5323,7 +5323,7 @@ mb_metastrlenend(char *ptr, int width, char *eptr)
     char inchar, *laststart;
     size_t ret;
     wchar_t wc;
-    int num, num_in_char;
+    int num, num_in_char, complete;
 
     if (!isset(MULTIBYTE))
 	return ztrlen(ptr);
@@ -5331,6 +5331,7 @@ mb_metastrlenend(char *ptr, int width, char *eptr)
     laststart = ptr;
     ret = MB_INVALID;
     num = num_in_char = 0;
+    complete = 1;
 
     memset(&mb_shiftstate, 0, sizeof(mb_shiftstate));
     while (*ptr && !(eptr && ptr >= eptr)) {
@@ -5339,6 +5340,18 @@ mb_metastrlenend(char *ptr, int width, char *eptr)
 	else
 	    inchar = *ptr;
 	ptr++;
+
+	if (complete && (inchar >= 0 && inchar <= 0x7f)) {
+	    /*
+	     * We rely on 7-bit US-ASCII as a subset, so skip
+	     * multibyte handling if we have such a character.
+	     */
+	    num++;
+	    laststart = ptr;
+	    num_in_char = 0;
+	    continue;
+	}
+
 	ret = mbrtowc(&wc, &inchar, 1, &mb_shiftstate);
 
 	if (ret == MB_INCOMPLETE) {
@@ -5358,6 +5371,7 @@ mb_metastrlenend(char *ptr, int width, char *eptr)
 	     * so we don't count characters twice.
 	     */
 	    num_in_char++;
+	    complete = 0;
 	} else {
 	    if (ret == MB_INVALID) {
 		/* Reset, treat as single character */
@@ -5380,6 +5394,7 @@ mb_metastrlenend(char *ptr, int width, char *eptr)
 		num++;
 	    laststart = ptr;
 	    num_in_char = 0;
+	    complete = 1;
 	}
     }