diff options
author | Wilco Dijkstra <wdijkstr@arm.com> | 2016-03-25 16:44:26 -0300 |
---|---|---|
committer | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2016-04-01 10:44:40 -0300 |
commit | d3496c9f4f27d3009b71be87f6108b4fed7314bd (patch) | |
tree | eefcd05beb1f6ed269167433a763c234483b7c0b /string/strcspn.c | |
parent | d8a012c5c9e4bfc1b8db2bc6deacb85b44a2e1eb (diff) | |
download | glibc-d3496c9f4f27d3009b71be87f6108b4fed7314bd.tar.gz glibc-d3496c9f4f27d3009b71be87f6108b4fed7314bd.tar.xz glibc-d3496c9f4f27d3009b71be87f6108b4fed7314bd.zip |
Improve generic strcspn performance
Improve strcspn performance using a much faster algorithm. It is kept simple so it works well on most targets. It is generally at least 10 times faster than the existing implementation on bench-strcspn on a few AArch64 implementations, and for some tests 100 times as fast (repeatedly calling strchr on a small string is extremely slow...). In fact the string/bits/string2.h inlines make no longer sense, as GCC already uses strlen if reject is an empty string, strchrnul is 5 times as fast as __strcspn_c1, while __strcspn_c2 and __strcspn_c3 are slower than the strcspn main loop for large strings (though reject length 2-4 could be special cased in the future to gain even more performance). Tested on x86_64, i686, and aarch64. * string/Version (libc): Add GLIBC_2.24. * string/strcspn.c (strcspn): Rewrite function. * string/bits/string2.h (strcspn): Use __builtin_strcspn. (__strcspn_c1): Remove inline function. (__strcspn_c2): Likewise. (__strcspn_c3): Likewise. * string/string-inline.c [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c1): Add compatibility symbol. [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c2): Likewise. [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c3): Likewise. * sysdeps/i386/string-inlines.c: Include generic string-inlines.c.
Diffstat (limited to 'string/strcspn.c')
-rw-r--r-- | string/strcspn.c | 46 |
1 files changed, 38 insertions, 8 deletions
diff --git a/string/strcspn.c b/string/strcspn.c index 8888919639..c535dd1c3d 100644 --- a/string/strcspn.c +++ b/string/strcspn.c @@ -16,6 +16,7 @@ <http://www.gnu.org/licenses/>. */ #include <string.h> +#include <stdint.h> #undef strcspn @@ -26,16 +27,45 @@ /* Return the length of the maximum initial segment of S which contains no characters from REJECT. */ size_t -STRCSPN (const char *s, const char *reject) +STRCSPN (const char *str, const char *reject) { - size_t count = 0; + if (__glibc_unlikely (reject[0] == '\0') || + __glibc_unlikely (reject[1] == '\0')) + return __strchrnul (str, reject [0]) - str; - while (*s != '\0') - if (strchr (reject, *s++) == NULL) - ++count; - else - return count; + /* Use multiple small memsets to enable inlining on most targets. */ + unsigned char table[256]; + unsigned char *p = memset (table, 0, 64); + memset (p + 64, 0, 64); + memset (p + 128, 0, 64); + memset (p + 192, 0, 64); - return count; + unsigned char *s = (unsigned char*) reject; + unsigned char tmp; + do + p[tmp = *s++] = 1; + while (tmp); + + s = (unsigned char*) str; + if (p[s[0]]) return 0; + if (p[s[1]]) return 1; + if (p[s[2]]) return 2; + if (p[s[3]]) return 3; + + s = (unsigned char *) ((uintptr_t)(s) & ~3); + + unsigned int c0, c1, c2, c3; + do + { + s += 4; + c0 = p[s[0]]; + c1 = p[s[1]]; + c2 = p[s[2]]; + c3 = p[s[3]]; + } + while ((c0 | c1 | c2 | c3) == 0); + + size_t count = s - (unsigned char *) str; + return (c0 | c1) != 0 ? count - c0 + 1 : count - c2 + 3; } libc_hidden_builtin_def (strcspn) |