diff options
author | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2016-03-26 19:09:07 +0000 |
---|---|---|
committer | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2016-04-01 10:44:44 -0300 |
commit | 91f3b75f47c9eca3299098c3dcc2f5d9dad320b1 (patch) | |
tree | f00e3a58386999ac44a322211ff74cffcb37df99 /string/strspn.c | |
parent | d3496c9f4f27d3009b71be87f6108b4fed7314bd (diff) | |
download | glibc-91f3b75f47c9eca3299098c3dcc2f5d9dad320b1.tar.gz glibc-91f3b75f47c9eca3299098c3dcc2f5d9dad320b1.tar.xz glibc-91f3b75f47c9eca3299098c3dcc2f5d9dad320b1.zip |
Improve generic strspn performance
As with strcspn, this patch improves strspn performance by using a much faster algorithm. It first constructs a 256-entry table based on the accept string and then uses it as a lookup table for the input string. As with the strcspn optimization, it is generally at least 10 times faster than the existing implementation on bench-strspn on a few AArch64 implementations. Also, the string/bits/string2.h inlines no longer make sense, as the current implementation already implements most of the optimizations. Tested on x86_64, i686, and aarch64. * string/strspn.c (strspn): Rewrite function. * string/bits/string2.h (strspn): Use __builtin_strspn. (__strspn_c1): Remove inline function. (__strspn_c2): Likewise. (__strspn_c3): Likewise. * string/string-inlines.c [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c1): Add compatibility symbol. [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c2): Likewise. [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c3): Likewise.
Diffstat (limited to 'string/strspn.c')
-rw-r--r-- | string/strspn.c | 57 |
1 files changed, 42 insertions, 15 deletions
diff --git a/string/strspn.c b/string/strspn.c index f0635c156a..f98340b1b2 100644 --- a/string/strspn.c +++ b/string/strspn.c @@ -16,32 +16,59 @@ <http://www.gnu.org/licenses/>. */ #include <string.h> +#include <stdint.h> #undef strspn #ifndef STRSPN -#define STRSPN strspn +# define STRSPN strspn #endif /* Return the length of the maximum initial segment of S which contains only characters in ACCEPT. */ size_t -STRSPN (const char *s, const char *accept) +STRSPN (const char *str, const char *accept) { - const char *p; - const char *a; - size_t count = 0; - - for (p = s; *p != '\0'; ++p) + if (accept[0] == '\0') + return 0; + if (__glibc_unlikely (accept[1] == '\0')) { - for (a = accept; *a != '\0'; ++a) - if (*p == *a) - break; - if (*a == '\0') - return count; - else - ++count; + const char *a = str; + for (; *str == *accept; str++); + return str - a; } - return count; + /* Use multiple small memsets to enable inlining on most targets. */ + unsigned char table[256]; + unsigned char *p = memset (table, 0, 64); + memset (p + 64, 0, 64); + memset (p + 128, 0, 64); + memset (p + 192, 0, 64); + + unsigned char *s = (unsigned char*) accept; + /* Different from strcspn it does not add the NULL on the table + so can avoid check if str[i] is NULL, since table['\0'] will + be 0 and thus stopping the loop check. */ + do + p[*s++] = 1; + while (*s); + + s = (unsigned char*) str; + if (!p[s[0]]) return 0; + if (!p[s[1]]) return 1; + if (!p[s[2]]) return 2; + if (!p[s[3]]) return 3; + + s = (unsigned char *) ((uintptr_t)(s) & ~3); + unsigned int c0, c1, c2, c3; + do { + s += 4; + c0 = p[s[0]]; + c1 = p[s[1]]; + c2 = p[s[2]]; + c3 = p[s[3]]; + } while ((c0 & c1 & c2 & c3) != 0); + + size_t count = s - (unsigned char *) str; + return (c0 & c1) == 0 ? count + c0 : count + c2 + 2; } libc_hidden_builtin_def (strspn) |