about summary refs log tree commit diff
path: root/string/strspn.c
diff options
context:
space:
mode:
authorAdhemerval Zanella <adhemerval.zanella@linaro.org>2016-03-26 19:09:07 +0000
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2016-04-01 10:44:44 -0300
commit91f3b75f47c9eca3299098c3dcc2f5d9dad320b1 (patch)
treef00e3a58386999ac44a322211ff74cffcb37df99 /string/strspn.c
parentd3496c9f4f27d3009b71be87f6108b4fed7314bd (diff)
downloadglibc-91f3b75f47c9eca3299098c3dcc2f5d9dad320b1.tar.gz
glibc-91f3b75f47c9eca3299098c3dcc2f5d9dad320b1.tar.xz
glibc-91f3b75f47c9eca3299098c3dcc2f5d9dad320b1.zip
Improve generic strspn performance
As for strcspn, this patch improves strspn performance using a much
faster algorithm.  It first constructs a 256-entry table based on
the accept string and then uses it as a lookup table for the
input string.  As for strcspn optimization, it is generally at least
10 times faster than the existing implementation on bench-strspn
on a few AArch64 implementations.

Also, the string/bits/string2.h inlines no longer make sense, as the current
implementation already performs most of those optimizations.

Tested on x86_64, i686, and aarch64.

	* string/strspn.c (strspn): Rewrite function.
	* string/bits/string2.h (strspn): Use __builtin_strspn.
	(__strspn_c1): Remove inline function.
	(__strspn_c2): Likewise.
	(__strspn_c3): Likewise.
	* string/string-inlines.c
	[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c1): Add
	compatibility symbol.
	[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c2):
	Likewise.
	[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c3):
	Likewise.
Diffstat (limited to 'string/strspn.c')
-rw-r--r--string/strspn.c57
1 files changed, 42 insertions, 15 deletions
diff --git a/string/strspn.c b/string/strspn.c
index f0635c156a..f98340b1b2 100644
--- a/string/strspn.c
+++ b/string/strspn.c
@@ -16,32 +16,59 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <string.h>
+#include <stdint.h>
 
 #undef strspn
 #ifndef STRSPN
-#define STRSPN strspn
+# define STRSPN strspn
 #endif
 
 /* Return the length of the maximum initial segment
    of S which contains only characters in ACCEPT.  */
 size_t
-STRSPN (const char *s, const char *accept)
+STRSPN (const char *str, const char *accept)
 {
-  const char *p;
-  const char *a;
-  size_t count = 0;
-
-  for (p = s; *p != '\0'; ++p)
+  if (accept[0] == '\0')
+    return 0;
+  if (__glibc_unlikely (accept[1] == '\0'))
     {
-      for (a = accept; *a != '\0'; ++a)
-	if (*p == *a)
-	  break;
-      if (*a == '\0')
-	return count;
-      else
-	++count;
+      const char *a = str;
+      for (; *str == *accept; str++);
+      return str - a;
     }
 
-  return count;
+  /* Use multiple small memsets to enable inlining on most targets.  */
+  unsigned char table[256];
+  unsigned char *p = memset (table, 0, 64);
+  memset (p + 64, 0, 64);
+  memset (p + 128, 0, 64);
+  memset (p + 192, 0, 64);
+
+  unsigned char *s = (unsigned char*) accept;
+  /* Unlike strcspn, the NUL terminator is not added to the table, so
+     there is no need to check whether str[i] is NUL: table['\0'] stays
+     0, which stops the loop.  */
+  do
+    p[*s++] = 1;
+  while (*s);
+
+  s = (unsigned char*) str;
+  if (!p[s[0]]) return 0;
+  if (!p[s[1]]) return 1;
+  if (!p[s[2]]) return 2;
+  if (!p[s[3]]) return 3;
+
+  s = (unsigned char *) ((uintptr_t)(s) & ~3);
+  unsigned int c0, c1, c2, c3;
+  do {
+      s += 4;
+      c0 = p[s[0]];
+      c1 = p[s[1]];
+      c2 = p[s[2]];
+      c3 = p[s[3]];
+  } while ((c0 & c1 & c2 & c3) != 0);
+
+  size_t count = s - (unsigned char *) str;
+  return (c0 & c1) == 0 ? count + c0 : count + c2 + 2;
 }
 libc_hidden_builtin_def (strspn)