about summary refs log tree commit diff
diff options
context:
space:
mode:
authorWilco Dijkstra <wdijkstr@arm.com>2018-07-16 17:50:09 +0100
committerWilco Dijkstra <wdijkstr@arm.com>2019-09-13 16:39:12 +0100
commit55a280689e61cb8a7879ebbe0586d031559f1ba4 (patch)
tree8ffbf8c70f65b95abe44a1a17278fd1702ab4232
parentd6613ad24f708706c24bffa38351e26e1dd5b5d1 (diff)
downloadglibc-55a280689e61cb8a7879ebbe0586d031559f1ba4.tar.gz
glibc-55a280689e61cb8a7879ebbe0586d031559f1ba4.tar.xz
glibc-55a280689e61cb8a7879ebbe0586d031559f1ba4.zip
Improve strstr performance
Improve strstr performance.  Strstr tends to be slow because it uses
many calls to memchr and a slow byte loop to scan for the next match.
Performance is significantly improved by using strnlen on larger blocks
and using strchr to search for the next matching character.  strcasestr
can also use strnlen to scan ahead, and memmem can use memchr to check
for the next match.

On the GLIBC bench tests the performance gains on Cortex-A72 are:
strstr: +25%
strcasestr: +4.3%
memmem: +18%

On a 256KB dataset strstr performance improves by 67%, strcasestr by 47%.

    Reviewd-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>

(cherry picked from commit 3ae725dfb6d7f61447d27d00ed83e573bd5454f4)
-rw-r--r--ChangeLog15
-rw-r--r--benchtests/bench-strcasestr.c1
-rw-r--r--benchtests/bench-strstr.c3
-rw-r--r--string/memmem.c1
-rw-r--r--string/str-two-way.h56
-rw-r--r--string/strcasestr.c4
-rw-r--r--string/strstr.c5
-rw-r--r--string/test-strcasestr.c1
-rw-r--r--string/test-strstr.c1
9 files changed, 53 insertions, 34 deletions
diff --git a/ChangeLog b/ChangeLog
index 2a9b6ed7ef..d21bdbcce8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,17 @@
-2019-01-09  Wilco Dijkstra  <wdijkstr@arm.com>
+2019-09-13  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	* benchtests/bench-strcasestr.c: Rename __strnlen to strnlen.
+	* benchtests/bench-strstr.c: Likewise.
+	* string/memmem.c (FASTSEARCH): Define.
+	* string/str-two-way.h (two_way_short_needle): Minor cleanups.
+	Add support for FASTSEARCH.
+	* string/strcasestr.c (AVAILABLE): Use read-ahead __strnlen.
+	* string/strstr.c (AVAILABLE): Use read-ahead __strnlen.
+	(FASTSEARCH): Define.
+	* string/test-strcasestr.c: Rename __strnlen to strnlen.
+	* string/test-strstr.c: Likewise.
+
+2019-09-06  Wilco Dijkstra  <wdijkstr@arm.com>
 
 	* manual/tunables.texi (glibc.cpu.name): Add ares tunable.
 	* sysdeps/aarch64/multiarch/memcpy.c (__libc_memcpy): Use
diff --git a/benchtests/bench-strcasestr.c b/benchtests/bench-strcasestr.c
index 4e6f480c84..9a031b3064 100644
--- a/benchtests/bench-strcasestr.c
+++ b/benchtests/bench-strcasestr.c
@@ -24,6 +24,7 @@
 #define STRCASESTR simple_strcasestr
 #define NO_ALIAS
 #define __strncasecmp strncasecmp
+#define __strnlen strnlen
 #include "../string/strcasestr.c"
 
 
diff --git a/benchtests/bench-strstr.c b/benchtests/bench-strstr.c
index e63659f136..2fa64118f4 100644
--- a/benchtests/bench-strstr.c
+++ b/benchtests/bench-strstr.c
@@ -22,6 +22,9 @@
 
 
 #define STRSTR simple_strstr
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(X)
+#define __strnlen strnlen
 #include "../string/strstr.c"
 
 
diff --git a/string/memmem.c b/string/memmem.c
index 54fca4966d..34299b8864 100644
--- a/string/memmem.c
+++ b/string/memmem.c
@@ -31,6 +31,7 @@
 
 #define RETURN_TYPE void *
 #define AVAILABLE(h, h_l, j, n_l) ((j) <= (h_l) - (n_l))
+#define FASTSEARCH(S,C,N) (void*) memchr ((void *)(S), (C), (N))
 #include "str-two-way.h"
 
 #undef memmem
diff --git a/string/str-two-way.h b/string/str-two-way.h
index 599c867ffd..f433c76478 100644
--- a/string/str-two-way.h
+++ b/string/str-two-way.h
@@ -281,50 +281,50 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
     }
   else
     {
-      const unsigned char *phaystack = &haystack[suffix];
+      const unsigned char *phaystack;
       /* The comparison always starts from needle[suffix], so cache it
 	 and use an optimized first-character loop.  */
       unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]);
 
-#if CHECK_EOL
-      /* We start matching from the SUFFIX'th element, so make sure we
-	 don't hit '\0' before that.  */
-      if (haystack_len < suffix + 1
-	  && !AVAILABLE (haystack, haystack_len, 0, suffix + 1))
-	return NULL;
-#endif
-
       /* The two halves of needle are distinct; no extra memory is
 	 required, and any mismatch results in a maximal shift.  */
       period = MAX (suffix, needle_len - suffix) + 1;
       j = 0;
-      while (1
-#if !CHECK_EOL
-	     && AVAILABLE (haystack, haystack_len, j, needle_len)
-#endif
-	     )
+      while (AVAILABLE (haystack, haystack_len, j, needle_len))
 	{
 	  unsigned char haystack_char;
 	  const unsigned char *pneedle;
 
-	  /* TODO: The first-character loop can be sped up by adapting
-	     longword-at-a-time implementation of memchr/strchr.  */
-	  if (needle_suffix
+	  phaystack = &haystack[suffix + j];
+
+#ifdef FASTSEARCH
+	  if (*phaystack++ != needle_suffix)
+	    {
+	      phaystack = FASTSEARCH (phaystack, needle_suffix,
+				      haystack_len - needle_len - j);
+	      if (phaystack == NULL)
+		goto ret0;
+	      j = phaystack - &haystack[suffix];
+	      phaystack++;
+	    }
+#else
+	  while (needle_suffix
 	      != (haystack_char = CANON_ELEMENT (*phaystack++)))
 	    {
 	      RET0_IF_0 (haystack_char);
-#if !CHECK_EOL
+# if !CHECK_EOL
 	      ++j;
-#endif
-	      continue;
+	      if (!AVAILABLE (haystack, haystack_len, j, needle_len))
+		goto ret0;
+# endif
 	    }
 
-#if CHECK_EOL
+# if CHECK_EOL
 	  /* Calculate J if it wasn't kept up-to-date in the first-character
 	     loop.  */
 	  j = phaystack - &haystack[suffix] - 1;
+# endif
 #endif
-
 	  /* Scan for matches in right half.  */
 	  i = suffix + 1;
 	  pneedle = &needle[i];
@@ -338,6 +338,11 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
 		}
 	      ++i;
 	    }
+#if CHECK_EOL
+	  /* Update minimal length of haystack.  */
+	  if (phaystack > haystack + haystack_len)
+	    haystack_len = phaystack - haystack;
+#endif
 	  if (needle_len <= i)
 	    {
 	      /* Scan for matches in left half.  */
@@ -360,13 +365,6 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
 	    }
 	  else
 	    j += i - suffix + 1;
-
-#if CHECK_EOL
-	  if (!AVAILABLE (haystack, haystack_len, j, needle_len))
-	    break;
-#endif
-
-	  phaystack = &haystack[suffix + j];
 	}
     }
  ret0: __attribute__ ((unused))
diff --git a/string/strcasestr.c b/string/strcasestr.c
index 2acf003155..7caaade5d2 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -37,8 +37,8 @@
 /* Two-Way algorithm.  */
 #define RETURN_TYPE char *
 #define AVAILABLE(h, h_l, j, n_l)			\
-  (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
-   && ((h_l) = (j) + (n_l)))
+  (((j) + (n_l) <= (h_l)) || ((h_l) += __strnlen ((void*)((h) + (h_l)), 512), \
+			      (j) + (n_l) <= (h_l)))
 #define CHECK_EOL (1)
 #define RET0_IF_0(a) if (!a) goto ret0
 #define CANON_ELEMENT(c) TOLOWER (c)
diff --git a/string/strstr.c b/string/strstr.c
index 88f1d5de36..63facae4a5 100644
--- a/string/strstr.c
+++ b/string/strstr.c
@@ -33,10 +33,11 @@
 
 #define RETURN_TYPE char *
 #define AVAILABLE(h, h_l, j, n_l)			\
-  (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
-   && ((h_l) = (j) + (n_l)))
+  (((j) + (n_l) <= (h_l)) || ((h_l) += __strnlen ((void*)((h) + (h_l)), 512), \
+			      (j) + (n_l) <= (h_l)))
 #define CHECK_EOL (1)
 #define RET0_IF_0(a) if (!a) goto ret0
+#define FASTSEARCH(S,C,N) (void*) strchr ((void*)(S), (C))
 #include "str-two-way.h"
 
 #undef strstr
diff --git a/string/test-strcasestr.c b/string/test-strcasestr.c
index abb3916732..78e03da7c4 100644
--- a/string/test-strcasestr.c
+++ b/string/test-strcasestr.c
@@ -25,6 +25,7 @@
 #define STRCASESTR simple_strcasestr
 #define NO_ALIAS
 #define __strncasecmp strncasecmp
+#define __strnlen strnlen
 #include "strcasestr.c"
 
 
diff --git a/string/test-strstr.c b/string/test-strstr.c
index 33f221149a..8d04134e0b 100644
--- a/string/test-strstr.c
+++ b/string/test-strstr.c
@@ -24,6 +24,7 @@
 
 #define STRSTR simple_strstr
 #define libc_hidden_builtin_def(arg) /* nothing */
+#define __strnlen strnlen
 #include "strstr.c"