diff options
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | string/memmem.c | 127 |
2 files changed, 89 insertions, 42 deletions
diff --git a/ChangeLog b/ChangeLog index 7ddec5412b..01fc098865 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2019-06-12 Wilco Dijkstra <wdijkstr@arm.com> + * string/memmem.c (__memmem): Rewrite to improve performance. + +2019-06-12 Wilco Dijkstra <wdijkstr@arm.com> + * string/str-two-way.h (two_way_short_needle): Add inline to avoid warning. (two_way_long_needle): Block inlining. diff --git a/string/memmem.c b/string/memmem.c index 4bf733f1f0..83ee75e8c7 100644 --- a/string/memmem.c +++ b/string/memmem.c @@ -15,17 +15,13 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -/* This particular implementation was written by Eric Blake, 2008. */ - #ifndef _LIBC # include <config.h> #endif -/* Specification of memmem. */ #include <string.h> #ifndef _LIBC -# define __builtin_expect(expr, val) (expr) # define __memmem memmem #endif @@ -36,51 +32,98 @@ #undef memmem -/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK - if NEEDLE_LEN is 0, otherwise NULL if NEEDLE is not found in - HAYSTACK. */ +/* Hash character pairs so a small shift table can be used. All bits of + p[0] are included, but not all bits from p[-1]. So if two equal hashes + match on p[-1], p[0] matches too. Hash collisions are harmless and result + in smaller shifts. */ +#define hash2(p) (((size_t)(p)[0] - ((size_t)(p)[-1] << 3)) % sizeof (shift)) + +/* Fast memmem algorithm with guaranteed linear-time performance. + Small needles up to size 2 use a dedicated linear search. Longer needles + up to size 256 use a novel modified Horspool algorithm. It hashes pairs + of characters to quickly skip past mismatches. The main search loop only + exits if the last 2 characters match, avoiding unnecessary calls to memcmp + and allowing for a larger skip if there is no match. A self-adapting + filtering check is used to quickly detect mismatches in long needles. + By limiting the needle length to 256, the shift table can be reduced to 8 + bits per entry, lowering preprocessing overhead and minimizing cache effects. + The limit also implies worst-case performance is linear. + Needles larger than 256 characters use the linear-time Two-Way algorithm. */ void * -__memmem (const void *haystack_start, size_t haystack_len, - const void *needle_start, size_t needle_len) +__memmem (const void *haystack, size_t hs_len, + const void *needle, size_t ne_len) { - /* Abstract memory is considered to be an array of 'unsigned char' values, - not an array of 'char' values. See ISO C 99 section 6.2.6.1. */ - const unsigned char *haystack = (const unsigned char *) haystack_start; - const unsigned char *needle = (const unsigned char *) needle_start; - - if (needle_len == 0) - /* The first occurrence of the empty string is deemed to occur at - the beginning of the string. */ - return (void *) haystack; - - /* Sanity check, otherwise the loop might search through the whole - memory. */ - if (__glibc_unlikely (haystack_len < needle_len)) + const unsigned char *hs = (const unsigned char *) haystack; + const unsigned char *ne = (const unsigned char *) needle; + + if (ne_len == 0) + return (void *) hs; + if (ne_len == 1) + return (void *) memchr (hs, ne[0], hs_len); + + /* Ensure haystack length is >= needle length. */ + if (hs_len < ne_len) return NULL; - /* Use optimizations in memchr when possible, to reduce the search - size of haystack using a linear algorithm with a smaller - coefficient. However, avoid memchr for long needles, since we - can often achieve sublinear performance. */ - if (needle_len < LONG_NEEDLE_THRESHOLD) + const unsigned char *end = hs + hs_len - ne_len; + + if (ne_len == 2) + { + uint32_t nw = ne[0] << 16 | ne[1], hw = hs[0] << 16 | hs[1]; + for (hs++; hs <= end && hw != nw; ) + hw = hw << 16 | *++hs; + return hw == nw ? (void *)hs - 1 : NULL; + } + + /* Use Two-Way algorithm for very long needles. */ + if (__builtin_expect (ne_len > 256, 0)) + return two_way_long_needle (hs, hs_len, ne, ne_len); + + uint8_t shift[256]; + size_t tmp, shift1; + size_t m1 = ne_len - 1; + size_t offset = 0; + + memset (shift, 0, sizeof (shift)); + for (int i = 1; i < m1; i++) + shift[hash2 (ne + i)] = i; + /* Shift1 is the amount we can skip after matching the hash of the + needle end but not the full needle. */ + shift1 = m1 - shift[hash2 (ne + m1)]; + shift[hash2 (ne + m1)] = m1; + + for ( ; hs <= end; ) { - haystack = memchr (haystack, *needle, haystack_len); - if (!haystack || __builtin_expect (needle_len == 1, 0)) - return (void *) haystack; - haystack_len -= haystack - (const unsigned char *) haystack_start; - if (haystack_len < needle_len) - return NULL; - /* Check whether we have a match. This improves performance since we - avoid the initialization overhead of the two-way algorithm. */ - if (memcmp (haystack, needle, needle_len) == 0) - return (void *) haystack; - return two_way_short_needle (haystack, haystack_len, needle, needle_len); + /* Skip past character pairs not in the needle. */ + do + { + hs += m1; + tmp = shift[hash2 (hs)]; + } + while (tmp == 0 && hs <= end); + + /* If the match is not at the end of the needle, shift to the end + and continue until we match the hash of the needle end. */ + hs -= tmp; + if (tmp < m1) + continue; + + /* Hash of the last 2 characters matches. If the needle is long, + try to quickly filter out mismatches. */ + if (m1 < 15 || memcmp (hs + offset, ne + offset, 8) == 0) + { + if (memcmp (hs, ne, m1) == 0) + return (void *) hs; + + /* Adjust filter offset when it doesn't find the mismatch. */ + offset = (offset >= 8 ? offset : m1) - 8; + } + + /* Skip based on matching the hash of the needle end. */ + hs += shift1; } - else - return two_way_long_needle (haystack, haystack_len, needle, needle_len); + return NULL; } libc_hidden_def (__memmem) weak_alias (__memmem, memmem) libc_hidden_weak (memmem) - -#undef LONG_NEEDLE_THRESHOLD |