diff options
author | Maxim Kuvyrkov <maxim@codesourcery.com> | 2012-05-28 22:50:15 -0700 |
---|---|---|
committer | Maxim Kuvyrkov <maxim@codesourcery.com> | 2012-08-21 18:07:47 -0700 |
commit | 400726deef57d8da8d6c7cd5c8e7891eaabab041 (patch) | |
tree | 73509a99d4f1d1e11afd525300e496119865e431 | |
parent | 20a71f2c8ab45b458416422af2eec2b7bc52f66b (diff) | |
download | glibc-400726deef57d8da8d6c7cd5c8e7891eaabab041.tar.gz glibc-400726deef57d8da8d6c7cd5c8e7891eaabab041.tar.xz glibc-400726deef57d8da8d6c7cd5c8e7891eaabab041.zip |
Detect EOL on-the-fly in strstr, strcasestr and memmem.
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | NEWS | 7 | ||||
-rw-r--r-- | string/Makefile | 3 | ||||
-rw-r--r-- | string/bug-strcasestr1.c | 39 | ||||
-rw-r--r-- | string/str-two-way.h | 50 | ||||
-rw-r--r-- | string/strcasestr.c | 6 | ||||
-rw-r--r-- | string/strstr.c | 6 |
7 files changed, 108 insertions, 15 deletions
diff --git a/ChangeLog b/ChangeLog index 7c96e02795..4aeac6dcfc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,18 @@ 2012-08-21 Maxim Kuvyrkov <maxim@codesourcery.com> [BZ #11607] + * NEWS: Add an entry. + * string/str-two-way.h (AVAILABLE1, AVAILABLE2, RET0_IF_0): New macros, + define their defaults. + (two_way_short_needle): Detect end-of-string on-the-fly. + * string/strcasestr.c, string/strstr.c (AVAILABLE): Update. + (AVAILABLE1, AVAILABLE2, RET0_IF_0, AVAILABLE_USES_J): Define. + * string/bug-strcasestr1.c: New test. + * string/Makefile: Run it. + +2012-08-21 Maxim Kuvyrkov <maxim@codesourcery.com> + + [BZ #11607] * string/str-two-way.h (two_way_short_needle): Optimize matching of the first character. diff --git a/NEWS b/NEWS index cd4f498d82..d37e5a5920 100644 --- a/NEWS +++ b/NEWS @@ -9,8 +9,8 @@ Version 2.17 * The following bugs are resolved with this release: - 6778, 6808, 9685, 13717, 13939, 14042, 14090, 14166, 14150, 14151, 14154, - 14157, 14166, 14173, 14195, 14283, 14298, 14303, 14307, 14328, 14331, + 6778, 6808, 9685, 11607, 13717, 13939, 14042, 14090, 14166, 14150, 14151, + 14154, 14157, 14166, 14173, 14195, 14283, 14298, 14303, 14307, 14328, 14331, 14336, 14337, 14347, 14349 * Support for STT_GNU_IFUNC symbols added for s390 and s390x. @@ -25,6 +25,9 @@ Version 2.17 * SystemTap static probes have been added into the dynamic linker. Implemented by Gary Benson. +* Optimizations of string functions strstr, strcasestr and memmem. + Implemented by Maxim Kuvyrkov. + * The minimum Linux kernel version that this version of the GNU C Library can be used with is 2.6.16. diff --git a/string/Makefile b/string/Makefile index 1628b6adfc..a1204d9ad0 100644 --- a/string/Makefile +++ b/string/Makefile @@ -56,7 +56,7 @@ tests := tester inl-tester noinl-tester testcopy test-ffs \ tst-strtok tst-strxfrm bug-strcoll1 tst-strfry \ bug-strtok1 $(addprefix test-,$(strop-tests)) \ bug-envz1 tst-strxfrm2 tst-endian tst-svc2 \ - bug-strstr1 bug-strchr1 tst-strtok_r + bug-strstr1 bug-strcasestr1 bug-strchr1 tst-strtok_r include ../Rules @@ -74,6 +74,7 @@ CFLAGS-stratcliff.c = -fno-builtin CFLAGS-test-ffs.c = -fno-builtin CFLAGS-tst-inlcall.c = -fno-builtin CFLAGS-bug-strstr1.c = -fno-builtin +CFLAGS-bug-strcasestr1.c = -fno-builtin ifeq ($(cross-compiling),no) tests: $(objpfx)tst-svc.out diff --git a/string/bug-strcasestr1.c b/string/bug-strcasestr1.c new file mode 100644 index 0000000000..e8334d7093 --- /dev/null +++ b/string/bug-strcasestr1.c @@ -0,0 +1,39 @@ +/* Test for non-submitted strcasestr bug. + Copyright (C) 2012 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdio.h> +#include <string.h> + +#define TEST_FUNCTION do_test () +static int +do_test (void) +{ + const char haystack[] = "AOKB"; + const char needle[] = "OK"; + const char *sub = strcasestr (haystack, needle); + + if (sub == NULL) + { + fprintf (stderr, "BUG: didn't find \"%s\" in \"%s\"\n", needle, haystack); + return 1; + } + + return 0; +} + +#include "../test-skeleton.c" diff --git a/string/str-two-way.h b/string/str-two-way.h index e18dc91738..aab627a27b 100644 --- a/string/str-two-way.h +++ b/string/str-two-way.h @@ -75,6 +75,16 @@ # define CMP_FUNC memcmp #endif +#ifndef AVAILABLE1 +# define AVAILABLE1(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l) +#endif +#ifndef AVAILABLE2 +# define AVAILABLE2(h, h_l, j, n_l) (1) +#endif +#ifndef RET0_IF_0 +# define RET0_IF_0(a) /* nothing */ +#endif + /* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN. Return the index of the first byte in the right half, and set *PERIOD to the global period of the right half. @@ -266,37 +276,58 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, required, and any mismatch results in a maximal shift. */ period = MAX (suffix, needle_len - suffix) + 1; j = 0; - while (AVAILABLE (haystack, haystack_len, j, needle_len)) + while (AVAILABLE1 (haystack, haystack_len, j, needle_len)) { + unsigned char haystack_char; + /* TODO: The first-character loop can be sped up by adapting longword-at-a-time implementation of memchr/strchr. */ if (needle_suffix - != CANON_ELEMENT (haystack[suffix + j])) + != (haystack_char = CANON_ELEMENT (haystack[suffix + j]))) { + RET0_IF_0 (haystack_char); ++j; continue; } /* Scan for matches in right half. */ i = suffix + 1; - while (i < needle_len && (CANON_ELEMENT (needle[i]) - == CANON_ELEMENT (haystack[i + j]))) - ++i; + while (i < needle_len) + { + if (CANON_ELEMENT (needle[i]) + != (haystack_char = CANON_ELEMENT (haystack[i + j]))) + { + RET0_IF_0 (haystack_char); + break; + } + ++i; + } if (needle_len <= i) { /* Scan for matches in left half. */ i = suffix - 1; - while (i != SIZE_MAX && (CANON_ELEMENT (needle[i]) - == CANON_ELEMENT (haystack[i + j]))) - --i; + while (i != SIZE_MAX) + { + if (CANON_ELEMENT (needle[i]) + != (haystack_char = CANON_ELEMENT (haystack[i + j]))) + { + RET0_IF_0 (haystack_char); + break; + } + --i; + } if (i == SIZE_MAX) return (RETURN_TYPE) (haystack + j); j += period; } else j += i - suffix + 1; + + if (!AVAILABLE2 (haystack, haystack_len, j, needle_len)) + break; } } + ret0: __attribute__ ((unused)) return NULL; } @@ -433,6 +464,9 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len, } #undef AVAILABLE +#undef AVAILABLE1 +#undef AVAILABLE2 #undef CANON_ELEMENT #undef CMP_FUNC +#undef RET0_IF_0 #undef RETURN_TYPE diff --git a/string/strcasestr.c b/string/strcasestr.c index 9e1bde97ae..184ca68deb 100644 --- a/string/strcasestr.c +++ b/string/strcasestr.c @@ -1,6 +1,5 @@ /* Return the offset of one string within another. - Copyright (C) 1994, 1996-2000, 2004, 2008, 2009, 2010 - Free Software Foundation, Inc. + Copyright (C) 1994-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -44,6 +43,9 @@ #define AVAILABLE(h, h_l, j, n_l) \ (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ && ((h_l) = (j) + (n_l))) +#define AVAILABLE1(h, h_l, j, n_l) (true) +#define AVAILABLE2(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l) +#define RET0_IF_0(a) if (!a) goto ret0 #define CANON_ELEMENT(c) TOLOWER (c) #define CMP_FUNC(p1, p2, l) \ __strncasecmp ((const char *) (p1), (const char *) (p2), l) diff --git a/string/strstr.c b/string/strstr.c index 10e6fdc7f9..ec2a1e91e7 100644 --- a/string/strstr.c +++ b/string/strstr.c @@ -1,6 +1,5 @@ /* Return the offset of one string within another. - Copyright (C) 1994,1996,1997,2000,2001,2003,2008,2009 - Free Software Foundation, Inc. + Copyright (C) 1994-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -36,6 +35,9 @@ #define AVAILABLE(h, h_l, j, n_l) \ (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ && ((h_l) = (j) + (n_l))) +#define AVAILABLE1(h, h_l, j, n_l) (true) +#define AVAILABLE2(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l) +#define RET0_IF_0(a) if (!a) goto ret0 #include "str-two-way.h" #undef strstr |