From cf11e74b0d81d389bcad5cdbba020ba475f0ac4b Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Tue, 19 Mar 2024 10:15:28 -0300 Subject: wcsmbs: Ensure wcstr worst-case linear execution time (BZ 23865) It uses the same two-way algorithm used on strstr, strcasestr, and memmem. Different than strstr, neither the "shift table" optimization nor the self-adapting filtering check is used because it would result in a too-large shift table (and it also simplifies the implementation bit). Checked on x86_64-linux-gnu and aarch64-linux-gnu. Reviewed-by: DJ Delorie --- string/test-strstr.c | 172 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) (limited to 'string') diff --git a/string/test-strstr.c b/string/test-strstr.c index 86237555c0..1eebdbcb51 100644 --- a/string/test-strstr.c +++ b/string/test-strstr.c @@ -59,6 +59,9 @@ # undef weak_alias # define weak_alias(a, b) # define WCSSTR c_wcsstr +# define __wmemcmp wmemcmp +# define __wcsnlen wcsnlen +# define __wcslen wcslen # include "wcsstr.c" # define C_IMPL WCSSTR #endif @@ -217,6 +220,31 @@ check2 (void) } } +static void +check3 (void) +{ + /* Check that a long periodic needle does not cause false positives. */ + { + const CHAR input[] = L("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_A7_20_EF_BF_BD"); + const CHAR need[] = L("_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"); + FOR_EACH_IMPL (impl, 0) + check_result (impl, input, need, NULL); + } + + { + const CHAR input[] = L("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD" + "_C3_A7_20_EF_BF_BD_DA_B5_C2_A6_20" + "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"); + const CHAR need[] = L("_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"); + FOR_EACH_IMPL (impl, 0) + check_result (impl, input, need, (CHAR *) input + 115); + } +} + + #define N 1024 static void @@ -243,6 +271,148 @@ pr23637 (void) check_result (impl, h, n, exp_result); } +static void +pr23865 (void) +{ + /* Check that a very long haystack is handled quickly if the needle is + short and occurs near the beginning. */ + { + size_t m = 1000000; + const CHAR *needle = + L("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); + CHAR *haystack = xmalloc ((m + 1) * sizeof (CHAR)); + MEMSET (haystack, L('A'), m); + haystack[0] = L('B'); + haystack[m] = L('\0'); + + FOR_EACH_IMPL (impl, 0) + check_result (impl, haystack, needle, haystack + 1); + + free (haystack); + } + + /* Check that a very long needle is discarded quickly if the haystack is + short. */ + { + size_t m = 1000000; + const CHAR *haystack = + L("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "ABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABAB"); + CHAR *needle = xmalloc ((m + 1) * sizeof (CHAR)); + MEMSET (needle, L'A', m); + needle[m] = L('\0'); + + FOR_EACH_IMPL (impl, 0) + check_result (impl, haystack, needle, NULL); + + free (needle); + } + + /* Check that the asymptotic worst-case complexity is not quadratic. */ + { + size_t m = 1000000; + CHAR *haystack = xmalloc ((2 * m + 2) * sizeof (CHAR)); + CHAR *needle = xmalloc ((m + 2) * sizeof (CHAR)); + + MEMSET (haystack, L('A'), 2 * m); + haystack[2 * m] = L('B'); + haystack[2 * m + 1] = L('\0'); + + MEMSET (needle, L('A'), m); + needle[m] = L('B'); + needle[m + 1] = L('\0'); + + FOR_EACH_IMPL (impl, 0) + check_result (impl, haystack, needle, haystack + m); + + free (needle); + free (haystack); + } + + { + /* Ensure that with a barely periodic "short" needle, STRSTR's + search does not mistakenly skip just past the match point. */ + const CHAR *haystack = + L("\n" + "with_build_libsubdir\n" + "with_local_prefix\n" + "with_gxx_include_dir\n" + "with_cpp_install_dir\n" + "enable_generated_files_in_srcdir\n" + "with_gnu_ld\n" + "with_ld\n" + "with_demangler_in_ld\n" + "with_gnu_as\n" + "with_as\n" + "enable_largefile\n" + "enable_werror_always\n" + "enable_checking\n" + "enable_coverage\n" + "enable_gather_detailed_mem_stats\n" + "enable_build_with_cxx\n" + "with_stabs\n" + "enable_multilib\n" + "enable___cxa_atexit\n" + "enable_decimal_float\n" + "enable_fixed_point\n" + "enable_threads\n" + "enable_tls\n" + "enable_objc_gc\n" + "with_dwarf2\n" + "enable_shared\n" + "with_build_sysroot\n" + "with_sysroot\n" + "with_specs\n" + "with_pkgversion\n" + "with_bugurl\n" + "enable_languages\n" + "with_multilib_list\n"); + const CHAR *needle = + L("\n" + "with_gnu_ld\n"); + + FOR_EACH_IMPL (impl, 0) + check_result (impl, haystack, needle, (CHAR*) haystack + 114); + } + + { + /* Same bug, shorter trigger. */ + const CHAR *haystack = L("..wi.d."); + const CHAR *needle = L(".d."); + FOR_EACH_IMPL (impl, 0) + check_result (impl, haystack, needle, (CHAR*) haystack + 4); + } + + /* Test case from Yves Bastide. + */ + { + const CHAR *input = L("playing play play play always"); + const CHAR *needle = L("play play play"); + FOR_EACH_IMPL (impl, 0) + check_result (impl, input, needle, (CHAR*) input + 8); + } + + /* Test long needles. */ + { + size_t m = 1024; + CHAR *haystack = xmalloc ((2 * m + 1) * sizeof (CHAR)); + CHAR *needle = xmalloc ((m + 1) * sizeof (CHAR)); + haystack[0] = L('x'); + MEMSET (haystack + 1, L(' '), m - 1); + MEMSET (haystack + m, L('x'), m); + haystack[2 * m] = L('\0'); + MEMSET (needle, L('x'), m); + needle[m] = L('\0'); + + FOR_EACH_IMPL (impl, 0) + check_result (impl, haystack, needle, haystack + m); + + free (needle); + free (haystack); + } +} + static int test_main (void) { @@ -250,7 +420,9 @@ test_main (void) check1 (); check2 (); + check3 (); pr23637 (); + pr23865 (); printf ("%23s", ""); FOR_EACH_IMPL (impl, 0) -- cgit 1.4.1