diff options
author | Ulrich Drepper <drepper@redhat.com> | 2008-05-15 04:42:20 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2008-05-15 04:42:20 +0000 |
commit | 0caca71ac95d12c6f45bbbe39d9adb7ac7074146 (patch) | |
tree | 5cf5eff46b5e4c7a09cebaf262bfcf3a20a0f6a1 /string/strcasestr.c | |
parent | b194db79852e6bbd5d5ad72690679c8be06eef15 (diff) | |
download | glibc-0caca71ac95d12c6f45bbbe39d9adb7ac7074146.tar.gz glibc-0caca71ac95d12c6f45bbbe39d9adb7ac7074146.tar.xz glibc-0caca71ac95d12c6f45bbbe39d9adb7ac7074146.zip |
* string/Makefile (distribute): Add str-two-way.h. cvs/fedora-glibc-20080515T0735
2008-03-29 Eric Blake <ebb9@byu.net> Rewrite string searches to O(n) rather than O(n^2). * string/str-two-way.h: New file. For linear fixed-allocation string searching. * string/memmem.c: New implementation. * string/strstr.c: New implementation. * string/strcasestr.c: New implementation. * sysdeps/posix/getaddrinfo.c (getaddrinfo): Call _res_hconf_init
Diffstat (limited to 'string/strcasestr.c')
-rw-r--r-- | string/strcasestr.c | 152 |
1 files changed, 55 insertions, 97 deletions
diff --git a/string/strcasestr.c b/string/strcasestr.c index 1dde43c606..9de19aafa8 100644 --- a/string/strcasestr.c +++ b/string/strcasestr.c @@ -1,5 +1,5 @@ /* Return the offset of one string within another. - Copyright (C) 1994, 1996-2000, 2004 Free Software Foundation, Inc. + Copyright (C) 1994, 1996-2000, 2004, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -30,113 +30,71 @@ # include <config.h> #endif +/* Specification. */ +#include <string.h> + #include <ctype.h> +#include <stdbool.h> +#include <strings.h> -#if defined _LIBC || defined HAVE_STRING_H -# include <string.h> -#endif +#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) -#ifdef _LIBC -# include <locale/localeinfo.h> -# define TOLOWER(c) __tolower_l ((unsigned char) c, loc) -#else -# define TOLOWER(c) _tolower (c) -#endif - -typedef unsigned chartype; +/* Two-Way algorithm. */ +#define RETURN_TYPE char * +#define AVAILABLE(h, h_l, j, n_l) \ + (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ + && ((h_l) = (j) + (n_l))) +#define CANON_ELEMENT(c) TOLOWER (c) +#define CMP_FUNC(p1, p2, l) \ + strncasecmp ((const char *) (p1), (const char *) (p2), l) +#include "str-two-way.h" #undef strcasestr #undef __strcasestr +/* Find the first occurrence of NEEDLE in HAYSTACK, using + case-insensitive comparison. This function gives unspecified + results in multibyte locales. */ char * -__strcasestr (phaystack, pneedle) - const char *phaystack; - const char *pneedle; +__strcasestr (const char *haystack_start, const char *needle_start) { - register const unsigned char *haystack, *needle; - register chartype b, c; -#ifdef _LIBC - __locale_t loc = _NL_CURRENT_LOCALE; -#endif - - haystack = (const unsigned char *) phaystack; - needle = (const unsigned char *) pneedle; - - b = TOLOWER (*needle); - if (b != '\0') + const char *haystack = haystack_start; + const char *needle = needle_start; + size_t needle_len; /* Length of NEEDLE. */ + size_t haystack_len; /* Known minimum length of HAYSTACK. */ + bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */ + + /* Determine length of NEEDLE, and in the process, make sure + HAYSTACK is at least as long (no point processing all of a long + NEEDLE if HAYSTACK is too short). */ + while (*haystack && *needle) { - haystack--; /* possible ANSI violation */ - do - { - c = *++haystack; - if (c == '\0') - goto ret0; - } - while (TOLOWER (c) != (int) b); - - c = TOLOWER (*++needle); - if (c == '\0') - goto foundneedle; - ++needle; - goto jin; - - for (;;) - { - register chartype a; - register const unsigned char *rhaystack, *rneedle; - - do - { - a = *++haystack; - if (a == '\0') - goto ret0; - if (TOLOWER (a) == (int) b) - break; - a = *++haystack; - if (a == '\0') - goto ret0; -shloop: - ; - } - while (TOLOWER (a) != (int) b); - -jin: a = *++haystack; - if (a == '\0') - goto ret0; - - if (TOLOWER (a) != (int) c) - goto shloop; - - rhaystack = haystack-- + 1; - rneedle = needle; - a = TOLOWER (*rneedle); - - if (TOLOWER (*rhaystack) == (int) a) - do - { - if (a == '\0') - goto foundneedle; - ++rhaystack; - a = TOLOWER (*++needle); - if (TOLOWER (*rhaystack) != (int) a) - break; - if (a == '\0') - goto foundneedle; - ++rhaystack; - a = TOLOWER (*++needle); - } - while (TOLOWER (*rhaystack) == (int) a); - - needle = rneedle; /* took the register-poor approach */ - - if (a == '\0') - break; - } + ok &= (TOLOWER ((unsigned char) *haystack) + == TOLOWER ((unsigned char) *needle)); + haystack++; + needle++; } -foundneedle: - return (char*) haystack; -ret0: - return 0; + if (*needle) + return NULL; + if (ok) + return (char *) haystack_start; + needle_len = needle - needle_start; + haystack = haystack_start + 1; + haystack_len = needle_len - 1; + + /* Perform the search. Abstract memory is considered to be an array + of 'unsigned char' values, not an array of 'char' values. See + ISO C 99 section 6.2.6.1. */ + if (needle_len < LONG_NEEDLE_THRESHOLD) + return two_way_short_needle ((const unsigned char *) haystack, + haystack_len, + (const unsigned char *) needle_start, + needle_len); + return two_way_long_needle ((const unsigned char *) haystack, haystack_len, + (const unsigned char *) needle_start, + needle_len); } +#undef LONG_NEEDLE_THRESHOLD + weak_alias (__strcasestr, strcasestr) |