about summary refs log tree commit diff
path: root/string/strcasestr.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2008-05-15 04:42:20 +0000
committerUlrich Drepper <drepper@redhat.com>2008-05-15 04:42:20 +0000
commit0caca71ac95d12c6f45bbbe39d9adb7ac7074146 (patch)
tree5cf5eff46b5e4c7a09cebaf262bfcf3a20a0f6a1 /string/strcasestr.c
parentb194db79852e6bbd5d5ad72690679c8be06eef15 (diff)
downloadglibc-0caca71ac95d12c6f45bbbe39d9adb7ac7074146.tar.gz
glibc-0caca71ac95d12c6f45bbbe39d9adb7ac7074146.tar.xz
glibc-0caca71ac95d12c6f45bbbe39d9adb7ac7074146.zip
* string/Makefile (distribute): Add str-two-way.h. cvs/fedora-glibc-20080515T0735
2008-03-29  Eric Blake	<ebb9@byu.net>

	Rewrite string searches to O(n) rather than O(n^2).
	* string/str-two-way.h: New file.  For linear fixed-allocation
	string searching.
	* string/memmem.c: New implementation.
	* string/strstr.c: New implementation.
	* string/strcasestr.c: New implementation.

	* sysdeps/posix/getaddrinfo.c (getaddrinfo): Call _res_hconf_init
Diffstat (limited to 'string/strcasestr.c')
-rw-r--r--string/strcasestr.c152
1 files changed, 55 insertions, 97 deletions
diff --git a/string/strcasestr.c b/string/strcasestr.c
index 1dde43c606..9de19aafa8 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -1,5 +1,5 @@
 /* Return the offset of one string within another.
-   Copyright (C) 1994, 1996-2000, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1994, 1996-2000, 2004, 2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -30,113 +30,71 @@
 # include <config.h>
 #endif
 
+/* Specification.  */
+#include <string.h>
+
 #include <ctype.h>
+#include <stdbool.h>
+#include <strings.h>
 
-#if defined _LIBC || defined HAVE_STRING_H
-# include <string.h>
-#endif
+#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
 
-#ifdef _LIBC
-# include <locale/localeinfo.h>
-# define TOLOWER(c) __tolower_l ((unsigned char) c, loc)
-#else
-# define TOLOWER(c) _tolower (c)
-#endif
-
-typedef unsigned chartype;
+/* Two-Way algorithm.  */
+#define RETURN_TYPE char *
+#define AVAILABLE(h, h_l, j, n_l)			\
+  (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
+   && ((h_l) = (j) + (n_l)))
+#define CANON_ELEMENT(c) TOLOWER (c)
+#define CMP_FUNC(p1, p2, l)				\
+  strncasecmp ((const char *) (p1), (const char *) (p2), l)
+#include "str-two-way.h"
 
 #undef strcasestr
 #undef __strcasestr
 
+/* Find the first occurrence of NEEDLE in HAYSTACK, using
+   case-insensitive comparison.  This function gives unspecified
+   results in multibyte locales.  */
 char *
-__strcasestr (phaystack, pneedle)
-     const char *phaystack;
-     const char *pneedle;
+__strcasestr (const char *haystack_start, const char *needle_start)
 {
-  register const unsigned char *haystack, *needle;
-  register chartype b, c;
-#ifdef _LIBC
-  __locale_t loc = _NL_CURRENT_LOCALE;
-#endif
-
-  haystack = (const unsigned char *) phaystack;
-  needle = (const unsigned char *) pneedle;
-
-  b = TOLOWER (*needle);
-  if (b != '\0')
+  const char *haystack = haystack_start;
+  const char *needle = needle_start;
+  size_t needle_len; /* Length of NEEDLE.  */
+  size_t haystack_len; /* Known minimum length of HAYSTACK.  */
+  bool ok = true; /* True if NEEDLE is prefix of HAYSTACK.  */
+
+  /* Determine length of NEEDLE, and in the process, make sure
+     HAYSTACK is at least as long (no point processing all of a long
+     NEEDLE if HAYSTACK is too short).  */
+  while (*haystack && *needle)
     {
-      haystack--;				/* possible ANSI violation */
-      do
-	{
-	  c = *++haystack;
-	  if (c == '\0')
-	    goto ret0;
-	}
-      while (TOLOWER (c) != (int) b);
-
-      c = TOLOWER (*++needle);
-      if (c == '\0')
-	goto foundneedle;
-      ++needle;
-      goto jin;
-
-      for (;;)
-        {
-          register chartype a;
-	  register const unsigned char *rhaystack, *rneedle;
-
-	  do
-	    {
-	      a = *++haystack;
-	      if (a == '\0')
-		goto ret0;
-	      if (TOLOWER (a) == (int) b)
-		break;
-	      a = *++haystack;
-	      if (a == '\0')
-		goto ret0;
-shloop:
-	      ;
-	    }
-          while (TOLOWER (a) != (int) b);
-
-jin:	  a = *++haystack;
-	  if (a == '\0')
-	    goto ret0;
-
-	  if (TOLOWER (a) != (int) c)
-	    goto shloop;
-
-	  rhaystack = haystack-- + 1;
-	  rneedle = needle;
-	  a = TOLOWER (*rneedle);
-
-	  if (TOLOWER (*rhaystack) == (int) a)
-	    do
-	      {
-		if (a == '\0')
-		  goto foundneedle;
-		++rhaystack;
-		a = TOLOWER (*++needle);
-		if (TOLOWER (*rhaystack) != (int) a)
-		  break;
-		if (a == '\0')
-		  goto foundneedle;
-		++rhaystack;
-		a = TOLOWER (*++needle);
-	      }
-	    while (TOLOWER (*rhaystack) == (int) a);
-
-	  needle = rneedle;		/* took the register-poor approach */
-
-	  if (a == '\0')
-	    break;
-        }
+      ok &= (TOLOWER ((unsigned char) *haystack)
+	     == TOLOWER ((unsigned char) *needle));
+      haystack++;
+      needle++;
     }
-foundneedle:
-  return (char*) haystack;
-ret0:
-  return 0;
+  if (*needle)
+    return NULL;
+  if (ok)
+    return (char *) haystack_start;
+  needle_len = needle - needle_start;
+  haystack = haystack_start + 1;
+  haystack_len = needle_len - 1;
+
+  /* Perform the search.  Abstract memory is considered to be an array
+     of 'unsigned char' values, not an array of 'char' values.  See
+     ISO C 99 section 6.2.6.1.  */
+  if (needle_len < LONG_NEEDLE_THRESHOLD)
+    return two_way_short_needle ((const unsigned char *) haystack,
+				 haystack_len,
+				 (const unsigned char *) needle_start,
+				 needle_len);
+  return two_way_long_needle ((const unsigned char *) haystack, haystack_len,
+			      (const unsigned char *) needle_start,
+			      needle_len);
 }
 
+#undef LONG_NEEDLE_THRESHOLD
+
 weak_alias (__strcasestr, strcasestr)