about summary refs log tree commit diff
path: root/string/strstr.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2008-05-15 04:42:20 +0000
committerUlrich Drepper <drepper@redhat.com>2008-05-15 04:42:20 +0000
commit0caca71ac95d12c6f45bbbe39d9adb7ac7074146 (patch)
tree5cf5eff46b5e4c7a09cebaf262bfcf3a20a0f6a1 /string/strstr.c
parentb194db79852e6bbd5d5ad72690679c8be06eef15 (diff)
downloadglibc-0caca71ac95d12c6f45bbbe39d9adb7ac7074146.tar.gz
glibc-0caca71ac95d12c6f45bbbe39d9adb7ac7074146.tar.xz
glibc-0caca71ac95d12c6f45bbbe39d9adb7ac7074146.zip
* string/Makefile (distribute): Add str-two-way.h. cvs/fedora-glibc-20080515T0735
2008-03-29  Eric Blake	<ebb9@byu.net>

	Rewrite string searches to O(n) rather than O(n^2).
	* string/str-two-way.h: New file.  For linear fixed-allocation
	string searching.
	* string/memmem.c: New implementation.
	* string/strstr.c: New implementation.
	* string/strcasestr.c: New implementation.

	* sysdeps/posix/getaddrinfo.c (getaddrinfo): Call _res_hconf_init
Diffstat (limited to 'string/strstr.c')
-rw-r--r--string/strstr.c142
1 files changed, 53 insertions, 89 deletions
diff --git a/string/strstr.c b/string/strstr.c
index fce1f2a756..a9dc312992 100644
--- a/string/strstr.c
+++ b/string/strstr.c
@@ -1,5 +1,5 @@
 /* Return the offset of one string within another.
-   Copyright (C) 1994,1996,1997,2000,2001,2003 Free Software Foundation, Inc.
+   Copyright (C) 1994,1996,1997,2000,2001,2003,2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,107 +17,71 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-/*
- * My personal strstr() implementation that beats most other algorithms.
- * Until someone tells me otherwise, I assume that this is the
- * fastest implementation of strstr() in C.
- * I deliberately chose not to comment it.  You should have at least
- * as much fun trying to understand it, as I had to write it :-).
- *
- * Stephen R. van den Berg, berg@pool.informatik.rwth-aachen.de	*/
+/* This particular implementation was written by Eric Blake, 2008.  */
 
-#if HAVE_CONFIG_H
+#ifndef _LIBC
 # include <config.h>
 #endif
 
-#if defined _LIBC || defined HAVE_STRING_H
-# include <string.h>
+/* Specification of strstr.  */
+#include <string.h>
+
+#include <stdbool.h>
+
+#ifndef _LIBC
+# define __builtin_expect(expr, val)   (expr)
 #endif
 
-typedef unsigned chartype;
+#define RETURN_TYPE char *
+#define AVAILABLE(h, h_l, j, n_l)			\
+  (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
+   && ((h_l) = (j) + (n_l)))
+#include "str-two-way.h"
 
 #undef strstr
 
+/* Return the first occurrence of NEEDLE in HAYSTACK.  Return HAYSTACK
+   if NEEDLE is empty, otherwise NULL if NEEDLE is not found in
+   HAYSTACK.  */
 char *
-strstr (phaystack, pneedle)
-     const char *phaystack;
-     const char *pneedle;
+strstr (const char *haystack_start, const char *needle_start)
 {
-  const unsigned char *haystack, *needle;
-  chartype b;
-  const unsigned char *rneedle;
-
-  haystack = (const unsigned char *) phaystack;
+  const char *haystack = haystack_start;
+  const char *needle = needle_start;
+  size_t needle_len; /* Length of NEEDLE.  */
+  size_t haystack_len; /* Known minimum length of HAYSTACK.  */
+  bool ok = true; /* True if NEEDLE is prefix of HAYSTACK.  */
 
-  if ((b = *(needle = (const unsigned char *) pneedle)))
-    {
-      chartype c;
-      haystack--;		/* possible ANSI violation */
+  /* Determine length of NEEDLE, and in the process, make sure
+     HAYSTACK is at least as long (no point processing all of a long
+     NEEDLE if HAYSTACK is too short).  */
+  while (*haystack && *needle)
+    ok &= *haystack++ == *needle++;
+  if (*needle)
+    return NULL;
+  if (ok)
+    return (char *) haystack_start;
 
-      {
-	chartype a;
-	do
-	  if (!(a = *++haystack))
-	    goto ret0;
-	while (a != b);
-      }
+  /* Reduce the size of haystack using strchr, since it has a smaller
+     linear coefficient than the Two-Way algorithm.  */
+  needle_len = needle - needle_start;
+  haystack = strchr (haystack_start + 1, *needle_start);
+  if (!haystack || __builtin_expect (needle_len == 1, 0))
+    return (char *) haystack;
+  needle -= needle_len;
+  haystack_len = (haystack > haystack_start + needle_len ? 1
+		  : needle_len + haystack_start - haystack);
 
-      if (!(c = *++needle))
-	goto foundneedle;
-      ++needle;
-      goto jin;
-
-      for (;;)
-	{
-	  {
-	    chartype a;
-	    if (0)
-	    jin:{
-		if ((a = *++haystack) == c)
-		  goto crest;
-	      }
-	    else
-	      a = *++haystack;
-	    do
-	      {
-		for (; a != b; a = *++haystack)
-		  {
-		    if (!a)
-		      goto ret0;
-		    if ((a = *++haystack) == b)
-		      break;
-		    if (!a)
-		      goto ret0;
-		  }
-	      }
-	    while ((a = *++haystack) != c);
-	  }
-	crest:
-	  {
-	    chartype a;
-	    {
-	      const unsigned char *rhaystack;
-	      if (*(rhaystack = haystack-- + 1) == (a = *(rneedle = needle)))
-		do
-		  {
-		    if (!a)
-		      goto foundneedle;
-		    if (*++rhaystack != (a = *++needle))
-		      break;
-		    if (!a)
-		      goto foundneedle;
-		  }
-		while (*++rhaystack == (a = *++needle));
-	      needle = rneedle;	/* took the register-poor aproach */
-	    }
-	    if (!a)
-	      break;
-	  }
-	}
-    }
-foundneedle:
-  return (char *) haystack;
-ret0:
-  return 0;
+  /* Perform the search.  Abstract memory is considered to be an array
+     of 'unsigned char' values, not an array of 'char' values.  See
+     ISO C 99 section 6.2.6.1.  */
+  if (needle_len < LONG_NEEDLE_THRESHOLD)
+    return two_way_short_needle ((const unsigned char *) haystack,
+				 haystack_len,
+				 (const unsigned char *) needle, needle_len);
+  return two_way_long_needle ((const unsigned char *) haystack, haystack_len,
+			      (const unsigned char *) needle, needle_len);
 }
 libc_hidden_builtin_def (strstr)
+
+#undef LONG_NEEDLE_THRESHOLD