about summary refs log tree commit diff
path: root/sysdeps
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2010-07-16 15:37:38 -0700
committer Ulrich Drepper <drepper@redhat.com> 2010-07-16 15:37:38 -0700
commit cc9f2e47a0a1b4ab0d78ff1d036ec7f8ebc74294 (patch)
tree 271302a0acea6e4053d3af307011e9494b8d267c /sysdeps
parent 9b059f977477351b1660cf50f6925e1eb0ec498b (diff)
download glibc-cc9f2e47a0a1b4ab0d78ff1d036ec7f8ebc74294.tar.gz
glibc-cc9f2e47a0a1b4ab0d78ff1d036ec7f8ebc74294.tar.xz
glibc-cc9f2e47a0a1b4ab0d78ff1d036ec7f8ebc74294.zip
Speed up SSE4.2 strcasestr by avoiding indirect function call.
Diffstat (limited to 'sysdeps')
-rw-r--r-- sysdeps/x86_64/multiarch/Makefile 3
-rw-r--r-- sysdeps/x86_64/multiarch/strcasestr-nonascii.c 50
-rw-r--r-- sysdeps/x86_64/multiarch/strcasestr.c 4
-rw-r--r-- sysdeps/x86_64/multiarch/strstr.c 68
4 files changed, 76 insertions, 49 deletions
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 0ca914a377..f1251a0a50 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -7,7 +7,7 @@ ifeq ($(subdir),string)
 sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
 		   strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
 		   memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
-		   memmove-ssse3-back
+		   memmove-ssse3-back strcasestr-nonascii
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
 CFLAGS-strcspn-c.c += -msse4
@@ -15,5 +15,6 @@ CFLAGS-strpbrk-c.c += -msse4
 CFLAGS-strspn-c.c += -msse4
 CFLAGS-strstr.c += -msse4
 CFLAGS-strcasestr.c += -msse4
+CFLAGS-strcasestr-nonascii.c += -msse4
 endif
 endif
diff --git a/sysdeps/x86_64/multiarch/strcasestr-nonascii.c b/sysdeps/x86_64/multiarch/strcasestr-nonascii.c
new file mode 100644
index 0000000000..0804e96de7
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcasestr-nonascii.c
@@ -0,0 +1,50 @@
+/* strcasestr with SSE4.2 intrinsics for non-POSIX/C locales
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+# include <ctype.h>
+
+
+/* Similar to __m128i_strloadu.  Convert to lower case for non-POSIX/C
+   locale.  */
+static inline __m128i
+__m128i_strloadu_tolower (const unsigned char * p)
+{
+  union
+    {
+      char b[16];
+      __m128i x;
+    } u;
+
+  for (int i = 0; i < 16; ++i)
+    if (p[i] == 0)
+      {
+	u.b[i] = 0;
+	break;
+      }
+    else
+      u.b[i] = tolower (p[i]);
+
+  return u.x;
+}
+
+
+#define STRCASESTR_NONASCII
+#define USE_AS_STRCASESTR
+#define STRSTR_SSE42 attribute_hidden __strcasestr_sse42_nonascii
+#include "strstr.c"
diff --git a/sysdeps/x86_64/multiarch/strcasestr.c b/sysdeps/x86_64/multiarch/strcasestr.c
index 064e3ef4fd..d1cfb3b264 100644
--- a/sysdeps/x86_64/multiarch/strcasestr.c
+++ b/sysdeps/x86_64/multiarch/strcasestr.c
@@ -1,3 +1,7 @@
+extern char *__strcasestr_sse42_nonascii (const unsigned char *s1,
+					  const unsigned char *s2)
+  attribute_hidden;
+
 #define USE_AS_STRCASESTR
 #define STRSTR_SSE42 __strcasestr_sse42
 #include "strstr.c"
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index 76d5ad16df..f647354971 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -1,5 +1,5 @@
 /* strstr with SSE4.2 intrinsics
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -67,10 +67,10 @@
 
    case		ECX	CFlag	ZFlag	SFlag
     3		 X	  1	  0	  0/1
-    4a		 0  	  1	  0	  0
-    4b		 0  	  1	  0	  1
-    4c		0 < X  	  1	  0	  0/1
-    5		16 	  0	  1	  0
+    4a		 0	  1	  0	  0
+    4b		 0	  1	  0	  1
+    4c		0 < X	  1	  0	  0/1
+    5		16	  0	  1	  0
 
    3. An initial ordered-comparison fragment match, we fix up to do
       subsequent string comparison
@@ -147,8 +147,7 @@ __m128i_shift_right (__m128i value, int offset)
    If EOS occurs within less than 16B before 4KB boundary, we don't
    cross to next page.  */
 
-static __m128i
-__attribute__ ((section (".text.sse4.2")))
+static inline __m128i
 __m128i_strloadu (const unsigned char * p)
 {
   int offset = ((size_t) p & (16 - 1));
@@ -164,14 +163,12 @@ __m128i_strloadu (const unsigned char * p)
   return _mm_loadu_si128 ((__m128i *) p);
 }
 
-#ifdef USE_AS_STRCASESTR
+#if defined USE_AS_STRCASESTR && !defined STRCASESTR_NONASCII
 
 /* Similar to __m128i_strloadu.  Convert to lower case for POSIX/C
    locale.  */
-
-static __m128i
-__attribute__ ((section (".text.sse4.2")))
-__m128i_strloadu_tolower_posix (const unsigned char * p)
+static inline __m128i
+__m128i_strloadu_tolower (const unsigned char * p)
 {
   __m128i frag = __m128i_strloadu (p);
 
@@ -184,39 +181,13 @@ __m128i_strloadu_tolower_posix (const unsigned char * p)
   return  _mm_blendv_epi8 (frag, mask2, mask1);
 }
 
-/* Similar to __m128i_strloadu.  Convert to lower case for none-POSIX/C
-   locale.  */
-
-static __m128i
-__attribute__ ((section (".text.sse4.2")))
-__m128i_strloadu_tolower (const unsigned char * p)
-{
-  union
-    {
-      char b[16];
-      __m128i x;
-    } u;
-
-  for (int i = 0; i < 16; i++)
-    if (p[i] == 0)
-      {
-	u.b[i] = 0;
-	break;
-      }
-    else
-      u.b[i] = tolower (p[i]);
-
-  return u.x;
-}
 #endif
 
 /* Calculate Knuth-Morris-Pratt string searching algorithm (or KMP
    algorithm) overlap for a fully populated 16B vector.
    Input parameter: 1st 16Byte loaded from the reference string of a
 		    strstr function.
-   We don't use KMP algorithm if reference string is less than 16B.
- */
-
+   We don't use KMP algorithm if reference string is less than 16B.  */
 static int
 __inline__ __attribute__ ((__always_inline__,))
 KMP16Bovrlap (__m128i s2)
@@ -236,7 +207,7 @@ KMP16Bovrlap (__m128i s2)
     return 1;
   else if (!k1)
     {
-      /* There are al least two ditinct char in s2.  If byte 0 and 1 are
+      /* There are at least two distinct chars in s2.  If byte 0 and 1 are
 	 idential and the distinct value lies farther down, we can deduce
 	 the next byte offset to restart full compare is least no earlier
 	 than byte 3.  */
@@ -256,23 +227,24 @@ STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2)
 #define p1 s1
   const unsigned char *p2 = s2;
 
-  if (p2[0] == '\0')
+#ifndef STRCASESTR_NONASCII
+  if (__builtin_expect (p2[0] == '\0', 0))
     return (char *) p1;
 
-  if (p1[0] == '\0')
+  if (__builtin_expect (p1[0] == '\0', 0))
     return NULL;
 
   /* Check if p1 length is 1 byte long.  */
-  if (p1[1] == '\0')
+  if (__builtin_expect (p1[1] == '\0', 0))
     return p2[1] == '\0' && CMPBYTE (p1[0], p2[0]) ? (char *) p1 : NULL;
+#endif
 
 #ifdef USE_AS_STRCASESTR
-  __m128i (*strloadu) (const unsigned char *);
+  if (__builtin_expect (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_NONASCII_CASE)
+			!= 0, 0))
+    return __strcasestr_sse42_nonascii (s1, s2);
 
-  if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_NONASCII_CASE) == 0)
-    strloadu = __m128i_strloadu_tolower_posix;
-  else
-    strloadu = __m128i_strloadu_tolower;
+# define strloadu __m128i_strloadu_tolower
 #else
 # define strloadu __m128i_strloadu
 #endif