about summary refs log tree commit diff
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@gmail.com>2011-10-19 22:22:29 -0400
committerUlrich Drepper <drepper@gmail.com>2011-10-19 22:22:29 -0400
commit855d156018a701b3613eb4b14ab3bd09fd12f0a3 (patch)
treeca43c7fc79f55509f9de920fee1af242ce7bab98
parentd9a4d2ab278ab50637e383b6174e9ec42db84327 (diff)
downloadglibc-855d156018a701b3613eb4b14ab3bd09fd12f0a3.tar.gz
glibc-855d156018a701b3613eb4b14ab3bd09fd12f0a3.tar.xz
glibc-855d156018a701b3613eb4b14ab3bd09fd12f0a3.zip
Optimize x86-64 rawmemchr and add test
-rw-r--r--ChangeLog5
-rw-r--r--string/Makefile2
-rw-r--r--string/test-rawmemchr.c189
-rw-r--r--sysdeps/x86_64/multiarch/rawmemchr.S7
4 files changed, 198 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index bc8d4cae13..ba2ff4c343 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2011-10-19  Ulrich Drepper  <drepper@gmail.com>
 
+	* sysdeps/x86_64/multiarch/rawmemchr.S: Small optimization to save an
+	instruction.
+	* string/Makefile (strop-tests): Add rawmemchr.
+	* string/test-rawmemchr.c: New file.
+
 	* sysdeps/x86_64/multiarch/init-arch.h: Define bit_AVX and index_AVX.
 	* sysdeps/x86_64/multiarch/strcmp-sse42.S: New file.  Split out from...
 	* sysdeps/x86_64/multiarch/strcmp.S: ...here.  Include strcmp-sse42.S
diff --git a/string/Makefile b/string/Makefile
index ab100244df..109f59b454 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -50,7 +50,7 @@ strop-tests	:= memchr memcmp memcpy memmove mempcpy memset memccpy	\
 		   stpcpy stpncpy strcat strchr strcmp strcpy strcspn	\
 		   strlen strncmp strncpy strpbrk strrchr strspn memmem	\
 		   strstr strcasestr strnlen strcasecmp strncasecmp	\
-		   strncat
+		   strncat rawmemchr
 tests		:= tester inl-tester noinl-tester testcopy test-ffs	\
 		   tst-strlen stratcliff tst-svc tst-inlcall		\
 		   bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap	\
diff --git a/string/test-rawmemchr.c b/string/test-rawmemchr.c
new file mode 100644
index 0000000000..58c1b158f3
--- /dev/null
+++ b/string/test-rawmemchr.c
@@ -0,0 +1,189 @@
+/* Test and measure rawmemchr functions.
+   Copyright (C) 1999,2002,2003,2005,2009,2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Written by Jakub Jelinek <jakub@redhat.com>, 1999.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <assert.h>
+
+#define TEST_MAIN
+#include "test-string.h"
+
+typedef char *(*proto_t) (const char *, int);
+char *simple_rawmemchr (const char *, int);
+
+IMPL (simple_rawmemchr, 0)
+IMPL (rawmemchr, 1)
+
+char *
+simple_rawmemchr (const char *s, int c)
+{
+  while (1)
+    if (*s++ == (char) c)
+      return (char *) s - 1;
+  return NULL;
+}
+
+static void
+do_one_test (impl_t *impl, const char *s, int c, char *exp_res)
+{
+  char *res = CALL (impl, s, c);
+  if (res != exp_res)
+    {
+      error (0, 0, "Wrong result in function %s %p %p", impl->name,
+	     res, exp_res);
+      ret = 1;
+      return;
+    }
+
+  if (HP_TIMING_AVAIL)
+    {
+      hp_timing_t start __attribute ((unused));
+      hp_timing_t stop __attribute ((unused));
+      hp_timing_t best_time = ~ (hp_timing_t) 0;
+      size_t i;
+
+      for (i = 0; i < 32; ++i)
+	{
+	  HP_TIMING_NOW (start);
+	  CALL (impl, s, c);
+	  HP_TIMING_NOW (stop);
+	  HP_TIMING_BEST (best_time, start, stop);
+	}
+
+      printf ("\t%zd", (size_t) best_time);
+    }
+}
+
+static void
+do_test (size_t align, size_t pos, size_t len, int seek_char)
+{
+  size_t i;
+  char *result;
+
+  align &= 7;
+  if (align + len >= page_size)
+    return;
+
+  for (i = 0; i < len; ++i)
+    {
+      buf1[align + i] = 1 + 23 * i % 127;
+      if (buf1[align + i] == seek_char)
+	buf1[align + i] = seek_char + 1;
+    }
+  buf1[align + len] = 0;
+
+  assert (pos < len);
+
+  buf1[align + pos] = seek_char;
+  buf1[align + len] = -seek_char;
+  result = (char *) (buf1 + align + pos);
+
+  if (HP_TIMING_AVAIL)
+    printf ("Length %4zd, alignment %2zd:", pos, align);
+
+  FOR_EACH_IMPL (impl, 0)
+    do_one_test (impl, (char *) (buf1 + align), seek_char, result);
+
+  if (HP_TIMING_AVAIL)
+    putchar ('\n');
+}
+
+static void
+do_random_tests (void)
+{
+  size_t i, j, n, align, pos, len;
+  int seek_char;
+  char *result;
+  unsigned char *p = buf1 + page_size - 512;
+
+  for (n = 0; n < ITERATIONS; n++)
+    {
+      align = random () & 15;
+      pos = random () & 511;
+      if (pos + align >= 512)
+	pos = 511 - align - (random () & 7);
+      len = random () & 511;
+      if (len + align >= 512)
+	len = 512 - align - (random () & 7);
+      if (pos >= len)
+	continue;
+      seek_char = random () & 255;
+      j = len + align + 64;
+      if (j > 512)
+	j = 512;
+
+      for (i = 0; i < j; i++)
+	{
+	  if (i == pos + align)
+	    p[i] = seek_char;
+	  else
+	    {
+	      p[i] = random () & 255;
+	      if (i < pos + align && p[i] == seek_char)
+		p[i] = seek_char + 13;
+	    }
+	}
+
+      assert (pos < len);
+      size_t r = random ();
+      if ((r & 31) == 0)
+	len = ~(uintptr_t) (p + align) - ((r >> 5) & 31);
+      result = (char *) (p + pos + align);
+
+      FOR_EACH_IMPL (impl, 1)
+	if (CALL (impl, (char *) (p + align), seek_char) != result)
+	  {
+	    error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %d, %zd, %zd) %p != %p, p %p",
+		   n, impl->name, align, seek_char, len, pos,
+		   CALL (impl, (char *) (p + align), seek_char),
+		   result, p);
+	    ret = 1;
+	  }
+    }
+}
+
+int
+test_main (void)
+{
+  size_t i;
+
+  test_init ();
+
+  printf ("%20s", "");
+  FOR_EACH_IMPL (impl, 0)
+    printf ("\t%s", impl->name);
+  putchar ('\n');
+
+  for (i = 1; i < 7; ++i)
+    {
+      do_test (0, 16 << i, 2048, 23);
+      do_test (i, 64, 256, 23);
+      do_test (0, 16 << i, 2048, 0);
+      do_test (i, 64, 256, 0);
+    }
+  for (i = 1; i < 32; ++i)
+    {
+      do_test (0, i, i + 1, 23);
+      do_test (0, i, i + 1, 0);
+    }
+
+  do_random_tests ();
+  return ret;
+}
+
+#include "../test-skeleton.c"
diff --git a/sysdeps/x86_64/multiarch/rawmemchr.S b/sysdeps/x86_64/multiarch/rawmemchr.S
index a8933fb55a..1f5bbe6d3f 100644
--- a/sysdeps/x86_64/multiarch/rawmemchr.S
+++ b/sysdeps/x86_64/multiarch/rawmemchr.S
@@ -43,19 +43,18 @@ strong_alias (rawmemchr, __rawmemchr)
 
 
 	.section .text.sse4.2,"ax",@progbits
-	.align 	16
+	.align	16
 	.type	__rawmemchr_sse42, @function
 __rawmemchr_sse42:
 	cfi_startproc
 	CALL_MCOUNT
 	movd	%esi, %xmm1
 	movq	%rdi, %rcx
-	punpcklbw %xmm1, %xmm1
+	pxor	%xmm2, %xmm2
 	andq	$~15, %rdi
-	punpcklbw %xmm1, %xmm1
 	orl	$0xffffffff, %esi
+	pshufb	%xmm2, %xmm1
 	movdqa	(%rdi), %xmm0
-	pshufd	$0, %xmm1, %xmm1
 	subq	%rdi, %rcx
 	pcmpeqb	%xmm1, %xmm0
 	shl	%cl, %esi