about summary refs log tree commit diff
path: root/sysdeps/tile/tilegx/strnlen.c
diff options
context:
space:
mode:
authorChris Metcalf <cmetcalf@tilera.com>2014-09-15 20:10:18 -0400
committerChris Metcalf <cmetcalf@tilera.com>2014-10-06 11:19:18 -0400
commitc86f7b80f43d7336eab1119dae78b0f10b7244ec (patch)
tree951bc7a02304a850aaed2a361df614669f5271aa /sysdeps/tile/tilegx/strnlen.c
parent1c4c1a6f4d0e8ffab24419d136fbfe698a201d24 (diff)
downloadglibc-c86f7b80f43d7336eab1119dae78b0f10b7244ec.tar.gz
glibc-c86f7b80f43d7336eab1119dae78b0f10b7244ec.tar.xz
glibc-c86f7b80f43d7336eab1119dae78b0f10b7244ec.zip
tilegx: provide optimized strnlen, strstr, and strcasestr
strnlen() is based on the existing tile strlen() with length
checking added.  It speeds up by up to 5x, but on average across
the benchtest corpus by around 35%.  No regressions are seen.

strstr() does 8-byte aligned loads and compares using a 2-byte
filter on the first two bytes of the needle and then testing
the remaining bytes in needle using memcmp().  It speeds up
about 5x in the best case (for "found" needles), about 2x looking
at benchtest as a whole, with some slowdowns of as much as 45%
on a few cases (including the "fail" case for 128KB search).

strcasestr() is based on strstr() but uses a SIMD tolower
routine to convert 8-bytes to lower case in 5 instructions.
It also uses a 2-byte filter and then strncasecmp() for the
remaining bytes.  strncasecmp() is not optimized for SIMD, so
there is further room for improvement.  However, it is still up
to 16x faster for "found" needles, averaging 2x faster on the
whole corpus of benchtests.  It does slow down by up to 35%
on a few cases, similarly to strstr().
Diffstat (limited to 'sysdeps/tile/tilegx/strnlen.c')
-rw-r--r--sysdeps/tile/tilegx/strnlen.c58
1 files changed, 58 insertions, 0 deletions
diff --git a/sysdeps/tile/tilegx/strnlen.c b/sysdeps/tile/tilegx/strnlen.c
new file mode 100644
index 0000000000..33ecc033f6
--- /dev/null
+++ b/sysdeps/tile/tilegx/strnlen.c
@@ -0,0 +1,58 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <string.h>
+#include <stdint.h>
+#include "string-endian.h"
+
+/* Return the length of S, scanning at most MAXLEN bytes one aligned 8-byte
+   word at a time.  If no '\0' is found in that many bytes, return MAXLEN.  */
+size_t
+__strnlen (const char *s, size_t maxlen)
+{
+  /* With MAXLEN == 0 not even one byte may be read: the load could fault.  */
+  if (maxlen == 0)
+    return 0;
+
+  /* Align S down to 8 bytes; BYTES_READ = bytes of S within that first word. */
+  const uintptr_t s_int = (uintptr_t) s;
+  const uint64_t *p = (const uint64_t *) (s_int & -8);
+  size_t bytes_read = sizeof (*p) - (s_int & (sizeof (*p) - 1));
+
+  /* Load the first word; MASK presumably makes the bytes before S nonzero. */
+  uint64_t v = *p | MASK (s_int);
+
+  uint64_t bits;  /* Byte-wise compare of V with 0; nonzero once '\0' is seen.  */
+  while ((bits = __insn_v1cmpeqi (v, 0)) == 0)
+    {
+      if (bytes_read >= maxlen)
+	{
+	  /* MAXLEN bytes examined, no terminator; do not load another word. */
+	  return maxlen;
+	}
+      v = *++p;
+      bytes_read += sizeof (v);
+    }
+
+  /* Found '\0'; it may lie past MAXLEN inside the last word, so clamp.  */
+  size_t len = ((const char *) p) + (CFZ (bits) >> 3) - s;
+  return (len < maxlen ? len : maxlen);
+}
+weak_alias (__strnlen, strnlen)
+libc_hidden_def (strnlen)