about summary refs log tree commit diff
path: root/sysdeps/arm/armv7/multiarch/memchr.S
diff options
context:
space:
mode:
author: Prakhar Bahuguna <prakhar.bahuguna@arm.com> 2017-06-27 15:43:50 +0000
committer: Joseph Myers <joseph@codesourcery.com> 2017-06-27 15:43:50 +0000
commit: f8f72bc0c3da8ba039e6a1ed670ca576120b1f85 (patch)
tree: 83b3438aea7f6425cf94c5f97cbbca1d62797683 /sysdeps/arm/armv7/multiarch/memchr.S
parent: a37b5daa6bc7fbcbbc229b2549a161fa15023f41 (diff)
download: glibc-f8f72bc0c3da8ba039e6a1ed670ca576120b1f85.tar.gz
glibc-f8f72bc0c3da8ba039e6a1ed670ca576120b1f85.tar.xz
glibc-f8f72bc0c3da8ba039e6a1ed670ca576120b1f85.zip
[ARM] Optimise memchr for NEON-enabled processors
This patch provides an optimised implementation of memchr using NEON
instructions to improve its performance, especially with longer search regions.
This gave an improvement in performance against the Thumb2+DSP optimised code,
with more significant gains for larger inputs. The NEON code also wins in cases
where the input is small (less than 8 bytes) by defaulting to a simple
byte-by-byte search. This avoids the overhead imposed by filling two quadword
registers from memory.

	* sysdeps/arm/armv7/multiarch/Makefile: Add memchr_neon to
	sysdep_routines.
	* sysdeps/arm/armv7/multiarch/ifunc-impl-list.c: Add define for
	__memchr_neon.
	Add ifunc definitions for __memchr_neon and __memchr_noneon.
	* sysdeps/arm/armv7/multiarch/memchr.S: New file.
	* sysdeps/arm/armv7/multiarch/memchr_impl.S: Likewise.
	* sysdeps/arm/armv7/multiarch/memchr_neon.S: Likewise.

Testing done: Ran regression tests for arm-none-linux-gnueabihf as well as a
full toolchain bootstrap. Benchmark tests were run on ARMv7-A and ARMv8-A
hardware targets.
Diffstat (limited to 'sysdeps/arm/armv7/multiarch/memchr.S')
-rw-r--r-- sysdeps/arm/armv7/multiarch/memchr.S 59
1 files changed, 59 insertions, 0 deletions
diff --git a/sysdeps/arm/armv7/multiarch/memchr.S b/sysdeps/arm/armv7/multiarch/memchr.S
new file mode 100644
index 0000000000..8e8097abd5
--- /dev/null
+++ b/sysdeps/arm/armv7/multiarch/memchr.S
@@ -0,0 +1,59 @@
+/* Multiple versions of memchr
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2013-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <rtld-global-offsets.h>
+
+#if IS_IN (libc)
+/* Under __ARM_NEON__, memchr_neon.S defines the name memchr.  */
+# ifndef __ARM_NEON__
+	.text
+	.arm
+ENTRY(memchr)	/* IFUNC resolver: leaves in r0 the address of the memchr variant to use.  */
+	.type	memchr, %gnu_indirect_function
+	ldr	r1, .Lmemchr_noneon	/* Default choice: offset of __memchr_noneon.  */
+	tst	r0, #HWCAP_ARM_NEON	/* Test the NEON bit in r0 (presumably the hwcap word handed to the resolver).  */
+	ldrne	r1, .Lmemchr_neon	/* Bit set: switch to the offset of __memchr_neon.  */
+1:
+	add	r0, r1, pc	/* In ARM state pc reads as 1b + 8, so r0 = absolute address of the chosen symbol.  */
+	DO_RET(lr)	/* Presumably returns through lr; the macro is defined outside this file.  */
+
+.Lmemchr_noneon:
+	.long	C_SYMBOL_NAME(__memchr_noneon) - 1b - 8	/* Offset relative to the pc value read at 1b (1b + 8).  */
+.Lmemchr_neon:
+	.long	C_SYMBOL_NAME(__memchr_neon) - 1b - 8	/* Same encoding, for the NEON variant.  */
+
+END(memchr)
+
+libc_hidden_builtin_def (memchr)
+# endif  /* Not __ARM_NEON__.  */
+libc_hidden_def (__memchr_noneon)
+
+# undef libc_hidden_builtin_def	/* From here on these three macros expand to nothing, ...  */
+# define libc_hidden_builtin_def(name)
+# undef weak_alias
+# define weak_alias(x, y)
+# undef libc_hidden_def
+# define libc_hidden_def(name)	/* ... presumably so memchr_impl.S does not emit aliases/hidden defs again.  */
+
+# define memchr __memchr_noneon	/* Every later `memchr' token, including in the included file, becomes __memchr_noneon.  */
+
+#endif
+
+#include "memchr_impl.S"