about summary refs log tree commit diff
path: root/sysdeps/ia64/strlen.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/ia64/strlen.S')
-rw-r--r--sysdeps/ia64/strlen.S97
1 files changed, 97 insertions, 0 deletions
diff --git a/sysdeps/ia64/strlen.S b/sysdeps/ia64/strlen.S
new file mode 100644
index 0000000000..5930af7b32
--- /dev/null
+++ b/sysdeps/ia64/strlen.S
@@ -0,0 +1,97 @@
+/* Optimized version of the standard strlen() function.
+   This file is part of the GNU C Library.
+   Copyright (C) 2000-2014 Free Software Foundation, Inc.
+   Contributed by Dan Pop <Dan.Pop@cern.ch>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Return: the length of the input string
+
+   Input:
+        in0:    str
+
+   Look for the null character byte by byte, until we reach a word aligned
+   address, then search word by word, using the czx instruction.  We're
+   also doing one word of read ahead, which could cause problems if the
+   null character is on the last word of a page and the next page is not
+   mapped in the process address space.  Hence the use of the speculative
+   load.
+
+   This implementation assumes little endian mode.  For big endian mode,
+   the instruction czx1.r should be replaced by czx1.l.  */
+
+#include <sysdep.h>
+#undef ret
+
+#define saved_lc	r18
+#define str		r19
+#define pos0		r20
+#define val1		r21
+#define val2		r22
+#define origadd		r23
+#define tmp		r24
+#define loopcnt		r30
+#define len		ret0
+
+ENTRY(strlen)
+	.prologue
+	alloc r2 = ar.pfs, 1, 0, 0, 0
+	.save ar.lc, saved_lc
+        mov 	saved_lc = ar.lc 	// save the loop counter
+	.body
+	mov 	str = in0
+	mov 	len = r0		// len = 0
+	and 	tmp = 7, in0		// tmp = str % 8
+	;;
+	sub	loopcnt = 8, tmp	// loopcnt = 8 - tmp
+	cmp.eq	p6, p0 = tmp, r0
+(p6)	br.cond.sptk	.str_aligned;;
+	adds	loopcnt = -1, loopcnt;;
+	mov	ar.lc = loopcnt
+.l1:
+	ld1	val2 = [str], 1
+	;;
+	cmp.eq	p6, p0 = val2, r0
+(p6)	br.cond.spnt	.restore_and_exit
+	adds	len = 1, len
+	br.cloop.dptk	.l1
+.str_aligned:
+	mov	origadd = str		// origadd = orig
+	ld8	val1 = [str], 8;;
+	nop.b	0
+	nop.b 	0
+.l2:	ld8.s	val2 = [str], 8		// don't bomb out here
+	czx1.r	pos0 = val1
+	;;
+	cmp.ne	p6, p0 = 8, pos0
+(p6)	br.cond.spnt .foundit
+	chk.s	val2, .recovery
+.back:
+	mov	val1 = val2
+	br.cond.dptk	.l2
+.foundit:
+	sub	tmp = str, origadd	// tmp = crt address - orig
+	add	len = len, pos0;;
+	add	len = len, tmp;;
+	adds	len = -16, len
+.restore_and_exit:
+	mov ar.lc = saved_lc		// restore the loop counter
+	br.ret.sptk.many b0
+.recovery:
+	adds	str = -8, str;;
+	ld8	val2 = [str], 8		// bomb out here
+	br.cond.sptk	.back
+END(strlen)
+libc_hidden_builtin_def (strlen)