about summary refs log tree commit diff
path: root/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S
diff options
context:
space:
mode:
Diffstat (limited to 'REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S')
-rw-r--r--REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S155
1 files changed, 155 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S
new file mode 100644
index 0000000000..cbfcc14cfe
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S
@@ -0,0 +1,155 @@
+/* Optimized strchr implementation for PowerPC64.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* See strlen.s for comments on how this works.  */
+
+/* char * [r3] strchr (const char *s [r3] , int c [r4] )  */
+
+#ifndef STRCHR
+# define STRCHR strchr
+#endif
+
+ENTRY (STRCHR)
+	CALL_MCOUNT 2
+
+#define rTMP1	r0
+#define rRTN	r3	/* outgoing result */
+#define rSTR	r8	/* current word pointer */
+#define rCHR	r4	/* byte we're looking for, spread over the whole word */
+#define rWORD	r5	/* the current word */
+#define rCLZB	rCHR	/* leading zero byte count */
+#define rFEFE	r6	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F	r7	/* constant 0x7f7f7f7f7f7f7f7f */
+#define rTMP2	r9
+#define rIGN	r10	/* number of bits we should ignore in the first word */
+#define rMASK	r11	/* mask with the bits to ignore set to 0 */
+#define rTMP3	r12
+#define rTMP4	rIGN
+#define rTMP5	rMASK
+
+	dcbt	0,rRTN
+	insrdi	rCHR, rCHR, 8, 48
+	li	rMASK, -1
+	insrdi	rCHR, rCHR, 16, 32
+	rlwinm	rIGN, rRTN, 3, 26, 28
+	insrdi	rCHR, rCHR, 32, 0
+	lis	rFEFE, -0x101
+	lis	r7F7F, 0x7f7f
+	clrrdi	rSTR, rRTN, 3
+	addi	rFEFE, rFEFE, -0x101
+	addi	r7F7F, r7F7F, 0x7f7f
+	sldi	rTMP1, rFEFE, 32
+	insrdi	r7F7F, r7F7F, 32, 0
+	add	rFEFE, rFEFE, rTMP1
+/* Test the first (partial?) word.  */
+	ld	rWORD, 0(rSTR)
+#ifdef __LITTLE_ENDIAN__
+	sld	rMASK, rMASK, rIGN
+#else
+	srd	rMASK, rMASK, rIGN
+#endif
+	orc	rWORD, rWORD, rMASK
+	add	rTMP1, rFEFE, rWORD
+	nor	rTMP2, r7F7F, rWORD
+	and.	rTMP4, rTMP1, rTMP2
+	xor	rTMP3, rCHR, rWORD
+	orc	rTMP3, rTMP3, rMASK
+	b	L(loopentry)
+
+/* The loop.  */
+
+L(loop):
+	ldu	rWORD, 8(rSTR)
+	and.	rTMP5, rTMP1, rTMP2
+/* Test for 0.	*/
+	add	rTMP1, rFEFE, rWORD /* x - 0x01010101.  */
+	nor	rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080.  */
+	bne	L(foundit)
+	and.	rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080.  */
+/* Start test for the bytes we're looking for.  */
+	xor	rTMP3, rCHR, rWORD
+L(loopentry):
+	add	rTMP1, rFEFE, rTMP3
+	nor	rTMP2, r7F7F, rTMP3
+	beq	L(loop)
+
+/* There is a zero byte in the word, but may also be a matching byte (either
+   before or after the zero byte).  In fact, we may be looking for a
+   zero byte, in which case we return a match.  */
+	and.	rTMP5, rTMP1, rTMP2
+	li	rRTN, 0
+	beqlr
+/* At this point:
+   rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
+   rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
+   But there may be false matches in the next most significant byte from
+   a true match due to carries.  This means we need to recalculate the
+   matches using a longer method for big-endian.  */
+#ifdef __LITTLE_ENDIAN__
+	addi	rTMP1, rTMP5, -1
+	andc	rTMP1, rTMP1, rTMP5
+	cntlzd	rCLZB, rTMP1
+	addi	rTMP2, rTMP4, -1
+	andc	rTMP2, rTMP2, rTMP4
+	cmpld	rTMP1, rTMP2
+	bgtlr
+	subfic	rCLZB, rCLZB, 64-7
+#else
+/* I think we could reduce this by two instructions by keeping the "nor"
+   results from the loop for reuse here.  See strlen.S tail.  Similarly
+   one instruction could be pruned from L(foundit).  */
+	and	rFEFE, r7F7F, rWORD
+	or	rTMP5, r7F7F, rWORD
+	and	rTMP1, r7F7F, rTMP3
+	or	rTMP4, r7F7F, rTMP3
+	add	rFEFE, rFEFE, r7F7F
+	add	rTMP1, rTMP1, r7F7F
+	nor	rWORD, rTMP5, rFEFE
+	nor	rTMP2, rTMP4, rTMP1
+	cntlzd	rCLZB, rTMP2
+	cmpld	rWORD, rTMP2
+	bgtlr
+#endif
+	srdi	rCLZB, rCLZB, 3
+	add	rRTN, rSTR, rCLZB
+	blr
+
+L(foundit):
+#ifdef __LITTLE_ENDIAN__
+	addi	rTMP1, rTMP5, -1
+	andc	rTMP1, rTMP1, rTMP5
+	cntlzd	rCLZB, rTMP1
+	subfic	rCLZB, rCLZB, 64-7-64
+	sradi	rCLZB, rCLZB, 3
+#else
+	and	rTMP1, r7F7F, rTMP3
+	or	rTMP4, r7F7F, rTMP3
+	add	rTMP1, rTMP1, r7F7F
+	nor	rTMP2, rTMP4, rTMP1
+	cntlzd	rCLZB, rTMP2
+	subi	rSTR, rSTR, 8
+	srdi	rCLZB, rCLZB, 3
+#endif
+	add	rRTN, rSTR, rCLZB
+	blr
+END (STRCHR)
+
+weak_alias (strchr, index)
+libc_hidden_builtin_def (strchr)