about summary refs log tree commit diff
path: root/sysdeps/powerpc/powerpc64/strlen.S
diff options
context:
space:
mode:
authorRoland McGrath <roland@gnu.org>2003-04-04 22:03:25 +0000
committerRoland McGrath <roland@gnu.org>2003-04-04 22:03:25 +0000
commitbeb03cee27a133e3fd34795e32d6d51c7b7b4d4d (patch)
tree920a38ae4bd7d92bbb77e229af7b92e14c4c66ab /sysdeps/powerpc/powerpc64/strlen.S
parent91613ed9d8e3cdef7a4257b1bec241828fa222c1 (diff)
downloadglibc-beb03cee27a133e3fd34795e32d6d51c7b7b4d4d.tar.gz
glibc-beb03cee27a133e3fd34795e32d6d51c7b7b4d4d.tar.xz
glibc-beb03cee27a133e3fd34795e32d6d51c7b7b4d4d.zip
* sysdeps/powerpc/powerpc64/strchr.S: 64-bit optimizations.
	* sysdeps/powerpc/powerpc64/strlen.S: 64-bit optimizations.

	* sysdeps/powerpc/fpu/bits/mathdef.h (FLT_EVAL_METHOD): Undef before
	defining.
Diffstat (limited to 'sysdeps/powerpc/powerpc64/strlen.S')
-rw-r--r--sysdeps/powerpc/powerpc64/strlen.S70
1 files changed, 40 insertions, 30 deletions
diff --git a/sysdeps/powerpc/powerpc64/strlen.S b/sysdeps/powerpc/powerpc64/strlen.S
index 7907382002..22a835b109 100644
--- a/sysdeps/powerpc/powerpc64/strlen.S
+++ b/sysdeps/powerpc/powerpc64/strlen.S
@@ -1,5 +1,5 @@
 /* Optimized strlen implementation for PowerPC64.
-   Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -60,7 +60,12 @@
    2) How popular are bytes with the high bit set? If they are very rare,
    on some processors it might be useful to use the simpler expression
    ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
-   ALU), but this fails when any character has its high bit set.  */
+   ALU), but this fails when any character has its high bit set.  
+   
+   Answer:
+   1) Added a Data Cache Block Touch early to prefetch the first 128-byte
+   cache line.  Adding dcbt instructions to the loop would not be
+   effective since most strings will be shorter than the cache line.  */
 
 /* Some notes on register usage: Under the SVR4 ABI, we can use registers
    0 and 3 through 12 (so long as we don't call any procedures) without
@@ -80,63 +85,68 @@ ENTRY (BP_SYM (strlen))
 #define rSTR	r4	/* current string position */
 #define rPADN	r5	/* number of padding bits we prepend to the
 			   string to make it start at a word boundary */
-#define rFEFE	r6	/* constant 0xfefefeff (-0x01010101) */
-#define r7F7F	r7	/* constant 0x7f7f7f7f */
-#define rWORD1	r8	/* current string word */
-#define rWORD2	r9	/* next string word */
-#define rMASK	r9	/* mask for first string word */
+#define rFEFE	r6	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F	r7	/* constant 0x7f7f7f7f7f7f7f7f */
+#define rWORD1	r8	/* current string doubleword */
+#define rWORD2	r9	/* next string doubleword */
+#define rMASK	r9	/* mask for first string doubleword */
 #define rTMP2	r10
 #define rTMP3	r11
 #define rTMP4	r12
 
+/* Note:  The Bounded pointer support in this code is broken.  This code
+   was inherited from PPC32 and that support was never completed.
+   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.
+   These artifacts are left in the code as a reminder in case we need
+   bounded pointer support in the future.  */
 	CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2)
 
-	clrrdi	rSTR, rRTN, 2
+	dcbt	0,rRTN
+	clrrdi	rSTR, rRTN, 3
 	lis	r7F7F, 0x7f7f
-	rlwinm	rPADN, rRTN, 3, 27, 28
-	lwz	rWORD1, 0(rSTR)
-	li	rMASK, -1
+	rlwinm	rPADN, rRTN, 3, 26, 28
+	ld	rWORD1, 0(rSTR)
 	addi	r7F7F, r7F7F, 0x7f7f
-/* That's the setup done, now do the first pair of words.
-   We make an exception and use method (2) on the first two words, to reduce
-   overhead.  */
-	srw	rMASK, rMASK, rPADN
+	li	rMASK, -1
+	insrdi	r7F7F, r7F7F, 32, 0
+/* That's the setup done, now do the first pair of doublewords.
+   We make an exception and use method (2) on the first two doublewords, 
+   to reduce overhead.  */
+	srd	rMASK, rMASK, rPADN
 	and	rTMP1, r7F7F, rWORD1
 	or	rTMP2, r7F7F, rWORD1
+	lis	rFEFE, -0x101
 	add	rTMP1, rTMP1, r7F7F
+	addi	rFEFE, rFEFE, -0x101
 	nor	rTMP1, rTMP2, rTMP1
 	and.	rWORD1, rTMP1, rMASK
 	mtcrf	0x01, rRTN
 	bne	L(done0)
-	lis	rFEFE, -0x101
-	addi	rFEFE, rFEFE, -0x101
-	clrldi	rFEFE,rFEFE,32 /* clear upper 32 */
+	sldi  rTMP1, rFEFE, 32
+	add  rFEFE, rFEFE, rTMP1
 /* Are we now aligned to a quadword boundary?  */
-	bt	29, L(loop)
+	bt	28, L(loop)
 
-/* Handle second word of pair.  */
-	lwzu	rWORD1, 4(rSTR)
+/* Handle second doubleword of pair.  */
+	ldu	rWORD1, 8(rSTR)
 	and	rTMP1, r7F7F, rWORD1
 	or	rTMP2, r7F7F, rWORD1
 	add	rTMP1, rTMP1, r7F7F
 	nor.	rWORD1, rTMP2, rTMP1
-	clrldi.	rWORD1,rWORD1,32 /* clear upper 32 */
 	bne	L(done0)
 
 /* The loop.  */
 
 L(loop):
-	lwz	rWORD1, 4(rSTR)
-	lwzu	rWORD2, 8(rSTR)
+	ld	rWORD1, 8(rSTR)
+	ldu	rWORD2, 16(rSTR)
 	add	rTMP1, rFEFE, rWORD1
 	nor	rTMP2, r7F7F, rWORD1
 	and.	rTMP1, rTMP1, rTMP2
-	clrldi.	rTMP1,rTMP1,32 /* clear upper 32 */
 	add	rTMP3, rFEFE, rWORD2
 	nor	rTMP4, r7F7F, rWORD2
 	bne	L(done1)
 	and.	rTMP1, rTMP3, rTMP4
-	clrldi.	rTMP1,rTMP1,32 /* clear upper 32 */
 	beq	L(loop)
 
 	and	rTMP1, r7F7F, rWORD2
@@ -146,17 +156,17 @@ L(loop):
 
 L(done1):
 	and	rTMP1, r7F7F, rWORD1
-	subi	rSTR, rSTR, 4
+	subi	rSTR, rSTR, 8
 	add	rTMP1, rTMP1, r7F7F
 	andc	rWORD1, rTMP2, rTMP1
 
-/* When we get to here, rSTR points to the first word in the string that
+/* When we get to here, rSTR points to the first doubleword in the string that
    contains a zero byte, and the most significant set bit in rWORD1 is in that
    byte.  */
 L(done0):
-	cntlzw	rTMP3, rWORD1
+	cntlzd	rTMP3, rWORD1
 	subf	rTMP1, rRTN, rSTR
-	srwi	rTMP3, rTMP3, 3
+	srdi	rTMP3, rTMP3, 3
 	add	rRTN, rTMP1, rTMP3
 	/* GKM FIXME: check high bound.  */
 	blr