about summary refs log tree commit diff
path: root/sysdeps/powerpc/memset.S
diff options
context:
space:
mode:
author Greg McGary <greg@mcgary.org> 2000-06-06 22:37:40 +0000
committer Greg McGary <greg@mcgary.org> 2000-06-06 22:37:40 +0000
commit 1d280d9f1e4d4c889d8123663a81b49256cf6fd6 (patch)
tree d75c35fc1e267377ac0957f4b521ae6bdfbab680 /sysdeps/powerpc/memset.S
parent 019357d23488c773cdef1dd077cc8915b6012d52 (diff)
download glibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.tar.gz
glibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.tar.xz
glibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.zip
* sysdeps/powerpc/memset.S: Define & use symbolic register names.
Use C comments throughout.  Line up operands column with tabs. 
* sysdeps/powerpc/strchr.S: Likewise. 
* sysdeps/powerpc/strcmp.S: Likewise. 
* sysdeps/powerpc/strcpy.S: Likewise. 
* sysdeps/powerpc/strlen.S: Likewise.
	* sysdeps/powerpc/memset.S: Define & use symbolic register names.
	Use C comments throughout.  Line up operands column with tabs.
	* sysdeps/powerpc/strchr.S: Likewise.
	* sysdeps/powerpc/strcmp.S: Likewise.
	* sysdeps/powerpc/strcpy.S: Likewise.
	* sysdeps/powerpc/strlen.S: Likewise.
Diffstat (limited to 'sysdeps/powerpc/memset.S')
-rw-r--r-- sysdeps/powerpc/memset.S 273
1 files changed, 142 insertions, 131 deletions
diff --git a/sysdeps/powerpc/memset.S b/sysdeps/powerpc/memset.S
index 1b95bc7591..c48c0af7c8 100644
--- a/sysdeps/powerpc/memset.S
+++ b/sysdeps/powerpc/memset.S
@@ -19,181 +19,192 @@
 
 #include <sysdep.h>
 
-EALIGN(memset,5,1)
 /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
    Returns 's'.
 
    The memset is done in three sizes: byte (8 bits), word (32 bits),
    cache line (256 bits). There is a special case for setting cache lines
-   to 0, to take advantage of the dcbz instruction.
-   r6:	current address we are storing at
-   r7:	number of bytes we are setting now (when aligning)  */
+   to 0, to take advantage of the dcbz instruction.  */
+
+EALIGN (memset, 5, 1)
+
+#define rTMP	r0
+#define	rRTN	r3	/* initial value of 1st argument */
+#define rCHR	r4	/* char to set in each byte */
+#define rLEN	r5	/* length of region to set */
+#define rMEMP	r6	/* address at which we are storing */
+#define rALIGN	r7	/* number of bytes we are setting now (when aligning) */
+#define rMEMP2	r8
+
+#define rPOS32	r7	/* constant +32 for clearing with dcbz */
+#define rNEG64	r8	/* constant -64 for clearing with dcbz */
+#define rNEG32	r9	/* constant -32 for clearing with dcbz */
 
 /* take care of case for size <= 4  */
-	cmplwi cr1,r5,4
-	andi.  r7,r3,3
-	mr     r6,r3
-	ble-   cr1,L(small)
+	cmplwi	cr1, rLEN, 4
+	andi.	rALIGN, rRTN, 3
+	mr	rMEMP, rRTN
+	ble-	cr1, L(small)
 /* align to word boundary  */
-	cmplwi cr5,r5,31
-	rlwimi r4,r4,8,16,23
-	beq+   L(aligned)		# 8th instruction from .align
-	mtcrf  0x01,r3
-	subfic r7,r7,4
-	add    r6,r6,r7
-	sub    r5,r5,r7
-	bf+    31,L(g0)
-	stb    r4,0(r3)
-	bt     30,L(aligned)
-L(g0):	sth    r4,-2(r6)		#  16th instruction from .align
+	cmplwi	cr5, rLEN, 31
+	rlwimi	rCHR, rCHR, 8, 16, 23
+	beq+	L(aligned)	/* 8th instruction from .align */
+	mtcrf	0x01, rRTN
+	subfic	rALIGN, rALIGN, 4
+	add	rMEMP, rMEMP, rALIGN
+	sub	rLEN, rLEN, rALIGN
+	bf+	31, L(g0)
+	stb	rCHR, 0(rRTN)
+	bt	30, L(aligned)
+L(g0):	sth	rCHR, -2(rMEMP)	/* 16th instruction from .align */
 /* take care of case for size < 31 */
 L(aligned):
-	mtcrf  0x01,r5
-	rlwimi r4,r4,16,0,15
-	ble    cr5,L(medium)
+	mtcrf	0x01, rLEN
+	rlwimi	rCHR, rCHR, 16, 0, 15
+	ble	cr5, L(medium)
 /* align to cache line boundary...  */
-	andi.  r7,r6,0x1C
-	subfic r7,r7,0x20
-	beq    L(caligned)
-	mtcrf  0x01,r7
-	add    r6,r6,r7
-	sub    r5,r5,r7
-	cmplwi cr1,r7,0x10
-	mr     r8,r6
-	bf     28,L(a1)
-	stw    r4,-4(r8)
-	stwu   r4,-8(r8)
-L(a1):	blt    cr1,L(a2)
-	stw    r4,-4(r8)	# 32nd instruction from .align
-	stw    r4,-8(r8)
-	stw    r4,-12(r8)
-	stwu   r4,-16(r8)
-L(a2):	bf     29,L(caligned)
-	stw    r4,-4(r8)
+	andi.	rALIGN, rMEMP, 0x1C
+	subfic	rALIGN, rALIGN, 0x20
+	beq	L(caligned)
+	mtcrf	0x01, rALIGN
+	add	rMEMP, rMEMP, rALIGN
+	sub	rLEN, rLEN, rALIGN
+	cmplwi	cr1, rALIGN, 0x10
+	mr	rMEMP2, rMEMP
+	bf	28, L(a1)
+	stw	rCHR, -4(rMEMP2)
+	stwu	rCHR, -8(rMEMP2)
+L(a1):	blt	cr1, L(a2)
+	stw	rCHR, -4(rMEMP2) /* 32nd instruction from .align */
+	stw	rCHR, -8(rMEMP2)
+	stw	rCHR, -12(rMEMP2)
+	stwu	rCHR, -16(rMEMP2)
+L(a2):	bf	29, L(caligned)
+	stw	rCHR, -4(rMEMP2)
 /* now aligned to a cache line.  */
 L(caligned):
-	cmplwi cr1,r4,0
-	clrrwi. r7,r5,5
-	mtcrf  0x01,r5		# 40th instruction from .align
-	beq    cr1,L(zloopstart) # special case for clearing memory using dcbz
-	srwi   r0,r7,5
-	mtctr  r0
-	beq    L(medium)	# we may not actually get to do a full line
-	clrlwi. r5,r5,27
-	add    r6,r6,r7
-	li     r8,-0x40
-	bdz    L(cloopdone)	# 48th instruction from .align
+	cmplwi	cr1, rCHR, 0
+	clrrwi.	rALIGN, rLEN, 5
+	mtcrf	0x01, rLEN	/* 40th instruction from .align */
+	beq	cr1, L(zloopstart) /* special case for clearing memory using dcbz */
+	srwi	rTMP, rALIGN, 5
+	mtctr	rTMP
+	beq	L(medium)	/* we may not actually get to do a full line */
+	clrlwi.	rLEN, rLEN, 27
+	add	rMEMP, rMEMP, rALIGN
+	li	rNEG64, -0x40
+	bdz	L(cloopdone)	/* 48th instruction from .align */
 
-L(c3):	dcbz   r8,r6
-	stw    r4,-4(r6)
-	stw    r4,-8(r6)
-	stw    r4,-12(r6)
-	stw    r4,-16(r6)
-	nop			# let 601 fetch last 4 instructions of loop
-	stw    r4,-20(r6)
-	stw    r4,-24(r6)	# 56th instruction from .align
-	nop			# let 601 fetch first 8 instructions of loop
-	stw    r4,-28(r6)
-	stwu   r4,-32(r6)
-	bdnz   L(c3)
+L(c3):	dcbz	rNEG64, rMEMP
+	stw	rCHR, -4(rMEMP)
+	stw	rCHR, -8(rMEMP)
+	stw	rCHR, -12(rMEMP)
+	stw	rCHR, -16(rMEMP)
+	nop			/* let 601 fetch last 4 instructions of loop */
+	stw	rCHR, -20(rMEMP)
+	stw	rCHR, -24(rMEMP) /* 56th instruction from .align */
+	nop			/* let 601 fetch first 8 instructions of loop */
+	stw	rCHR, -28(rMEMP)
+	stwu	rCHR, -32(rMEMP)
+	bdnz	L(c3)
 L(cloopdone):
-	stw    r4,-4(r6)
-	stw    r4,-8(r6)
-	stw    r4,-12(r6)
-	stw    r4,-16(r6)	# 64th instruction from .align
-	stw    r4,-20(r6)
-	cmplwi cr1,r5,16
-	stw    r4,-24(r6)
-	stw    r4,-28(r6)
-	stwu   r4,-32(r6)
+	stw	rCHR, -4(rMEMP)
+	stw	rCHR, -8(rMEMP)
+	stw	rCHR, -12(rMEMP)
+	stw	rCHR, -16(rMEMP) /* 64th instruction from .align */
+	stw	rCHR, -20(rMEMP)
+	cmplwi	cr1, rLEN, 16
+	stw	rCHR, -24(rMEMP)
+	stw	rCHR, -28(rMEMP)
+	stwu	rCHR, -32(rMEMP)
 	beqlr
-	add    r6,r6,r7
-	b      L(medium_tail2)	# 72nd instruction from .align
+	add	rMEMP, rMEMP, rALIGN
+	b	L(medium_tail2)	/* 72nd instruction from .align */
 
 	.align 5
 	nop
 /* Clear lines of memory in 128-byte chunks.  */
 L(zloopstart):
-	clrlwi r5,r5,27
-	mtcrf  0x02,r7
-	srwi.  r0,r7,7
-	mtctr  r0
-	li     r7,0x20
-	li     r8,-0x40
-	cmplwi cr1,r5,16	# 8
-	bf     26,L(z0)
-	dcbz   0,r6
-	addi   r6,r6,0x20
-L(z0):	li     r9,-0x20
-	bf     25,L(z1)
-	dcbz   0,r6
-	dcbz   r7,r6
-	addi   r6,r6,0x40	# 16
-L(z1):	cmplwi cr5,r5,0
-	beq    L(medium)
+	clrlwi	rLEN, rLEN, 27
+	mtcrf	0x02, rALIGN
+	srwi.	rTMP, rALIGN, 7
+	mtctr	rTMP
+	li	rPOS32, 0x20
+	li	rNEG64, -0x40
+	cmplwi	cr1, rLEN, 16	/* 8 */
+	bf	26, L(z0)
+	dcbz	0, rMEMP
+	addi	rMEMP, rMEMP, 0x20
+L(z0):	li	rNEG32, -0x20
+	bf	25, L(z1)
+	dcbz	0, rMEMP
+	dcbz	rPOS32, rMEMP
+	addi	rMEMP, rMEMP, 0x40 /* 16 */
+L(z1):	cmplwi	cr5, rLEN, 0
+	beq	L(medium)
 L(zloop):
-	dcbz   0,r6
-	dcbz   r7,r6
-	addi   r6,r6,0x80
-	dcbz   r8,r6
-	dcbz   r9,r6
-	bdnz   L(zloop)
-	beqlr  cr5
-	b      L(medium_tail2)
+	dcbz	0, rMEMP
+	dcbz	rPOS32, rMEMP
+	addi	rMEMP, rMEMP, 0x80
+	dcbz	rNEG64, rMEMP
+	dcbz	rNEG32, rMEMP
+	bdnz	L(zloop)
+	beqlr	cr5
+	b	L(medium_tail2)
 
 	.align 5
 L(small):
 /* Memset of 4 bytes or less.  */
-	cmplwi cr5,r5,1
-	cmplwi cr1,r5,3
-	bltlr  cr5
-	stb    r4,0(r6)
-	beqlr  cr5
+	cmplwi	cr5, rLEN, 1
+	cmplwi	cr1, rLEN, 3
+	bltlr	cr5
+	stb	rCHR, 0(rMEMP)
+	beqlr	cr5
 	nop
-	stb    r4,1(r6)
-	bltlr  cr1
-	stb    r4,2(r6)
-	beqlr  cr1
+	stb	rCHR, 1(rMEMP)
+	bltlr	cr1
+	stb	rCHR, 2(rMEMP)
+	beqlr	cr1
 	nop
-	stb    r4,3(r6)
+	stb	rCHR, 3(rMEMP)
 	blr
 
 /* Memset of 0-31 bytes.  */
 	.align 5
 L(medium):
-	cmplwi cr1,r5,16
+	cmplwi	cr1, rLEN, 16
 L(medium_tail2):
-	add    r6,r6,r5
+	add	rMEMP, rMEMP, rLEN
 L(medium_tail):
-	bt-    31,L(medium_31t)
-	bt-    30,L(medium_30t)
+	bt-	31, L(medium_31t)
+	bt-	30, L(medium_30t)
 L(medium_30f):
-	bt-    29,L(medium_29t)
+	bt-	29, L(medium_29t)
 L(medium_29f):
-	bge-   cr1,L(medium_27t)
-	bflr-  28
-	stw    r4,-4(r6)		# 8th instruction from .align
-	stw    r4,-8(r6)
+	bge-	cr1, L(medium_27t)
+	bflr-	28
+	stw	rCHR, -4(rMEMP)	/* 8th instruction from .align */
+	stw	rCHR, -8(rMEMP)
 	blr
 
 L(medium_31t):
-	stbu   r4,-1(r6)
-	bf-    30,L(medium_30f)
+	stbu	rCHR, -1(rMEMP)
+	bf-	30, L(medium_30f)
 L(medium_30t):
-	sthu   r4,-2(r6)
-	bf-    29,L(medium_29f)
+	sthu	rCHR, -2(rMEMP)
+	bf-	29, L(medium_29f)
 L(medium_29t):
-	stwu   r4,-4(r6)
-	blt-   cr1,L(medium_27f)	# 16th instruction from .align
+	stwu	rCHR, -4(rMEMP)
+	blt-	cr1, L(medium_27f) /* 16th instruction from .align */
 L(medium_27t):
-	stw    r4,-4(r6)
-	stw    r4,-8(r6)
-	stw    r4,-12(r6)
-	stwu   r4,-16(r6)
+	stw	rCHR, -4(rMEMP)
+	stw	rCHR, -8(rMEMP)
+	stw	rCHR, -12(rMEMP)
+	stwu	rCHR, -16(rMEMP)
 L(medium_27f):
-	bflr-  28
+	bflr-	28
 L(medium_28t):
-	stw    r4,-4(r6)
-	stw    r4,-8(r6)
+	stw	rCHR, -4(rMEMP)
+	stw	rCHR, -8(rMEMP)
 	blr
 END(memset)