about summary refs log tree commit diff
path: root/sysdeps/powerpc/powerpc32/memset.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/powerpc/powerpc32/memset.S')
-rw-r--r--sysdeps/powerpc/powerpc32/memset.S97
1 files changed, 52 insertions, 45 deletions
diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S
index 53f1143320..f09c294674 100644
--- a/sysdeps/powerpc/powerpc32/memset.S
+++ b/sysdeps/powerpc/powerpc32/memset.S
@@ -140,7 +140,7 @@ L(nondcbz):
 
 /* We can't use dcbz here as we don't know the cache line size.  We can
    use "data cache block touch for store", which is safe.  */
-L(c3):	dcbtst rNEG64, rMEMP
+L(c3):	dcbtst	rNEG64, rMEMP
 	stw	rCHR, -4(rMEMP)
 	stw	rCHR, -8(rMEMP)
 	stw	rCHR, -12(rMEMP)
@@ -166,7 +166,7 @@ L(cloopdone):
 	add	rMEMP, rMEMP, rALIGN
 	b	L(medium_tail2)	/* 72nd instruction from .align */
 
-	.align 5
+	.align	5
 	nop
 /* Clear cache lines of memory in 128-byte chunks.
    This code is optimized for processors with 32-byte cache lines.
@@ -200,7 +200,7 @@ L(zloop):
 	beqlr	cr5
 	b	L(medium_tail2)
 
-	.align 5
+	.align	5
 L(small):
 /* Memset of 4 bytes or less.  */
 	cmplwi	cr5, rLEN, 1
@@ -218,7 +218,7 @@ L(small):
 	blr
 
 /* Memset of 0-31 bytes.  */
-	.align 5
+	.align	5
 L(medium):
 	cmplwi	cr1, rLEN, 16
 L(medium_tail2):
@@ -258,70 +258,77 @@ L(medium_28t):
 
 L(checklinesize):
 #ifdef SHARED
-	mflr rTMP
+	mflr	rTMP
 /* If the remaining length is less the 32 bytes then don't bother getting
-	 the cache line size.  */
+   the cache line size.  */
 	beq	L(medium)
 /* Establishes GOT addressability so we can load __cache_line_size
    from static. This value was set from the aux vector during startup.  */
-	bl   _GLOBAL_OFFSET_TABLE_@local-4
-	mflr rGOT
-	lwz	 rGOT,__cache_line_size@got(rGOT)
-	lwz	 rCLS,0(rGOT)
-	mtlr rTMP
+# ifdef HAVE_ASM_PPC_REL16
+	bcl	20,31,1f
+1:	mflr	rGOT
+	addis	rGOT,rGOT,__cache_line_size-1b@ha
+	lwz	rCLS,__cache_line_size-1b@l(rGOT)
+# else
+	bl	_GLOBAL_OFFSET_TABLE_@local-4
+	mflr	rGOT
+	lwz	rGOT,__cache_line_size@got(rGOT)
+	lwz	rCLS,0(rGOT)
+# endif
+	mtlr	rTMP
 #else
 /* Load __cache_line_size from static. This value was set from the
    aux vector during startup.  */
-	lis	 rCLS,__cache_line_size@ha
+	lis	rCLS,__cache_line_size@ha
 /* If the remaining length is less the 32 bytes then don't bother getting
-	 the cache line size.  */
+   the cache line size.  */
 	beq	L(medium)
-	lwz  rCLS,__cache_line_size@l(rCLS)
+	lwz	rCLS,__cache_line_size@l(rCLS)
 #endif
 
-/*If the cache line size was not set then goto to L(nondcbz), which is
-	safe for any cache line size.  */
-	cmplwi cr1,rCLS,0
+/* If the cache line size was not set then go to L(nondcbz), which is
+   safe for any cache line size.  */
+	cmplwi	cr1,rCLS,0
 	beq	cr1,L(nondcbz)
 
 /* If the cache line size is 32 bytes then goto to L(zloopstart),
-	 which is coded specificly for 32-byte lines (and 601).  */
-	cmplwi cr1,rCLS,32
+   which is coded specifically for 32-byte lines (and 601).  */
+	cmplwi	cr1,rCLS,32
 	beq	cr1,L(zloopstart)
 
 /* Now we know the cache line size and it is not 32-bytes.  However
-	 we may not yet be aligned to the cache line and may have a partial
-	 line to fill.  Touch it 1st to fetch the cache line.  */
-	dcbtst 0,rMEMP
+   we may not yet be aligned to the cache line and may have a partial
+   line to fill.  Touch it first to fetch the cache line.  */
+	dcbtst	0,rMEMP
 
-	addi rCLM,rCLS,-1
+	addi	rCLM,rCLS,-1
 L(getCacheAligned):
-	cmplwi cr1,rLEN,32
-	and. rTMP,rCLM,rMEMP
-	blt	 cr1,L(handletail32)
-	beq	 L(cacheAligned)
+	cmplwi	cr1,rLEN,32
+	and.	rTMP,rCLM,rMEMP
+	blt	cr1,L(handletail32)
+	beq	L(cacheAligned)
 /* We are not aligned to start of a cache line yet.  Store 32-byte
    of data and test again.  */
-	addi rMEMP,rMEMP,32
-	addi rLEN,rLEN,-32
-	stw	 rCHR,-32(rMEMP)
-	stw	 rCHR,-28(rMEMP)
-	stw	 rCHR,-24(rMEMP)
-	stw	 rCHR,-20(rMEMP)
-	stw	 rCHR,-16(rMEMP)
-	stw	 rCHR,-12(rMEMP)
-	stw	 rCHR,-8(rMEMP)
-	stw	 rCHR,-4(rMEMP)
-	b	 L(getCacheAligned)
+	addi	rMEMP,rMEMP,32
+	addi	rLEN,rLEN,-32
+	stw	rCHR,-32(rMEMP)
+	stw	rCHR,-28(rMEMP)
+	stw	rCHR,-24(rMEMP)
+	stw	rCHR,-20(rMEMP)
+	stw	rCHR,-16(rMEMP)
+	stw	rCHR,-12(rMEMP)
+	stw	rCHR,-8(rMEMP)
+	stw	rCHR,-4(rMEMP)
+	b	L(getCacheAligned)
 
 /* Now we are aligned to the cache line and can use dcbz.  */
 L(cacheAligned):
-	cmplw cr1,rLEN,rCLS
-	blt	 cr1,L(handletail32)
-	dcbz 0,rMEMP
-	subf rLEN,rCLS,rLEN
-	add	 rMEMP,rMEMP,rCLS
-	b	 L(cacheAligned)
+	cmplw	cr1,rLEN,rCLS
+	blt	cr1,L(handletail32)
+	dcbz	0,rMEMP
+	subf	rLEN,rCLS,rLEN
+	add	rMEMP,rMEMP,rCLS
+	b	L(cacheAligned)
 
 /* We are here because; the cache line size was set, it was not
    32-bytes, and the remainder (rLEN) is now less than the actual cache
@@ -329,7 +336,7 @@ L(cacheAligned):
    store the remaining bytes.  */
 L(handletail32):
 	clrrwi.	rALIGN, rLEN, 5
-	b		L(nondcbz)
+	b	L(nondcbz)
 
 END (BP_SYM (memset))
 libc_hidden_builtin_def (memset)