about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 20
-rw-r--r-- linuxthreads/ChangeLog 6
-rw-r--r-- nptl/ChangeLog 11
-rw-r--r-- nptl/sysdeps/i386/tls.h 10
-rw-r--r-- nptl/sysdeps/s390/tls.h 8
-rw-r--r-- nptl/sysdeps/sparc/tls.h 8
-rw-r--r-- nptl/sysdeps/x86_64/tls.h 10
-rw-r--r-- sysdeps/powerpc/powerpc32/fpu/s_ceil.S 8
-rw-r--r-- sysdeps/powerpc/powerpc32/fpu/s_ceilf.S 7
-rw-r--r-- sysdeps/powerpc/powerpc32/fpu/s_floor.S 8
-rw-r--r-- sysdeps/powerpc/powerpc32/fpu/s_floorf.S 7
-rw-r--r-- sysdeps/powerpc/powerpc32/fpu/s_lround.S 17
-rw-r--r-- sysdeps/powerpc/powerpc32/fpu/s_rint.S 8
-rw-r--r-- sysdeps/powerpc/powerpc32/fpu/s_rintf.S 8
-rw-r--r-- sysdeps/powerpc/powerpc32/fpu/s_round.S 14
-rw-r--r-- sysdeps/powerpc/powerpc32/fpu/s_roundf.S 12
-rw-r--r-- sysdeps/powerpc/powerpc32/fpu/s_trunc.S 9
-rw-r--r-- sysdeps/powerpc/powerpc32/fpu/s_truncf.S 8
-rw-r--r-- sysdeps/powerpc/powerpc32/memset.S 90
19 files changed, 110 insertions, 159 deletions
diff --git a/ChangeLog b/ChangeLog
index ae5480f007..1f354e3dab 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+2005-05-17  Alan Modra  <amodra@bigpond.net.au>
+
+	* sysdeps/powerpc/powerpc32/fpu/s_ceil.S (TWO52.0): Delete.
+	* sysdeps/powerpc/powerpc32/fpu/s_ceilf.S (TWO23.0): Delete.
+	* sysdeps/powerpc/powerpc32/fpu/s_floor.S (TWO52.0): Delete.
+	* sysdeps/powerpc/powerpc32/fpu/s_floorf.S (TWO23.0): Delete.
+	* sysdeps/powerpc/powerpc32/fpu/s_lround.S: Comment typo.
+	(NEGZERO.0, POINTFIVE.0): Delete.
+	* sysdeps/powerpc/powerpc32/fpu/s_rint.S (TWO52.0): Delete.
+	* sysdeps/powerpc/powerpc32/fpu/s_rintf.S (TWO23.0): Delete.
+	* sysdeps/powerpc/powerpc32/fpu/s_round.S (__round): Code in .text.
+	(TWO52.0, POINTFIVE.0): Delete.
+	* sysdeps/powerpc/powerpc32/fpu/s_roundf.S (__roundf): Code in .text.
+	(TWO23.0, POINTFIVE.0): Delete.
+	* sysdeps/powerpc/powerpc32/fpu/s_trunc.S (__trunc): Code in .text.
+	(TWO52.0): Delete.
+	* sysdeps/powerpc/powerpc32/fpu/s_truncf.S (__truncf): Code in .text.
+	(TWO23.0): Delete.
+	* sysdeps/powerpc/powerpc32/memset.S (memset): Formatting.
+
 2005-05-19  Richard Henderson  <rth@redhat.com>
 
 	* sysdeps/unix/clock_gettime.c (clock_gettime): Fix typo around
diff --git a/linuxthreads/ChangeLog b/linuxthreads/ChangeLog
index 0079d0daf5..b2f3a08278 100644
--- a/linuxthreads/ChangeLog
+++ b/linuxthreads/ChangeLog
@@ -1,3 +1,9 @@
+2005-05-19  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/ia64/pspinlock.c (__pthread_spin_lock): Use
+	__sync_val_compare_and_swap, not explicit _si variant.
+	(__pthread_spin_trylock): Likewise.
+
 2005-05-05  Daniel Jacobowitz  <dan@codesourcery.com>
 
 	* sysdeps/unix/sysv/linux/i386/sysdep-cancel.h: Remove explicit
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index f8a2ac1a50..d3fdbe6946 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,14 @@
+2005-05-20  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/i386/tls.h (TLS_INIT_TCB_SIZE, TLS_INIT_TCB_ALIGN): Define to
+	size/alignment of struct pthread rather than tcbhead_t.
+	* sysdeps/x86_64/tls.h (TLS_INIT_TCB_SIZE, TLS_INIT_TCB_ALIGN):
+	Likewise.
+	* sysdeps/s390/tls.h (TLS_INIT_TCB_SIZE, TLS_INIT_TCB_ALIGN):
+	Likewise.
+	* sysdeps/sparc/tls.h (TLS_INIT_TCB_SIZE, TLS_INIT_TCB_ALIGN):
+	Likewise.
+
 2005-05-19  Richard Henderson  <rth@redhat.com>
 
 	* sysdeps/ia64/pthread_spin_lock.c (pthread_spin_lock): Use
diff --git a/nptl/sysdeps/i386/tls.h b/nptl/sysdeps/i386/tls.h
index 06def42a5b..c0331140f4 100644
--- a/nptl/sysdeps/i386/tls.h
+++ b/nptl/sysdeps/i386/tls.h
@@ -1,5 +1,5 @@
 /* Definition for thread-local data handling.  nptl/i386 version.
-   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -102,11 +102,13 @@ union user_desc_init
 /* Get the thread descriptor definition.  */
 # include <nptl/descr.h>
 
-/* This is the size of the initial TCB.  */
-# define TLS_INIT_TCB_SIZE sizeof (tcbhead_t)
+/* This is the size of the initial TCB.  Can't be just sizeof (tcbhead_t),
+   because NPTL getpid, __libc_alloca_cutoff etc. need (almost) the whole
+   struct pthread even when not linked with -lpthread.  */
+# define TLS_INIT_TCB_SIZE sizeof (struct pthread)
 
 /* Alignment requirements for the initial TCB.  */
-# define TLS_INIT_TCB_ALIGN __alignof__ (tcbhead_t)
+# define TLS_INIT_TCB_ALIGN __alignof__ (struct pthread)
 
 /* This is the size of the TCB.  */
 # define TLS_TCB_SIZE sizeof (struct pthread)
diff --git a/nptl/sysdeps/s390/tls.h b/nptl/sysdeps/s390/tls.h
index e93f3d080c..060dde39df 100644
--- a/nptl/sysdeps/s390/tls.h
+++ b/nptl/sysdeps/s390/tls.h
@@ -81,11 +81,13 @@ typedef struct
 /* Get the thread descriptor definition.  */
 # include <nptl/descr.h>
 
-/* This is the size of the initial TCB.  */
-# define TLS_INIT_TCB_SIZE sizeof (tcbhead_t)
+/* This is the size of the initial TCB.  Can't be just sizeof (tcbhead_t),
+   because NPTL getpid, __libc_alloca_cutoff etc. need (almost) the whole
+   struct pthread even when not linked with -lpthread.  */
+# define TLS_INIT_TCB_SIZE sizeof (struct pthread)
 
 /* Alignment requirements for the initial TCB.  */
-# define TLS_INIT_TCB_ALIGN __alignof__ (tcbhead_t)
+# define TLS_INIT_TCB_ALIGN __alignof__ (struct pthread)
 
 /* This is the size of the TCB.  */
 # define TLS_TCB_SIZE sizeof (struct pthread)
diff --git a/nptl/sysdeps/sparc/tls.h b/nptl/sysdeps/sparc/tls.h
index 8f54a0bb23..54b54859d9 100644
--- a/nptl/sysdeps/sparc/tls.h
+++ b/nptl/sysdeps/sparc/tls.h
@@ -69,11 +69,13 @@ typedef struct
 
 register struct pthread *__thread_self __asm__("%g7");
 
-/* This is the size of the initial TCB.  */
-# define TLS_INIT_TCB_SIZE sizeof (tcbhead_t)
+/* This is the size of the initial TCB.  Can't be just sizeof (tcbhead_t),
+   because NPTL getpid, __libc_alloca_cutoff etc. need (almost) the whole
+   struct pthread even when not linked with -lpthread.  */
+# define TLS_INIT_TCB_SIZE sizeof (struct pthread)
 
 /* Alignment requirements for the initial TCB.  */
-# define TLS_INIT_TCB_ALIGN __alignof__ (tcbhead_t)
+# define TLS_INIT_TCB_ALIGN __alignof__ (struct pthread)
 
 /* This is the size of the TCB.  */
 # define TLS_TCB_SIZE sizeof (struct pthread)
diff --git a/nptl/sysdeps/x86_64/tls.h b/nptl/sysdeps/x86_64/tls.h
index 12da9dc81f..3fc9b384aa 100644
--- a/nptl/sysdeps/x86_64/tls.h
+++ b/nptl/sysdeps/x86_64/tls.h
@@ -1,5 +1,5 @@
 /* Definition for thread-local data handling.  nptl/x86_64 version.
-   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -82,11 +82,13 @@ typedef struct
 # endif
 #endif
 
-/* This is the size of the initial TCB.  */
-# define TLS_INIT_TCB_SIZE sizeof (tcbhead_t)
+/* This is the size of the initial TCB.  Can't be just sizeof (tcbhead_t),
+   because NPTL getpid, __libc_alloca_cutoff etc. need (almost) the whole
+   struct pthread even when not linked with -lpthread.  */
+# define TLS_INIT_TCB_SIZE sizeof (struct pthread)
 
 /* Alignment requirements for the initial TCB.  */
-# define TLS_INIT_TCB_ALIGN __alignof__ (tcbhead_t)
+# define TLS_INIT_TCB_ALIGN __alignof__ (struct pthread)
 
 /* This is the size of the TCB.  */
 # define TLS_TCB_SIZE sizeof (struct pthread)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_ceil.S b/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
index 22cf76e54c..b76e9ff421 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
@@ -19,14 +19,6 @@
 
 #include <sysdep.h>
 
-	.section	.rodata
-	.align 3
-	.type	TWO52.0,@object
-	.size	TWO52.0,8
-TWO52.0:
-	.long 0x43300000
-	.long 0
-
 	.section	.rodata.cst8,"aM",@progbits,8
 	.align 3
 .LC0:	/* 2**52 */
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S b/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
index e7a72186c9..9315d8d2df 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
@@ -19,13 +19,6 @@
 
 #include <sysdep.h>
 
-	.section	.rodata
-	.align 2
-	.type	TWO23.0,@object
-	.size	TWO23.0,4
-TWO23.0:
-	.long 0x4b000000
-
 	.section	.rodata.cst4,"aM",@progbits,4
 	.align 2
 .LC0:	/* 2**23 */
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_floor.S b/sysdeps/powerpc/powerpc32/fpu/s_floor.S
index 812ea7ced2..6b79ca4f17 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_floor.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_floor.S
@@ -19,14 +19,6 @@
 
 #include <sysdep.h>
 
-	.section	.rodata
-	.align 3
-	.type	TWO52.0,@object
-	.size	TWO52.0,8
-TWO52.0:
-	.long 0x43300000
-	.long 0
-
 	.section	.rodata.cst8,"aM",@progbits,8
 	.align 3
 .LC0:	/* 2**52 */
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_floorf.S b/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
index ead41d4657..8ee0644ac9 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
@@ -19,13 +19,6 @@
 
 #include <sysdep.h>
 
-	.section	.rodata
-	.align 2
-	.type	TWO23.0,@object
-	.size	TWO23.0,4
-TWO23.0:
-	.long 0x4b000000
-
 	.section	.rodata.cst4,"aM",@progbits,4
 	.align 2
 .LC0:	/* 2**23 */
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_lround.S b/sysdeps/powerpc/powerpc32/fpu/s_lround.S
index dcb97e373b..ddac896e87 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_lround.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_lround.S
@@ -19,19 +19,6 @@
 
 #include <sysdep.h>
 
-	.section	.rodata
-	.align 3
-	.type	NEGZERO.0,@object
-	.size	NEGZERO.0,8
-NEGZERO.0:
-	.long 0x00000000
-	.long 0
-	.type	POINTFIVE.0,@object
-	.size	POINTFIVE.0,8
-POINTFIVE.0:
-	.long 0x3fe00000
-	.long 0
-
 	.section	.rodata.cst8,"aM",@progbits,8
 	.align 3
 .LC0:	/* 0.0 */
@@ -78,8 +65,8 @@ ENTRY (__lround)
 .L9:
 	fctiwz	fp2,fp1		/* Convert To Integer DW lround toward 0.  */
 	stfd	fp2,-8(r1)
-	nop	/* Insure the following load is in a different dispatch group */
-	nop	/* to avoid pipe stall on POWER4&5.  */
+	nop	/* Ensure the following load is in a different dispatch  */
+	nop	/* group to avoid pipe stall on POWER4&5.  */
 	nop
 	lwz	r3,-4(r1)
 	blr
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
index fa02dbc59c..5d65f12581 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
@@ -22,14 +22,6 @@
 
 #include <sysdep.h>
 
-	.section	.rodata
-	.align 3
-	.type	TWO52.0,@object
-	.size	TWO52.0,8
-TWO52.0:
-	.long 0x43300000
-	.long 0
-
 	.section	.rodata.cst8,"aM",@progbits,8
 	.align 3
 .LC0:	/* 2**52 */
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
index 7825951268..d02bd066b8 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
@@ -19,14 +19,6 @@
 
 #include <sysdep.h>
 
-
-	.section	.rodata
-	.align 2
-	.type	TWO23.0,@object
-	.size	TWO23.0,4
-TWO23.0:
-	.long 0x4b000000
-
 	.section	.rodata.cst4,"aM",@progbits,4
 	.align 2
 .LC0:	/* 2**23 */
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_round.S b/sysdeps/powerpc/powerpc32/fpu/s_round.S
index 39eab232f6..7eec8c641e 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_round.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_round.S
@@ -19,19 +19,6 @@
 
 #include <sysdep.h>
 
-	.section	.rodata
-	.align 3
-	.type	TWO52.0,@object
-	.size	TWO52.0,8
-TWO52.0:
-	.long 0x43300000
-	.long 0
-	.type	POINTFIVE.0,@object
-	.size	POINTFIVE.0,8
-POINTFIVE.0:
-	.long 0x3fe00000
-	.long 0
-
 	.section	.rodata.cst8,"aM",@progbits,8
 	.align 3
 .LC0:	/* 2**52 */
@@ -51,6 +38,7 @@ POINTFIVE.0:
    "Round toward Zero" mode and round by adding +-0.5 before rounding
    to the integer value.  */
 
+	.section	".text"
 ENTRY (__round)
 	mffs	fp11		/* Save current FPU rounding mode.  */
 #ifdef SHARED
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_roundf.S b/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
index a9b42f0170..856be6f115 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
@@ -19,17 +19,6 @@
 
 #include <sysdep.h>
 
-	.section	.rodata
-	.align 2
-	.type	TWO23.0,@object
-	.size	TWO23.0,4
-TWO23.0:
-	.long 0x4b000000
-	.type	POINTFIVE.0,@object
-	.size	POINTFIVE.0,4
-POINTFIVE.0:
-	.long 0x3f000000
-
 	.section	.rodata.cst4,"aM",@progbits,4
 	.align 2
 .LC0:	/* 2**23 */
@@ -47,6 +36,7 @@ POINTFIVE.0:
    "Round toward Zero" mode and round by adding +-0.5 before rounding
    to the integer value.  */
 
+	.section	".text"
 ENTRY (__roundf )
 	mffs	fp11		/* Save current FPU rounding mode.  */
 #ifdef SHARED
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_trunc.S b/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
index 08acc00cb2..59c71470be 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
@@ -19,14 +19,6 @@
 
 #include <sysdep.h>
 
-	.section	.rodata
-	.align 3
-	.type	TWO52.0,@object
-	.size	TWO52.0,8
-TWO52.0:
-	.long 0x43300000
-	.long 0
-
 	.section	.rodata.cst8,"aM",@progbits,8
 	.align 3
 .LC0:	/* 2**52 */
@@ -40,6 +32,7 @@ TWO52.0:
    We set "round toward Zero" mode and trunc by adding +-2**52 then
    subtracting +-2**52.  */
 
+	.section	".text"
 ENTRY (__trunc)
 	mffs	fp11		/* Save current FPU rounding mode.  */
 #ifdef SHARED
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_truncf.S b/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
index 3b6fe731b4..5275c69d29 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
@@ -19,13 +19,6 @@
 
 #include <sysdep.h>
 
-	.section	.rodata
-	.align 2
-	.type	TWO23.0,@object
-	.size	TWO23.0,2
-TWO23.0:
-	.long 0x4b000000
-
 	.section	.rodata.cst4,"aM",@progbits,4
 	.align 2
 .LC0:	/* 2**23 */
@@ -38,6 +31,7 @@ TWO23.0:
    We set "round toward Zero" mode and trunc by adding +-2**23 then
    subtracting +-2**23.  */
 
+	.section	".text"
 ENTRY (__truncf)
 	mffs	fp11		/* Save current FPU rounding mode.  */
 #ifdef SHARED
diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S
index 53f1143320..4c0edc8e45 100644
--- a/sysdeps/powerpc/powerpc32/memset.S
+++ b/sysdeps/powerpc/powerpc32/memset.S
@@ -140,7 +140,7 @@ L(nondcbz):
 
 /* We can't use dcbz here as we don't know the cache line size.  We can
    use "data cache block touch for store", which is safe.  */
-L(c3):	dcbtst rNEG64, rMEMP
+L(c3):	dcbtst	rNEG64, rMEMP
 	stw	rCHR, -4(rMEMP)
 	stw	rCHR, -8(rMEMP)
 	stw	rCHR, -12(rMEMP)
@@ -166,7 +166,7 @@ L(cloopdone):
 	add	rMEMP, rMEMP, rALIGN
 	b	L(medium_tail2)	/* 72nd instruction from .align */
 
-	.align 5
+	.align	5
 	nop
 /* Clear cache lines of memory in 128-byte chunks.
    This code is optimized for processors with 32-byte cache lines.
@@ -200,7 +200,7 @@ L(zloop):
 	beqlr	cr5
 	b	L(medium_tail2)
 
-	.align 5
+	.align	5
 L(small):
 /* Memset of 4 bytes or less.  */
 	cmplwi	cr5, rLEN, 1
@@ -218,7 +218,7 @@ L(small):
 	blr
 
 /* Memset of 0-31 bytes.  */
-	.align 5
+	.align	5
 L(medium):
 	cmplwi	cr1, rLEN, 16
 L(medium_tail2):
@@ -258,70 +258,70 @@ L(medium_28t):
 
 L(checklinesize):
 #ifdef SHARED
-	mflr rTMP
+	mflr	rTMP
 /* If the remaining length is less the 32 bytes then don't bother getting
-	 the cache line size.  */
+   the cache line size.  */
 	beq	L(medium)
 /* Establishes GOT addressability so we can load __cache_line_size
    from static. This value was set from the aux vector during startup.  */
-	bl   _GLOBAL_OFFSET_TABLE_@local-4
-	mflr rGOT
-	lwz	 rGOT,__cache_line_size@got(rGOT)
-	lwz	 rCLS,0(rGOT)
-	mtlr rTMP
+	bl	_GLOBAL_OFFSET_TABLE_@local-4
+	mflr	rGOT
+	lwz	rGOT,__cache_line_size@got(rGOT)
+	lwz	rCLS,0(rGOT)
+	mtlr	rTMP
 #else
 /* Load __cache_line_size from static. This value was set from the
    aux vector during startup.  */
-	lis	 rCLS,__cache_line_size@ha
+	lis	rCLS,__cache_line_size@ha
 /* If the remaining length is less the 32 bytes then don't bother getting
-	 the cache line size.  */
+   the cache line size.  */
 	beq	L(medium)
-	lwz  rCLS,__cache_line_size@l(rCLS)
+	lwz	rCLS,__cache_line_size@l(rCLS)
 #endif
 
-/*If the cache line size was not set then goto to L(nondcbz), which is
-	safe for any cache line size.  */
-	cmplwi cr1,rCLS,0
+/* If the cache line size was not set then go to L(nondcbz), which is
+   safe for any cache line size.  */
+	cmplwi	cr1,rCLS,0
 	beq	cr1,L(nondcbz)
 
 /* If the cache line size is 32 bytes then goto to L(zloopstart),
-	 which is coded specificly for 32-byte lines (and 601).  */
-	cmplwi cr1,rCLS,32
+   which is coded specifically for 32-byte lines (and 601).  */
+	cmplwi	cr1,rCLS,32
 	beq	cr1,L(zloopstart)
 
 /* Now we know the cache line size and it is not 32-bytes.  However
-	 we may not yet be aligned to the cache line and may have a partial
-	 line to fill.  Touch it 1st to fetch the cache line.  */
-	dcbtst 0,rMEMP
+   we may not yet be aligned to the cache line and may have a partial
+   line to fill.  Touch it 1st to fetch the cache line.  */
+	dcbtst	0,rMEMP
 
-	addi rCLM,rCLS,-1
+	addi	rCLM,rCLS,-1
 L(getCacheAligned):
-	cmplwi cr1,rLEN,32
-	and. rTMP,rCLM,rMEMP
-	blt	 cr1,L(handletail32)
-	beq	 L(cacheAligned)
+	cmplwi	cr1,rLEN,32
+	and.	rTMP,rCLM,rMEMP
+	blt	cr1,L(handletail32)
+	beq	L(cacheAligned)
 /* We are not aligned to start of a cache line yet.  Store 32-byte
    of data and test again.  */
-	addi rMEMP,rMEMP,32
-	addi rLEN,rLEN,-32
-	stw	 rCHR,-32(rMEMP)
-	stw	 rCHR,-28(rMEMP)
-	stw	 rCHR,-24(rMEMP)
-	stw	 rCHR,-20(rMEMP)
-	stw	 rCHR,-16(rMEMP)
-	stw	 rCHR,-12(rMEMP)
-	stw	 rCHR,-8(rMEMP)
-	stw	 rCHR,-4(rMEMP)
-	b	 L(getCacheAligned)
+	addi	rMEMP,rMEMP,32
+	addi	rLEN,rLEN,-32
+	stw	rCHR,-32(rMEMP)
+	stw	rCHR,-28(rMEMP)
+	stw	rCHR,-24(rMEMP)
+	stw	rCHR,-20(rMEMP)
+	stw	rCHR,-16(rMEMP)
+	stw	rCHR,-12(rMEMP)
+	stw	rCHR,-8(rMEMP)
+	stw	rCHR,-4(rMEMP)
+	b	L(getCacheAligned)
 
 /* Now we are aligned to the cache line and can use dcbz.  */
 L(cacheAligned):
-	cmplw cr1,rLEN,rCLS
-	blt	 cr1,L(handletail32)
-	dcbz 0,rMEMP
-	subf rLEN,rCLS,rLEN
-	add	 rMEMP,rMEMP,rCLS
-	b	 L(cacheAligned)
+	cmplw	cr1,rLEN,rCLS
+	blt	cr1,L(handletail32)
+	dcbz	0,rMEMP
+	subf	rLEN,rCLS,rLEN
+	add	rMEMP,rMEMP,rCLS
+	b	L(cacheAligned)
 
 /* We are here because; the cache line size was set, it was not
    32-bytes, and the remainder (rLEN) is now less than the actual cache
@@ -329,7 +329,7 @@ L(cacheAligned):
    store the remaining bytes.  */
 L(handletail32):
 	clrrwi.	rALIGN, rLEN, 5
-	b		L(nondcbz)
+	b	L(nondcbz)
 
 END (BP_SYM (memset))
 libc_hidden_builtin_def (memset)