Diffstat (limited to 'sysdeps/alpha/stxcpy.S')
-rw-r--r--  sysdeps/alpha/stxcpy.S | 33 +++++++++++++++++----------------
1 file changed, 17 insertions(+), 16 deletions(-)
diff --git a/sysdeps/alpha/stxcpy.S b/sysdeps/alpha/stxcpy.S
index e381b704cd..49dd8e5c64 100644
--- a/sysdeps/alpha/stxcpy.S
+++ b/sysdeps/alpha/stxcpy.S
@@ -36,7 +36,7 @@ Cambridge, MA 02139, USA.  */
    Furthermore, v0, a3-a5, t11, and t12 are untouched.
 */
 
-/* This is generally scheduled for the EV5, but should still be pretty 
+/* This is generally scheduled for the EV5, but should still be pretty
    good for the EV4 too.  */
 
 #include <sysdep.h>
@@ -65,7 +65,7 @@ stxcpy_aligned:
 	lda	t2, -1		# e1    : build a mask against false zero
 	mskqh	t2, a1, t2	# e0    :   detection in the src word
 	mskqh	t1, a1, t3	# e0    :
-	ornot	t1, t2, t2	# .. e1 : 
+	ornot	t1, t2, t2	# .. e1 :
 	mskql	t0, a1, t0	# e0    : assemble the first output word
 	cmpbge	zero, t2, t7	# .. e1 : bits set iff null found
 	or	t0, t3, t1	# e0    :
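[Annotation: the hunk above only trims trailing whitespace, but it sits in the heart of the aligned entry path. cmpbge compares all eight bytes of two quadwords at once; with zero as the first operand it yields a bitmask of the zero bytes in the second. A minimal C model, assuming only the architectural definition (names here are illustrative, not glibc code):

#include <stdint.h>

/* Model of Alpha's cmpbge (compare bytes, unsigned >=): bit i of the
   result is set iff byte i of x is >= byte i of y.  With x == 0 a bit
   is set exactly where y has a zero byte, which is how the code above
   finds the NUL without a byte loop. */
static unsigned cmpbge(uint64_t x, uint64_t y)
{
    unsigned mask = 0;
    for (int i = 0; i < 8; i++) {
        uint8_t xb = x >> (8 * i), yb = y >> (8 * i);
        if (xb >= yb)
            mask |= 1u << i;
    }
    return mask;
}

The lda/mskqh pair just above builds an all-ones mask over the bytes that precede the string's first byte and ORs it into the word under test, so garbage bytes loaded from before the start of the string can never look like a false NUL.]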
@@ -99,9 +99,9 @@ $a_eos:
 	/* We're doing a partial word store and so need to combine
 	   our source and original destination words.  */
 	ldq_u	t0, 0(a0)	# e0    :
-	subq	t8, 1, t6	# .. e1 : 
+	subq	t8, 1, t6	# .. e1 :
 	zapnot	t1, t6, t1	# e0    : clear src bytes >= null
-	or	t8, t6, t7	# .. e1 : 
+	or	t8, t6, t7	# .. e1 :
 	zap	t0, t7, t0	# e0    : clear dst bytes <= null
 	or	t0, t1, t1	# e1    :
 
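[Annotation: for reference, the byte arithmetic of the $a_eos hunk in C, under the assumption that null_bit has exactly one bit set, marking the NUL's byte position (helper names are hypothetical):

#include <stdint.h>

/* Expand a bit-per-byte mask, as produced by cmpbge, into a 0xff-per-
   byte mask; this mimics what zap/zapnot do in hardware. */
static uint64_t byte_mask(unsigned m)
{
    uint64_t r = 0;
    for (int i = 0; i < 8; i++)
        if (m & (1u << i))
            r |= (uint64_t)0xff << (8 * i);
    return r;
}

/* The merge: source bytes below the NUL, destination bytes above it,
   and the NUL position itself reads as zero. */
static uint64_t merge_eos(uint64_t dst, uint64_t src, unsigned null_bit)
{
    unsigned below = null_bit - 1;      /* subq t8, 1, t6    */
    unsigned thru  = null_bit | below;  /* or   t8, t6, t7   */
    src &= byte_mask(below);            /* zapnot t1, t6, t1 */
    dst &= ~byte_mask(thru);            /* zap    t0, t7, t0 */
    return dst | src;                   /* or     t0, t1, t1 */
}]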
@@ -156,13 +156,13 @@ $u_head:
 	or	t1, t4, t1	# .. e1 :
 	mskqh	t1, a0, t1	# e0    :
 	or	t0, t1, t1	# e1    :
-	
+
 	or	t1, t6, t6	# e0    :
 	cmpbge	zero, t6, t7	# .. e1 :
 	lda	t6, -1		# e0    : for masking just below
 	bne	t7, $u_final	# .. e1 :
 
-	mskql	t6, a1, t6		# e0    : mask out the bits we have 
+	mskql	t6, a1, t6		# e0    : mask out the bits we have
 	or	t6, t2, t2		# e1    :   already extracted before
 	cmpbge	zero, t2, t7		# e0    :   testing eos
 	bne	t7, $u_late_head_exit	# .. e1 (zdb)
@@ -181,7 +181,7 @@ $u_head:
 
 	/* Unaligned copy main loop.  In order to avoid reading too much,
 	   the loop is structured to detect zeros in aligned source words.
-	   This has, unfortunately, effectively pulled half of a loop 
+	   This has, unfortunately, effectively pulled half of a loop
 	   iteration out into the head and half into the tail, but it does
 	   prevent nastiness from accumulating in the very thing we want
 	   to run as fast as possible.
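[Annotation: the rotation the comment describes looks roughly like this in C (a sketch, assuming a source misalignment of 1..7 bytes; a misalignment of 0 takes the aligned path instead):

#include <stdint.h>

static unsigned has_zero(uint64_t w)    /* models cmpbge(0, w) */
{
    unsigned m = 0;
    for (int i = 0; i < 8; i++)
        if (((w >> (8 * i)) & 0xff) == 0)
            m |= 1u << i;
    return m;
}

/* Each iteration loads the *next* aligned source word and tests it
   for a NUL before anything derived from it is stored, so the loop
   never reads past the word holding the terminator. */
static uint64_t *copy_loop(uint64_t *dst, const uint64_t *src,
                           uint64_t prev, unsigned shift /* 1..7 */)
{
    for (;;) {
        uint64_t next = *src++;         /* ldq_u the following word */
        if (has_zero(next))             /* cmpbge zero, ...         */
            break;                      /* the tail finishes up     */
        /* merge the straddling bytes of the two words, as the
           extqh/extql + or sequence does (little-endian order) */
        *dst++ = (prev >> (8 * shift)) | (next << (8 * (8 - shift)));
        prev = next;
    }
    return dst;
}]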
@@ -207,7 +207,7 @@ $u_loop:
 
 	/* We've found a zero somewhere in the source word we just read.
 	   If it resides in the lower half, we have one (probably partial)
-	   word to write out, and if it resides in the upper half, we 
+	   word to write out, and if it resides in the upper half, we
 	   have one full and one partial word left to write out.
 
 	   On entry to this basic block:
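[Annotation: the case split described above can be read straight off the cmpbge mask of the word just read; a sketch with an illustrative helper:

/* A NUL in the low half of the new word lands inside the output word
   currently being assembled, so one (probably partial) word remains;
   a NUL in the high half means that word is complete and a final
   partial word follows.  Byte 0 is the lowest address. */
static int output_words_left(unsigned zmask)
{
    return (zmask & 0x0f) ? 1 : 2;
}]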
@@ -234,7 +234,7 @@ $u_final:
 	negq	t7, t6		# e0    : isolate low bit set
 	and	t6, t7, t8	# e1    :
 
-	and	t8, 0x80, t6	# e0    : avoid dest word load if we can 
+	and	t8, 0x80, t6	# e0    : avoid dest word load if we can
 	bne	t6, 1f		# .. e1 (zdb)
 
 	ldq_u	t0, 0(a0)	# e0    :
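[Annotation: the negq/and pair at the top of $u_final is the standard two's-complement idiom for isolating the lowest set bit; in C:

/* -x flips every bit above the lowest set bit of x, so x & -x leaves
   exactly that bit: it narrows "all zero bytes found" down to the
   first NUL.  (negq t7, t6; and t6, t7, t8) */
static unsigned lowest_set_bit(unsigned x)
{
    return x & -x;
}

The and with 0x80 then asks whether that first NUL is the topmost byte, in which case the whole word can be stored as-is and the read-modify-write of the destination is skipped.]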
@@ -256,7 +256,7 @@ $unaligned:
 	and	a0, 7, t4	# .. e1 : find dest misalignment
 	and	a1, 7, t5	# e0    : find src misalignment
 
-	/* Conditionally load the first destination word and a bytemask 
+	/* Conditionally load the first destination word and a bytemask
 	   with 0xff indicating that the destination byte is sacrosanct.  */
 
 	mov	zero, t0	# .. e1 :
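[Annotation: the merge this bytemask feeds is plain mask arithmetic; a sketch in C with illustrative names, where keep holds 0xff in every byte that must survive the store:

#include <stdint.h>

/* Destination bytes before the start of the string must come through
   the first store unchanged; everything else comes from the source. */
static uint64_t merge_head(uint64_t dst, uint64_t src, uint64_t keep)
{
    return (dst & keep) | (src & ~keep);
}]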
@@ -290,18 +290,19 @@ $unaligned:
 
 	negq	t7, t6		# .. e1 : build bitmask of bytes <= zero
 	and	t6, t7, t8	# e0    :
-	nop			# .. e1 :
+	and	a1, 7, t5	# .. e1 :
 	subq	t8, 1, t6	# e0    :
 	or	t6, t8, t7	# e1    :
+	srl	t8, t5, t8	# e0    : adjust final null return value
 
-	zapnot	t2, t7, t2	# e0    : prepare source word; mirror changes
+	zapnot	t2, t7, t2	# .. e1 : prepare source word; mirror changes
 	and	t1, t2, t1	# e1    : to source validity mask
-	extql	t2, a1, t2	# e0    :
+	extql	t2, a1, t2	# .. e0 :
 	extql	t1, a1, t1	# e0    :
 
-	andnot	t0, t2, t0	# e0    : zero place for source to reside
+ 	andnot	t0, t2, t0	# .. e1 : zero place for source to reside
 	or	t0, t1, t1	# e1    : and put it there
-	stq_u	t1, 0(a0)	# e0    :
-	ret	(t9)		# .. e1 :
+	stq_u	t1, 0(a0)	# .. e0 :
+	ret	(t9)
 
 	.end __stxcpy
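
[Annotation: the one substantive change in this commit is the final hunk. t8, the bit-per-byte mask the routine hands back to mark where the string ended, was computed on the merged source word before extql shifted that word down by the source misalignment, so the mask named the wrong byte of the word actually stored. The added and/srl pair realigns it, as the new comment says: it adjusts the final null return value. In C terms (a sketch, register names kept for reference):

#include <stdint.h>

/* extql shifts the merged word right by (a1 & 7) bytes before the
   store, so the NUL mask computed on the unshifted word must move
   right by the same count to name the byte actually written. */
static unsigned fix_null_mask(unsigned t8, uint64_t a1)
{
    unsigned t5 = a1 & 7;   /* and a1, 7, t5  */
    return t8 >> t5;        /* srl t8, t5, t8 */
}]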