From 8f9fb0000e345b16902311b2d265f83b329e69c5 Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Fri, 21 Mar 2003 21:39:39 +0000 Subject: * sysdeps/mips/mips64/memcpy.S: Fix porting bug that broke unaligned copying of 8-15 bytes. From Chris Demetriou . Fix label names. * sysdeps/mips/mips64/memset.S: Fix label names. Make similar change as to memcpy.S. * sysdeps/mips/memcpy.S: Formatting changes. * sysdeps/mips/memset.S: Likewise. 2003-03-21 Alexandre Oliva * sysdeps/mips/mips64/memcpy.S: Fix porting bug that broke unaligned copying of 8-15 bytes. From Chris Demetriou . Fix label names. * sysdeps/mips/mips64/memset.S: Fix label names. Make similar change as to memcpy.S. * sysdeps/mips/memcpy.S: Formatting changes. * sysdeps/mips/memset.S: Likewise. --- ChangeLog | 10 ++++++++++ sysdeps/mips/memcpy.S | 32 +++++++++++++++++++++----------- sysdeps/mips/memset.S | 21 ++++++++++++++------- sysdeps/mips/mips64/memcpy.S | 30 +++++++++++++++--------------- sysdeps/mips/mips64/memset.S | 16 ++++++++-------- 5 files changed, 68 insertions(+), 41 deletions(-) diff --git a/ChangeLog b/ChangeLog index bd574a21f5..618c3755f4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2003-03-21 Alexandre Oliva + + * sysdeps/mips/mips64/memcpy.S: Fix porting bug that broke + unaligned copying of 8-15 bytes. From Chris Demetriou + . Fix label names. + * sysdeps/mips/mips64/memset.S: Fix label names. Make similar + change as to memcpy.S. + * sysdeps/mips/memcpy.S: Formatting changes. + * sysdeps/mips/memset.S: Likewise. + 2003-03-21 Roland McGrath * sysdeps/arm/sysdep.h (CALL_MCOUNT): Add trailing semicolon. diff --git a/sysdeps/mips/memcpy.S b/sysdeps/mips/memcpy.S index 1e9130f6bd..05d209739e 100644 --- a/sysdeps/mips/memcpy.S +++ b/sysdeps/mips/memcpy.S @@ -54,12 +54,14 @@ ENTRY (memcpy) SWHI t0, 0(a0) addu a0, t1 -L(chk8w): andi t0, a2, 0x1f # 32 or more bytes left? +L(chk8w): + andi t0, a2, 0x1f # 32 or more bytes left? beq t0, a2, L(chk1w) subu a3, a2, t0 # Yes addu a3, a1 # a3 = end address of loop move a2, t0 # a2 = what will be left after loop -L(lop8w): lw t0, 0(a1) # Loop taking 8 words at a time +L(lop8w): + lw t0, 0(a1) # Loop taking 8 words at a time lw t1, 4(a1) lw t2, 8(a1) lw t3, 12(a1) @@ -79,28 +81,34 @@ L(lop8w): lw t0, 0(a1) # Loop taking 8 words at a time bne a1, a3, L(lop8w) sw t7, -4(a0) -L(chk1w): andi t0, a2, 0x3 # 4 or more bytes left? +L(chk1w): + andi t0, a2, 0x3 # 4 or more bytes left? beq t0, a2, L(last8) subu a3, a2, t0 # Yes, handle them one word at a time addu a3, a1 # a3 again end address move a2, t0 -L(lop1w): lw t0, 0(a1) +L(lop1w): + lw t0, 0(a1) addiu a0, 4 addiu a1, 4 bne a1, a3, L(lop1w) sw t0, -4(a0) -L(last8): blez a2, L(lst8e) # Handle last 8 bytes, one at a time +L(last8): + blez a2, L(lst8e) # Handle last 8 bytes, one at a time addu a3, a2, a1 -L(lst8l): lb t0, 0(a1) +L(lst8l): + lb t0, 0(a1) addiu a0, 1 addiu a1, 1 bne a1, a3, L(lst8l) sb t0, -1(a0) -L(lst8e): jr ra # Bye, bye +L(lst8e): + jr ra # Bye, bye nop -L(shift): subu a3, zero, a0 # Src and Dest unaligned +L(shift): + subu a3, zero, a0 # Src and Dest unaligned andi a3, 0x3 # (unoptimized case...) beq a3, zero, L(shft1) subu a2, a3 # a2 = bytes left @@ -109,16 +117,18 @@ L(shift): subu a3, zero, a0 # Src and Dest unaligned addu a1, a3 SWHI t0, 0(a0) addu a0, a3 -L(shft1): andi t0, a2, 0x3 +L(shft1): + andi t0, a2, 0x3 subu a3, a2, t0 addu a3, a1 -L(shfth): LWHI t1, 0(a1) # Limp through, word by word +L(shfth): + LWHI t1, 0(a1) # Limp through, word by word LWLO t1, 3(a1) addiu a0, 4 addiu a1, 4 bne a1, a3, L(shfth) sw t1, -4(a0) - b L(last8) # Handle anything which may be left + b L(last8) # Handle anything which may be left move a2, t0 .set reorder diff --git a/sysdeps/mips/memset.S b/sysdeps/mips/memset.S index b372d292f2..f120123544 100644 --- a/sysdeps/mips/memset.S +++ b/sysdeps/mips/memset.S @@ -43,35 +43,42 @@ ENTRY (memset) sll t0, a1, 16 or a1, t0 # a1 is now pattern in full word -L(ueven): subu t0, zero, a0 # Unaligned address? +L(ueven): + subu t0, zero, a0 # Unaligned address? andi t0, 0x3 beq t0, zero, L(chkw) subu a2, t0 SWHI a1, 0(a0) # Yes, handle first unaligned part addu a0, t0 # Now both a0 and a2 are updated -L(chkw): andi t0, a2, 0x7 # Enough left for one loop iteration? +L(chkw): + andi t0, a2, 0x7 # Enough left for one loop iteration? beq t0, a2, L(chkl) subu a3, a2, t0 addu a3, a0 # a3 is last loop address +1 move a2, t0 # a2 is now # of bytes left after loop -L(loopw): addiu a0, 8 # Handle 2 words pr. iteration +L(loopw): + addiu a0, 8 # Handle 2 words pr. iteration sw a1, -8(a0) bne a0, a3, L(loopw) sw a1, -4(a0) -L(chkl): andi t0, a2, 0x4 # Check if there is at least a full +L(chkl): + andi t0, a2, 0x4 # Check if there is at least a full beq t0, zero, L(last8) # word remaining after the loop subu a2, t0 sw a1, 0(a0) # Yes... addiu a0, 4 -L(last8): blez a2, L(exit) # Handle last 8 bytes (if cnt>0) +L(last8): + blez a2, L(exit) # Handle last 8 bytes (if cnt>0) addu a3, a2, a0 # a3 is last address +1 -L(lst8l): addiu a0, 1 +L(lst8l): + addiu a0, 1 bne a0, a3, L(lst8l) sb a1, -1(a0) -L(exit): j ra # Bye, bye +L(exit): + j ra # Bye, bye nop .set reorder diff --git a/sysdeps/mips/mips64/memcpy.S b/sysdeps/mips/mips64/memcpy.S index 3dbb31f49a..c4ba7a8f51 100644 --- a/sysdeps/mips/mips64/memcpy.S +++ b/sysdeps/mips/mips64/memcpy.S @@ -42,8 +42,8 @@ ENTRY (memcpy) .set noreorder - slti a4, a2, 8 # Less than 8? - bne a4, zero, L(last8) + slti a4, a2, 16 # Less than 16? + bne a4, zero, L(last16) move v0, a0 # Setup exit value before too late xor a4, a1, a0 # Find a0/a1 displacement @@ -86,53 +86,53 @@ L(lop8w): sd t7, -8(a0) L(chk1w): - andi a4, a2, 0x7 # 4 or more bytes left? - beq a4, a2, L(last8) - PTR_SUBU a3, a2, a4 # Yes, handle them one word at a time + andi a4, a2, 0x7 # 8 or more bytes left? + beq a4, a2, L(last16) + PTR_SUBU a3, a2, a4 # Yes, handle them one dword at a time PTR_ADDU a3, a1 # a3 again end address move a2, a4 -L(lop1w): +L(lop1w): ld a4, 0(a1) PTR_ADDIU a0, 8 PTR_ADDIU a1, 8 bne a1, a3, L(lop1w) sd a4, -8(a0) -L(last8): - blez a2, L(lst8e) # Handle last 8 bytes, one at a time +L(last16): + blez a2, L(lst16e) # Handle last 16 bytes, one at a time PTR_ADDU a3, a2, a1 -L(lst8l): +L(lst16l): lb a4, 0(a1) PTR_ADDIU a0, 1 PTR_ADDIU a1, 1 - bne a1, a3, L(lst8l) + bne a1, a3, L(lst16l) sb a4, -1(a0) -L(lst8e): +L(lst16e): jr ra # Bye, bye nop L(shift): PTR_SUBU a3, zero, a0 # Src and Dest unaligned andi a3, 0x7 # (unoptimized case...) - beq a3, zero, L(shfa5) + beq a3, zero, L(shft1) PTR_SUBU a2, a3 # a2 = bytes left LDHI a4, 0(a1) # Take care of first odd part LDLO a4, 7(a1) PTR_ADDU a1, a3 SDHI a4, 0(a0) PTR_ADDU a0, a3 -L(shfa5): +L(shft1): andi a4, a2, 0x7 PTR_SUBU a3, a2, a4 PTR_ADDU a3, a1 L(shfth): - LDHI a5, 0(a1) # Limp through, word by word + LDHI a5, 0(a1) # Limp through, dword by dword LDLO a5, 7(a1) PTR_ADDIU a0, 8 PTR_ADDIU a1, 8 bne a1, a3, L(shfth) sd a5, -8(a0) - b L(last8) # Handle anything which may be left + b L(last16) # Handle anything which may be left move a2, a4 .set reorder diff --git a/sysdeps/mips/mips64/memset.S b/sysdeps/mips/mips64/memset.S index 6a3b154bad..d6e1790fbe 100644 --- a/sysdeps/mips/mips64/memset.S +++ b/sysdeps/mips/mips64/memset.S @@ -36,8 +36,8 @@ ENTRY (memset) .set noreorder - slti t5, a2, 8 # Less than 8? - bne t5, zero, L(last8) + slti t5, a2, 16 # Less than 16? + bne t5, zero, L(last16) move v0, a0 # Setup exit value before too late beq a1, zero, L(ueven) # If zero pattern, no need to extend @@ -64,24 +64,24 @@ L(chkw): PTR_ADDU a3, a0 # a3 is last loop address +1 move a2, t4 # a2 is now # of bytes left after loop L(loopw): - PTR_ADDIU a0, 16 # Handle 2 words pr. iteration + PTR_ADDIU a0, 16 # Handle 2 dwords pr. iteration sd a1, -16(a0) bne a0, a3, L(loopw) sd a1, -8(a0) L(chkl): andi t4, a2, 0x8 # Check if there is at least a double - beq t4, zero, L(last8) # word remaining after the loop + beq t4, zero, L(last16) # word remaining after the loop PTR_SUBU a2, t4 sd a1, 0(a0) # Yes... PTR_ADDIU a0, 8 -L(last8): - blez a2, L(exit) # Handle last 8 bytes (if cnt>0) +L(last16): + blez a2, L(exit) # Handle last 16 bytes (if cnt>0) PTR_ADDU a3, a2, a0 # a3 is last address +1 -L(lst8l): +L(lst16l): PTR_ADDIU a0, 1 - bne a0, a3, L(lst8l) + bne a0, a3, L(lst16l) sb a1, -1(a0) L(exit): j ra # Bye, bye -- cgit 1.4.1