author | Steve Ellcey <sellcey@mips.com> | 2015-10-16 14:32:49 -0700
committer | Steve Ellcey <sellcey@mips.com> | 2015-10-16 14:32:49 -0700
commit | 45d8d77ad396419c1b95f696c83bf97ceada0f3a (patch)
tree | 2042b034a706cf694038582a8e0f6fd513eab971 /sysdeps
parent | b0c3e04b71dabc701527bb07871c97a19689656d (diff)
download | glibc-45d8d77ad396419c1b95f696c83bf97ceada0f3a.tar.gz, glibc-45d8d77ad396419c1b95f696c83bf97ceada0f3a.tar.xz, glibc-45d8d77ad396419c1b95f696c83bf97ceada0f3a.zip
Make performance improvement to MIPS memcpy for small copies.
* sysdeps/mips/memcpy.S (memcpy): Add word copies for small aligned data.
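For readers who don't speak MIPS assembly, the idea of the patch can be sketched in C: when a copy is too small for the main (double-)word loop but source and destination are both word aligned, move 4-byte words first and fall back to byte copies only for the sub-word tail. This is an illustrative sketch, not glibc code; the name small_copy and the fixed 4-byte word are assumptions made for the example, standing in for the assembly's L(lasts) and L(lastb) paths shown in the diff below.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical C rendering of the patch's small-copy strategy.
   The real code is MIPS assembly in sysdeps/mips/memcpy.S.  */
static void *small_copy(void *dst, const void *src, size_t n)
{
    unsigned char *d = dst;
    const unsigned char *s = src;

    /* Like L(lasts): word copies are attempted only when both pointers
       are 4-byte aligned (the assembly's andi t9,a0,3 / andi t9,a1,3).  */
    if ((((uintptr_t)d | (uintptr_t)s) & 3) == 0) {
        while (n >= 4) {
            uint32_t w;
            /* Fixed-size memcpy is a well-defined stand-in for the
               assembly's lw/sw pair; compilers lower it to one load
               and one store.  */
            memcpy(&w, s, 4);
            memcpy(d, &w, 4);
            d += 4;
            s += 4;
            n -= 4;
        }
    }

    /* Like L(lastb): byte copies for the remainder, and for any
       misaligned copy.  */
    while (n-- != 0)
        *d++ = *s++;

    return dst;
}

The payoff is concrete: under this scheme a 15-byte aligned copy costs three word moves plus three byte moves instead of fifteen byte moves.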
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/mips/memcpy.S | 45
1 file changed, 43 insertions(+), 2 deletions(-)
diff --git a/sysdeps/mips/memcpy.S b/sysdeps/mips/memcpy.S
index c85935b811..6f63405333 100644
--- a/sysdeps/mips/memcpy.S
+++ b/sysdeps/mips/memcpy.S
@@ -295,7 +295,7 @@ L(memcpy):
  * size, copy dst pointer to v0 for the return value.
  */
 	slti	t2,a2,(2 * NSIZE)
-	bne	t2,zero,L(lastb)
+	bne	t2,zero,L(lasts)
 #if defined(RETURN_FIRST_PREFETCH) || defined(RETURN_LAST_PREFETCH)
 	move	v0,zero
 #else
@@ -546,7 +546,7 @@ L(chkw):
  */
 L(chk1w):
 	andi	a2,t8,(NSIZE-1)	/* a2 is the reminder past one (d)word chunks */
-	beq	a2,t8,L(lastb)
+	beq	a2,t8,L(lastw)
 	PTR_SUBU a3,t8,a2	/* a3 is count of bytes in one (d)word chunks */
 	PTR_ADDU a3,a0,a3	/* a3 is the dst address after loop */

@@ -558,6 +558,20 @@ L(wordCopy_loop):
 	bne	a0,a3,L(wordCopy_loop)
 	C_ST	REG3,UNIT(-1)(a0)

+/* If we have been copying double words, see if we can copy a single word
+   before doing byte copies.  We can have, at most, one word to copy.  */
+
+L(lastw):
+#ifdef USE_DOUBLE
+	andi	t8,a2,3		/* a2 is the remainder past 4 byte chunks.  */
+	beq	t8,a2,L(lastb)
+	lw	REG3,0(a1)
+	sw	REG3,0(a0)
+	PTR_ADDIU a0,a0,4
+	PTR_ADDIU a1,a1,4
+	move	a2,t8
+#endif
+
 /* Copy the last 8 (or 16) bytes */
 L(lastb):
 	blez	a2,L(leave)
@@ -572,6 +586,33 @@ L(leave):
 	j	ra
 	nop

+/* We jump here with a memcpy of less than 8 or 16 bytes, depending on
+   whether or not USE_DOUBLE is defined.  Instead of just doing byte
+   copies, check the alignment and size and use lw/sw if possible.
+   Otherwise, do byte copies.  */
+
+L(lasts):
+	andi	t8,a2,3
+	beq	t8,a2,L(lastb)
+
+	andi	t9,a0,3
+	bne	t9,zero,L(lastb)
+	andi	t9,a1,3
+	bne	t9,zero,L(lastb)
+
+	PTR_SUBU a3,a2,t8
+	PTR_ADDU a3,a0,a3
+
+L(wcopy_loop):
+	lw	REG3,0(a1)
+	PTR_ADDIU a0,a0,4
+	PTR_ADDIU a1,a1,4
+	bne	a0,a3,L(wcopy_loop)
+	sw	REG3,-4(a0)
+
+	b	L(lastb)
+	move	a2,t8
+
 #ifndef R6_CODE
 /*
  * UNALIGNED case, got here with a3 = "negu a0"
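Note how the new L(wcopy_loop) keeps the branch delay slot busy: sw REG3,-4(a0) executes in the delay slot of bne a0,a3,L(wcopy_loop), which is why the store uses a -4 offset after a0 has already been incremented. As a sanity check on the logic, a small harness (hypothetical, compiled together with the small_copy sketch above; NSIZE is assumed to be 8, as with USE_DOUBLE) can exercise exactly the space this patch targets: every length below 2 * NSIZE at every source and destination alignment, compared against the system memcpy.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define NSIZE 8   /* assumed: USE_DOUBLE, i.e. 8-byte (d)words */

int main(void)
{
    unsigned char src[40], dst[40], ref[40];

    for (size_t i = 0; i < sizeof src; i++)
        src[i] = (unsigned char)i;

    for (size_t sa = 0; sa < 4; sa++)          /* source alignment */
        for (size_t da = 0; da < 4; da++)      /* destination alignment */
            for (size_t n = 0; n < 2 * NSIZE; n++) {
                memset(dst, 0xaa, sizeof dst);
                memset(ref, 0xaa, sizeof ref);
                small_copy(dst + da, src + sa, n);  /* sketch above */
                memcpy(ref + da, src + sa, n);
                /* Whole buffers must match: same bytes copied, and no
                   bytes clobbered outside the destination range.  */
                assert(memcmp(dst, ref, sizeof dst) == 0);
            }

    puts("all small copies match memcpy");
    return 0;
}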