From 295a453e2179b8e4e2b578e0176e76b9b574d7fc Mon Sep 17 00:00:00 2001 From: Steve Ellcey Date: Mon, 5 Jan 2015 15:06:42 -0800 Subject: 2015-01-05 Steve Ellcey * sysdeps/mips/memset.S (memset): Modify for mips32r6/mips64r6 to avoid using stl/str to align destination. --- ChangeLog | 5 +++++ sysdeps/mips/memset.S | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2afd2b297e..122d582224 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2015-01-05 Steve Ellcey + + * sysdeps/mips/memset.S (memset): Modify for mips32r6/mips64r6 + to avoid using stl/str to align destination. + 2015-01-05 Steve Ellcey * sysdeps/mips/memcpy.S: Add support for mips32r6/mips64r6. diff --git a/sysdeps/mips/memset.S b/sysdeps/mips/memset.S index abd73c2502..0a9b5cae46 100644 --- a/sysdeps/mips/memset.S +++ b/sysdeps/mips/memset.S @@ -54,6 +54,14 @@ # endif #endif +#if __mips_isa_rev > 5 +# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) +# undef PREFETCH_STORE_HINT +# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED +# endif +# define R6_CODE +#endif + /* Some asm.h files do not have the L macro definition. */ #ifndef L # if _MIPS_SIM == _ABIO32 @@ -72,6 +80,15 @@ # endif #endif +/* New R6 instructions that may not be in asm.h. */ +#ifndef PTR_LSA +# if _MIPS_SIM == _ABI64 +# define PTR_LSA dlsa +# else +# define PTR_LSA lsa +# endif +#endif + /* Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE or PREFETCH_STORE_STREAMED offers a large performance advantage but PREPAREFORSTORE has some special restrictions to consider. @@ -231,11 +248,48 @@ LEAF(MEMSET_NAME) /* If the destination address is not aligned do a partial store to get it aligned. If it is already aligned just jump to L(aligned). */ L(set0): +#ifndef R6_CODE andi t2,a3,(NSIZE-1) /* word-unaligned address? */ beq t2,zero,L(aligned) /* t2 is the unalignment count */ PTR_SUBU a2,a2,t2 C_STHI a1,0(a0) PTR_ADDU a0,a0,t2 +#else /* R6_CODE */ + andi t2,a0,(NSIZE-1) + lapc t9,L(atable) + PTR_LSA t9,t2,t9,2 + jrc t9 +L(atable): + bc L(aligned) +# ifdef USE_DOUBLE + bc L(lb7) + bc L(lb6) + bc L(lb5) + bc L(lb4) +# endif + bc L(lb3) + bc L(lb2) + bc L(lb1) +L(lb7): + sb a1,6(a0) +L(lb6): + sb a1,5(a0) +L(lb5): + sb a1,4(a0) +L(lb4): + sb a1,3(a0) +L(lb3): + sb a1,2(a0) +L(lb2): + sb a1,1(a0) +L(lb1): + sb a1,0(a0) + + li t9,NSIZE + subu t2,t9,t2 + PTR_SUBU a2,a2,t2 + PTR_ADDU a0,a0,t2 +#endif /* R6_CODE */ L(aligned): /* If USE_DOUBLE is not set we may still want to align the data on a 16 @@ -286,8 +340,12 @@ L(loop16w): bgtz v1,L(skip_pref) nop #endif +#ifdef R6_CODE + PREFETCH_FOR_STORE (2, a0) +#else PREFETCH_FOR_STORE (4, a0) PREFETCH_FOR_STORE (5, a0) +#endif L(skip_pref): C_ST a1,UNIT(0)(a0) C_ST a1,UNIT(1)(a0) -- cgit 1.4.1