about summary refs log tree commit diff
diff options
context:
space:
mode:
author Steve Ellcey <sellcey@mips.com> 2015-01-05 15:06:42 -0800
committer Steve Ellcey <sellcey@mips.com> 2015-01-05 15:06:42 -0800
commit 295a453e2179b8e4e2b578e0176e76b9b574d7fc (patch)
tree 180a4e8e3e1666e218dfecda6696af28312ab845
parent 882c4b9f1de8b0175eae6edbd48c9a7b80b63fb9 (diff)
download glibc-295a453e2179b8e4e2b578e0176e76b9b574d7fc.tar.gz
glibc-295a453e2179b8e4e2b578e0176e76b9b574d7fc.tar.xz
glibc-295a453e2179b8e4e2b578e0176e76b9b574d7fc.zip
2015-01-05 Steve Ellcey <sellcey@imgtec.com>
	* sysdeps/mips/memset.S (memset): Modify for mips32r6/mips64r6
	to avoid using stl/str to align destination.
-rw-r--r-- ChangeLog 5
-rw-r--r-- sysdeps/mips/memset.S 58
2 files changed, 63 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 2afd2b297e..122d582224 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2015-01-05  Steve Ellcey  <sellcey@imgtec.com>
 
+	* sysdeps/mips/memset.S (memset): Modify for mips32r6/mips64r6
+	to avoid using stl/str to align destination.
+
+2015-01-05  Steve Ellcey  <sellcey@imgtec.com>
+
 	* sysdeps/mips/memcpy.S: Add support for mips32r6/mips64r6.
 
 2015-01-05  Joseph Myers  <joseph@codesourcery.com>
diff --git a/sysdeps/mips/memset.S b/sysdeps/mips/memset.S
index abd73c2502..0a9b5cae46 100644
--- a/sysdeps/mips/memset.S
+++ b/sysdeps/mips/memset.S
@@ -54,6 +54,14 @@
 # endif
 #endif
 
+#if __mips_isa_rev > 5
+# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+#  undef PREFETCH_STORE_HINT
+#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED /* replaces PREPAREFORSTORE on R6 */
+# endif /* PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE */
+# define R6_CODE /* tested by the #ifdef/#ifndef R6_CODE blocks in this file */
+#endif /* __mips_isa_rev > 5 */
+
 /* Some asm.h files do not have the L macro definition.  */
 #ifndef L
 # if _MIPS_SIM == _ABIO32
@@ -72,6 +80,15 @@
 # endif
 #endif
 
+/* New R6 instructions that may not be in asm.h.  */
+#ifndef PTR_LSA
+# if _MIPS_SIM == _ABI64
+#  define PTR_LSA        dlsa /* _ABI64: use the dlsa form */
+# else
+#  define PTR_LSA        lsa /* every other _MIPS_SIM value: use lsa */
+# endif /* _MIPS_SIM == _ABI64 */
+#endif /* !PTR_LSA */
+
 /* Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
    or PREFETCH_STORE_STREAMED offers a large performance advantage
    but PREPAREFORSTORE has some special restrictions to consider.
@@ -231,11 +248,48 @@ LEAF(MEMSET_NAME)
 /* If the destination address is not aligned do a partial store to get it
    aligned.  If it is already aligned just jump to L(aligned).  */
 L(set0):
+#ifndef R6_CODE
 	andi	t2,a3,(NSIZE-1)		/* word-unaligned address?          */
 	beq	t2,zero,L(aligned)	/* t2 is the unalignment count      */
 	PTR_SUBU a2,a2,t2
 	C_STHI	a1,0(a0)
 	PTR_ADDU a0,a0,t2
+#else /* R6_CODE: align a0 with byte stores selected through a branch table */
+	andi	t2,a0,(NSIZE-1)		/* t2 = a0 & (NSIZE-1)              */
+	lapc	t9,L(atable)		/* t9 = address of L(atable)        */
+	PTR_LSA	t9,t2,t9,2		/* t9 = t9 + (t2 << 2): entry t2    */
+	jrc	t9			/* jump to the selected table entry */
+L(atable):				/* entry i is reached when t2 == i  */
+	bc	L(aligned)		/* t2 == 0: no byte stores needed   */
+# ifdef USE_DOUBLE
+	bc	L(lb7)
+	bc	L(lb6)
+	bc	L(lb5)
+	bc	L(lb4)
+# endif /* USE_DOUBLE */
+	bc	L(lb3)
+	bc	L(lb2)
+	bc	L(lb1)
+L(lb7):					/* L(lbN): N byte stores, offsets N-1..0, by fall-through */
+	sb	a1,6(a0)
+L(lb6):
+	sb	a1,5(a0)
+L(lb5):
+	sb	a1,4(a0)
+L(lb4):
+	sb	a1,3(a0)
+L(lb3):
+	sb	a1,2(a0)
+L(lb2):
+	sb	a1,1(a0)
+L(lb1):
+	sb	a1,0(a0)
+
+	li	t9,NSIZE		/* NOTE(review): assumes NSIZE is 8 with USE_DOUBLE, else 4 -- confirm */
+	subu	t2,t9,t2		/* t2 = NSIZE - t2; presumably the bytes stored above */
+	PTR_SUBU a2,a2,t2		/* a2 -= t2                         */
+	PTR_ADDU a0,a0,t2		/* a0 += t2                         */
+#endif /* R6_CODE */
 
 L(aligned):
 /* If USE_DOUBLE is not set we may still want to align the data on a 16
@@ -286,8 +340,12 @@ L(loop16w):
 	bgtz	v1,L(skip_pref)
 	nop
 #endif
+#ifdef R6_CODE
+	PREFETCH_FOR_STORE (2, a0)
+#else
 	PREFETCH_FOR_STORE (4, a0)
 	PREFETCH_FOR_STORE (5, a0)
+#endif
 L(skip_pref):
 	C_ST	a1,UNIT(0)(a0)
 	C_ST	a1,UNIT(1)(a0)