about summary refs log tree commit diff
diff options
context:
space:
mode:
author Alexandre Oliva <aoliva@redhat.com> 2003-03-21 19:44:24 +0000
committer Alexandre Oliva <aoliva@redhat.com> 2003-03-21 19:44:24 +0000
commit ee7b703635bf9c2df1c064a303eceda80d579b2d (patch)
tree 34276b27957934b9d7f222feb508505a186665f6
parent 5d13bfc0c0664750d8d5147f44c2bec2b6917a4e (diff)
download glibc-ee7b703635bf9c2df1c064a303eceda80d579b2d.tar.gz
glibc-ee7b703635bf9c2df1c064a303eceda80d579b2d.tar.xz
glibc-ee7b703635bf9c2df1c064a303eceda80d579b2d.zip
* sysdeps/mips/mips64/memcpy.S, sysdeps/mips/mips64/memset.S: New. * sysdeps/mips/memcpy.S, sysdeps/mips/memset.S: Update comments.
2003-03-21  Alexandre Oliva  <aoliva@redhat.com>

	* sysdeps/mips/mips64/memcpy.S, sysdeps/mips/mips64/memset.S: New.
	* sysdeps/mips/memcpy.S, sysdeps/mips/memset.S: Update comments.
-rw-r--r-- ChangeLog 5
-rw-r--r-- sysdeps/mips/memcpy.S 20
-rw-r--r-- sysdeps/mips/memset.S 14
-rw-r--r-- sysdeps/mips/mips64/memcpy.S 139
-rw-r--r-- sysdeps/mips/mips64/memset.S 91
5 files changed, 237 insertions, 32 deletions
diff --git a/ChangeLog b/ChangeLog
index 6bcdb22a56..aadb5079d4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2003-03-21  Alexandre Oliva  <aoliva@redhat.com>
+
+	* sysdeps/mips/mips64/memcpy.S, sysdeps/mips/mips64/memset.S: New.
+	* sysdeps/mips/memcpy.S, sysdeps/mips/memset.S: Update comments.
+
 2003-03-21  Roland McGrath  <roland@redhat.com>
 
 	* sysdeps/i386/i486/bits/atomic.h
diff --git a/sysdeps/mips/memcpy.S b/sysdeps/mips/memcpy.S
index c77f1b8d97..1e9130f6bd 100644
--- a/sysdeps/mips/memcpy.S
+++ b/sysdeps/mips/memcpy.S
@@ -21,12 +21,7 @@
 #include <endian.h>
 
 
-/* void *memcpy(void *s1, const void *s2, size_t n);
-
-   This routine could be optimized further for MIPS64, but this is left
-   as an exercise for the future. When it is done, the file should be kept
-   as a sisterfile to this one, and placed in the sysdeps/mips/mips64 
-   directory.  */
+/* void *memcpy(void *s1, const void *s2, size_t n);  */
 
 #if __BYTE_ORDER == __BIG_ENDIAN
 #  define LWHI	lwl		/* high part is left in big-endian	*/
@@ -40,19 +35,6 @@
 #  define SWLO	swl		/* low part is left in little-endian	*/
 #endif
 
-#ifndef t0
-# define t0 a4
-#endif
-#ifndef t1
-# define t1 a5
-#endif
-#ifndef t2
-# define t2 a6
-#endif
-#ifndef t3
-# define t3 a7
-#endif
-
 ENTRY (memcpy)
 	.set	noreorder
 
diff --git a/sysdeps/mips/memset.S b/sysdeps/mips/memset.S
index 46811340ec..b372d292f2 100644
--- a/sysdeps/mips/memset.S
+++ b/sysdeps/mips/memset.S
@@ -21,12 +21,7 @@
 #include <endian.h>
 
 
-/* void *memset(void *s, int c, size_t n).
-
-   This routine could be optimized further for MIPS64, but this is left
-   as an exercise for the future. When it is done, the file should be kept
-   as a sisterfile to this one, and placed in the sysdeps/mips/mips64 
-   directory.  */
+/* void *memset(void *s, int c, size_t n).  */
 
 #if __BYTE_ORDER == __BIG_ENDIAN
 # define SWHI	swl		/* high part is left in big-endian	*/
@@ -34,13 +29,6 @@
 # define SWHI	swr		/* high part is right in little-endian	*/
 #endif
 
-#ifndef t0
-# define t0 a4
-#endif
-#ifndef t1
-# define t1 a5
-#endif
-
 ENTRY (memset)
 	.set	noreorder
 
diff --git a/sysdeps/mips/mips64/memcpy.S b/sysdeps/mips/mips64/memcpy.S
new file mode 100644
index 0000000000..3dbb31f49a
--- /dev/null
+++ b/sysdeps/mips/mips64/memcpy.S
@@ -0,0 +1,139 @@
+/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Hartvig Ekner <hartvige@mips.com>, 2002.
+   Ported to mips3 n32/n64 by Alexandre Oliva <aoliva@redhat.com>
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <endian.h>
+#include <sys/asm.h>
+
+
+/* void *memcpy(void *s1, const void *s2, size_t n);
+   In: a0 = s1 (destination), a1 = s2 (source), a2 = n.  Returns s1 in v0.
+   This could probably be optimized further.  */
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#  define LDHI	ldl		/* high part is left in big-endian	*/
+#  define SDHI	sdl		/* high part is left in big-endian	*/
+#  define LDLO	ldr		/* low part is right in big-endian	*/
+#  define SDLO	sdr		/* low part is right in big-endian	*/
+#else
+#  define LDHI	ldr		/* high part is right in little-endian	*/
+#  define SDHI	sdr		/* high part is right in little-endian	*/
+#  define LDLO	ldl		/* low part is left in little-endian	*/
+#  define SDLO	sdl		/* low part is left in little-endian	*/
+#endif
+
+ENTRY (memcpy)
+	.set	noreorder
+
+	slti	a4, a2, 16		# Less than 16?  A head fixup may eat 7
+	bne	a4, zero, L(last16)	#  bytes and the shfth loop needs 8 more
+	move	v0, a0			# Setup exit value before too late
+
+	xor	a4, a1, a0		# Find a0/a1 displacement
+	andi	a4, 0x7
+	bne	a4, zero, L(shift)	# Go handle the unaligned case
+	PTR_SUBU a5, zero, a1		# (delay slot) a5 = -a1
+	andi	a5, 0x7			# a0/a1 are co-aligned, but are we
+	beq	a5, zero, L(chk8w)	#  starting mid-doubleword?
+	PTR_SUBU a2, a5			# (delay slot) a2 -= head bytes
+	LDHI	a4, 0(a1)		# Yes we are... take care of that
+	PTR_ADDU a1, a5
+	SDHI	a4, 0(a0)
+	PTR_ADDU a0, a5
+
+L(chk8w):
+	andi	a4, a2, 0x3f		# 64 or more bytes left?
+	beq	a4, a2, L(chk1w)
+	PTR_SUBU a3, a2, a4		# Yes (delay slot, harmless if branching)
+	PTR_ADDU a3, a1			# a3 = end address of loop
+	move	a2, a4			# a2 = what will be left after loop
+L(lop8w):
+	ld	a4,  0(a1)		# Loop taking 8 doublewords at a time
+	ld	a5,  8(a1)
+	ld	a6, 16(a1)
+	ld	a7, 24(a1)
+	ld	t4, 32(a1)
+	ld	t5, 40(a1)
+	ld	t6, 48(a1)
+	ld	t7, 56(a1)
+	PTR_ADDIU a0, 64		# Bump pointers first, so the stores
+	PTR_ADDIU a1, 64		#  below use negative offsets
+	sd	a4, -64(a0)
+	sd	a5, -56(a0)
+	sd	a6, -48(a0)
+	sd	a7, -40(a0)
+	sd	t4, -32(a0)
+	sd	t5, -24(a0)
+	sd	t6, -16(a0)
+	bne	a1, a3, L(lop8w)
+	sd	t7,  -8(a0)		# (delay slot) last store of the block
+
+L(chk1w):
+	andi	a4, a2, 0x7		# 8 or more bytes left?
+	beq	a4, a2, L(last16)
+	PTR_SUBU a3, a2, a4		# Yes, handle them one doubleword at a time
+	PTR_ADDU a3, a1			# a3 again end address
+	move	a2, a4			# a2 = tail bytes left after loop
+L(lop1w):
+	ld	a4, 0(a1)
+	PTR_ADDIU a0, 8
+	PTR_ADDIU a1, 8
+	bne	a1, a3, L(lop1w)
+	sd	a4, -8(a0)		# (delay slot) store the doubleword
+
+L(last16):
+	blez	a2, L(lst16e)		# Handle last 15 bytes or fewer, one at a time
+	PTR_ADDU a3, a2, a1		# (delay slot) a3 = source end address
+L(lst16l):
+	lb	a4, 0(a1)
+	PTR_ADDIU a0, 1
+	PTR_ADDIU a1, 1
+	bne	a1, a3, L(lst16l)
+	sb	a4, -1(a0)		# (delay slot) store the byte just loaded
+L(lst16e):
+	jr	ra			# Bye, bye
+	nop
+
+L(shift):
+	PTR_SUBU a3, zero, a0		# Src and Dest unaligned
+	andi	a3, 0x7			#  (unoptimized case...)
+	beq	a3, zero, L(shfa5)
+	PTR_SUBU a2, a3			# a2 = bytes left
+	LDHI	a4, 0(a1)		# Take care of first odd part
+	LDLO	a4, 7(a1)
+	PTR_ADDU a1, a3
+	SDHI	a4, 0(a0)		# Store up to the dest doubleword boundary
+	PTR_ADDU a0, a3
+L(shfa5):
+	andi	a4, a2, 0x7		# a4 = tail bytes; a2 >= 9 here, so the
+	PTR_SUBU a3, a2, a4		#  loop below has at least one doubleword
+	PTR_ADDU a3, a1			# a3 = source end address of loop
+L(shfth):
+	LDHI	a5, 0(a1)		# Limp through, doubleword by doubleword
+	LDLO	a5, 7(a1)
+	PTR_ADDIU a0, 8
+	PTR_ADDIU a1, 8
+	bne	a1, a3, L(shfth)
+	sd	a5, -8(a0)		# (delay slot) dest is aligned by now
+	b	L(last16)		# Handle anything which may be left
+	move	a2, a4			# (delay slot) a2 = tail byte count
+
+	.set	reorder
+END (memcpy)
diff --git a/sysdeps/mips/mips64/memset.S b/sysdeps/mips/mips64/memset.S
new file mode 100644
index 0000000000..6a3b154bad
--- /dev/null
+++ b/sysdeps/mips/mips64/memset.S
@@ -0,0 +1,91 @@
+/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Hartvig Ekner <hartvige@mips.com>, 2002.
+   Ported to mips3 n32/n64 by Alexandre Oliva <aoliva@redhat.com>
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <endian.h>
+#include <sys/asm.h>
+
+
+/* void *memset(void *s, int c, size_t n);
+   In: a0 = s, a1 = c, a2 = n.  Returns s in v0.
+   This could probably be optimized further.  */
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+# define SDHI	sdl		/* high part is left in big-endian	*/
+#else
+# define SDHI	sdr		/* high part is right in little-endian	*/
+#endif
+
+ENTRY (memset)
+	.set	noreorder
+
+	slti	t5, a2, 8		# Less than 8?
+	bne	t5, zero, L(last8)	# Yes, just store byte by byte
+	move	v0, a0			# Setup exit value before too late
+
+	beq	a1, zero, L(ueven)	# If zero pattern, no need to extend
+	andi	a1, 0xff		# Avoid problems with bogus arguments
+	dsll	t4, a1, 8
+	or	a1, t4			# a1 is now pattern in 2 bytes
+	dsll	t4, a1, 16
+	or	a1, t4			# a1 is now pattern in full word
+	dsll	t4, a1, 32
+	or	a1, t4			# a1 is now pattern in double word
+
+L(ueven):
+	PTR_SUBU t4, zero, a0		# Unaligned address?
+	andi	t4, 0x7			# t4 = bytes up to next doubleword boundary
+	beq	t4, zero, L(chkw)
+	PTR_SUBU a2, t4			# (delay slot) a2 -= t4, no-op if aligned
+	SDHI	a1, 0(a0)		# Yes, handle first unaligned part
+	PTR_ADDU a0, t4			# Now both a0 and a2 are updated
+
+L(chkw):
+	andi	t4, a2, 0xf		# Enough left for one loop iteration?
+	beq	t4, a2, L(chkl)		# No, fewer than 16 bytes remain
+	PTR_SUBU a3, a2, t4		# (delay slot) a3 = bytes the loop covers
+	PTR_ADDU a3, a0			# a3 is last loop address +1
+	move	a2, t4			# a2 is now # of bytes left after loop
+L(loopw):
+	PTR_ADDIU a0, 16		# Handle 2 doublewords per iteration
+	sd	a1, -16(a0)
+	bne	a0, a3, L(loopw)
+	sd	a1,  -8(a0)		# (delay slot) second doubleword
+
+L(chkl):
+	andi	t4, a2, 0x8		# Check if there is at least a double
+	beq	t4, zero, L(last8)	#  word remaining after the loop
+	PTR_SUBU a2, t4			# (delay slot) a2 -= 8, or -= 0 if branching
+	sd	a1, 0(a0)		# Yes...
+	PTR_ADDIU a0, 8
+
+L(last8):
+	blez	a2, L(exit)		# Handle remaining bytes, fewer than 8 (if cnt>0)
+	PTR_ADDU a3, a2, a0		# a3 is last address +1
+L(lst8l):
+	PTR_ADDIU a0, 1
+	bne	a0, a3, L(lst8l)
+	sb	a1, -1(a0)		# (delay slot) store low byte of pattern
+L(exit):
+	j	ra			# Bye, bye
+	nop
+
+	.set	reorder
+END (memset)