about summary refs log tree commit diff
path: root/sysdeps/sparc/sparc64/memset.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/sparc/sparc64/memset.S')
-rw-r--r--sysdeps/sparc/sparc64/memset.S316
1 files changed, 316 insertions, 0 deletions
diff --git a/sysdeps/sparc/sparc64/memset.S b/sysdeps/sparc/sparc64/memset.S
new file mode 100644
index 0000000000..8106a5c999
--- /dev/null
+++ b/sysdeps/sparc/sparc64/memset.S
@@ -0,0 +1,316 @@
+/* Set a block of memory to some byte value.
+   For UltraSPARC.
+   Copyright (C) 1996, 97, 98, 99 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller (davem@caip.rutgers.edu) and
+                  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <sysdep.h>
+#include <asm/asi.h>
+#ifndef XCC
+#define XCC xcc
+#define USE_BPR
+#endif
+#define FPRS_FEF	4
+
+#define SET_BLOCKS(base, offset, source)		\
+	stx		source, [base - offset - 0x18];	\
+	stx		source, [base - offset - 0x10];	\
+	stx		source, [base - offset - 0x08];	\
+	stx		source, [base - offset - 0x00];
+
+	/* Well, memset is a lot easier to get right than bcopy... */
+	.text
+	.align		32
+ENTRY(memset)
+#ifndef USE_BPR
+	srl		%o1, 0, %o1
+	mov		%o0, %g3
+#endif
+	brz,a,pt	%o1, 50f
+#ifndef USE_BPR
+	 srl		%o2, 0, %o1
+#else
+	 mov		%o2, %o1
+#endif
+	cmp		%o2, 7
+#ifndef USE_BPR
+	srl		%o2, 0, %o2
+#endif
+	bleu,pn		%XCC, 17f
+	 andcc		%o0, 3, %g5
+	be,pt		%xcc, 4f
+	 and		%o1, 0xff, %o1
+	cmp		%g5, 3
+	be,pn		%xcc, 2f
+	 stb		%o1, [%o0 + 0x00]
+	cmp		%g5, 2
+	be,pt		%xcc, 2f
+	 stb		%o1, [%o0 + 0x01]
+	stb		%o1, [%o0 + 0x02]
+2:	sub		%g5, 4, %g5
+	sub		%o0, %g5, %o0
+	add		%o2, %g5, %o2
+4:	sllx		%o1, 8, %g1
+	andcc		%o0, 4, %g0
+	or		%o1, %g1, %o1
+	sllx		%o1, 16, %g1
+	or		%o1, %g1, %o1
+	be,pt		%xcc, 2f
+	 sllx		%o1, 32, %g1
+	stw		%o1, [%o0]
+	sub		%o2, 4, %o2
+	add		%o0, 4, %o0
+2:	cmp		%o2, 128
+	or		%o1, %g1, %o1
+	blu,pn		%xcc, 9f
+	 andcc		%o0, 0x38, %g5
+	be,pn		%icc, 6f
+	 mov		64, %o5
+	andcc		%o0, 8, %g0
+	be,pn		%icc, 1f
+	 sub		%o5, %g5, %o5
+	stx		%o1, [%o0]
+	add		%o0, 8, %o0
+1:	andcc		%o5, 16, %g0
+	be,pn		%icc, 1f
+	 sub		%o2, %o5, %o2
+	stx		%o1, [%o0]
+	stx		%o1, [%o0 + 8]
+	add		%o0, 16, %o0
+1:	andcc		%o5, 32, %g0
+	be,pn		%icc, 7f
+	 andncc		%o2, 0x3f, %o3
+	stw		%o1, [%o0]
+	stw		%o1, [%o0 + 4]
+	stw		%o1, [%o0 + 8]
+	stw		%o1, [%o0 + 12]
+	stw		%o1, [%o0 + 16]
+	stw		%o1, [%o0 + 20]
+	stw		%o1, [%o0 + 24]
+	stw		%o1, [%o0 + 28]
+	add		%o0, 32, %o0
+7:	be,pn		%xcc, 9f
+	 nop
+	ldd		[%o0 - 8], %f0
+18:	wr		%g0, ASI_BLK_P, %asi
+	membar		#StoreStore | #LoadStore
+	andcc		%o3, 0xc0, %g5
+	and		%o2, 0x3f, %o2
+	fmovd		%f0, %f2
+	fmovd		%f0, %f4
+	andn		%o3, 0xff, %o3
+	fmovd		%f0, %f6
+	cmp		%g5, 64
+	fmovd		%f0, %f8
+	fmovd		%f0, %f10
+	fmovd		%f0, %f12
+	brz,pn		%g5, 10f
+	 fmovd		%f0, %f14
+	be,pn		%icc, 2f
+	 stda		%f0, [%o0 + 0x00] %asi
+	cmp		%g5, 128
+	be,pn		%icc, 2f
+	 stda		%f0, [%o0 + 0x40] %asi
+	stda		%f0, [%o0 + 0x80] %asi
+2:	brz,pn		%o3, 12f
+	 add		%o0, %g5, %o0
+10:	stda		%f0, [%o0 + 0x00] %asi
+	stda		%f0, [%o0 + 0x40] %asi
+	stda		%f0, [%o0 + 0x80] %asi
+	stda		%f0, [%o0 + 0xc0] %asi
+11:	subcc		%o3, 256, %o3
+	bne,pt		%xcc, 10b
+	 add		%o0, 256, %o0
+12:	wr		%g0, FPRS_FEF, %fprs
+	membar		#StoreLoad | #StoreStore
+9:	andcc		%o2, 0x78, %g5
+	be,pn		%xcc, 13f
+	 andcc		%o2, 7, %o2
+14:	rd		%pc, %o4
+	srl		%g5, 1, %o3
+	sub		%o4, %o3, %o4
+	jmpl		%o4 + (13f - 14b), %g0
+	 add		%o0, %g5, %o0
+12:	SET_BLOCKS	(%o0, 0x68, %o1)
+	SET_BLOCKS	(%o0, 0x48, %o1)
+	SET_BLOCKS	(%o0, 0x28, %o1)
+	SET_BLOCKS	(%o0, 0x08, %o1)
+13:	be,pn		%xcc, 8f
+	 andcc		%o2, 4, %g0
+	be,pn		%xcc, 1f
+	 andcc		%o2, 2, %g0
+	stw		%o1, [%o0]
+	add		%o0, 4, %o0
+1:	be,pn		%xcc, 1f
+	 andcc		%o2, 1, %g0
+	sth		%o1, [%o0]
+	add		%o0, 2, %o0
+1:	bne,a,pn	%xcc, 8f
+	 stb		%o1, [%o0]
+8:	retl
+	 mov		%g3, %o0
+17:	brz,pn		%o2, 0f
+8:	 add		%o0, 1, %o0
+	subcc		%o2, 1, %o2
+	bne,pt		%xcc, 8b
+	 stb		%o1, [%o0 - 1]
+0:	retl
+	 mov		%g3, %o0
+
+6:	stx		%o1, [%o0]
+	andncc		%o2, 0x3f, %o3
+	be,pn		%xcc, 9b
+	 nop
+	ba,pt		%xcc, 18b
+	 ldd		[%o0], %f0
+END(memset)
+
+#define ZERO_BLOCKS(base, offset, source)		\
+	stx		source, [base - offset - 0x38];	\
+	stx		source, [base - offset - 0x30];	\
+	stx		source, [base - offset - 0x28];	\
+	stx		source, [base - offset - 0x20];	\
+	stx		source, [base - offset - 0x18];	\
+	stx		source, [base - offset - 0x10];	\
+	stx		source, [base - offset - 0x08];	\
+	stx		source, [base - offset - 0x00];
+
+	.text
+	.align		32
+ENTRY(__bzero)
+#ifndef USE_BPR
+	srl		%o1, 0, %o1
+#endif
+	mov		%o0, %g3
+50:	cmp		%o1, 7
+	bleu,pn		%xcc, 17f
+	 andcc		%o0, 3, %o2
+	be,a,pt		%xcc, 4f
+	 andcc		%o0, 4, %g0
+	cmp		%o2, 3
+	be,pn		%xcc, 2f
+	 stb		%g0, [%o0 + 0x00]
+	cmp		%o2, 2
+	be,pt		%xcc, 2f
+	 stb		%g0, [%o0 + 0x01]
+	stb		%g0, [%o0 + 0x02]
+2:	sub		%o2, 4, %o2
+	sub		%o0, %o2, %o0
+	add		%o1, %o2, %o1
+	andcc		%o0, 4, %g0
+4:	be,pt		%xcc, 2f
+	 cmp		%o1, 128
+	stw		%g0, [%o0]
+	sub		%o1, 4, %o1
+	add		%o0, 4, %o0
+2:	blu,pn		%xcc, 9f
+	 andcc		%o0, 0x38, %o2
+	be,pn		%icc, 6f
+	 mov		64, %o5
+	andcc		%o0, 8, %g0
+	be,pn		%icc, 1f
+	 sub		%o5, %o2, %o5
+	stx		%g0, [%o0]
+	add		%o0, 8, %o0
+1:	andcc		%o5, 16, %g0
+	be,pn		%icc, 1f
+	 sub		%o1, %o5, %o1
+	stx		%g0, [%o0]
+	stx		%g0, [%o0 + 8]
+	add		%o0, 16, %o0
+1:	andcc		%o5, 32, %g0
+	be,pn		%icc, 7f
+	 andncc		%o1, 0x3f, %o3
+	stx		%g0, [%o0]
+	stx		%g0, [%o0 + 8]
+	stx		%g0, [%o0 + 16]
+	stx		%g0, [%o0 + 24]
+	add		%o0, 32, %o0
+6:	andncc		%o1, 0x3f, %o3
+7:	be,pn		%xcc, 9f
+	 wr		%g0, ASI_BLK_P, %asi
+	membar		#StoreLoad | #StoreStore | #LoadStore
+	fzero		%f0
+	andcc		%o3, 0xc0, %o2
+	and		%o1, 0x3f, %o1
+	fzero		%f2
+	andn		%o3, 0xff, %o3
+	faddd		%f0, %f2, %f4
+	fmuld		%f0, %f2, %f6
+	cmp		%o2, 64
+	faddd		%f0, %f2, %f8
+	fmuld		%f0, %f2, %f10
+	faddd		%f0, %f2, %f12
+	brz,pn		%o2, 10f
+	 fmuld		%f0, %f2, %f14
+	be,pn		%icc, 2f
+	 stda		%f0, [%o0 + 0x00] %asi
+	cmp		%o2, 128
+	be,pn		%icc, 2f
+	 stda		%f0, [%o0 + 0x40] %asi
+	stda		%f0, [%o0 + 0x80] %asi
+2:	brz,pn		%o3, 12f
+	 add		%o0, %o2, %o0
+10:	stda		%f0, [%o0 + 0x00] %asi
+	stda		%f0, [%o0 + 0x40] %asi
+	stda		%f0, [%o0 + 0x80] %asi
+	stda		%f0, [%o0 + 0xc0] %asi
+11:	subcc		%o3, 256, %o3
+	bne,pt		%xcc, 10b
+	 add		%o0, 256, %o0
+12:	wr		%g0, FPRS_FEF, %fprs
+	membar		#StoreLoad | #StoreStore
+9:	andcc		%o1, 0xf8, %o2
+	be,pn		%xcc, 13f
+	 andcc		%o1, 7, %o1
+14:	rd		%pc, %o4
+	srl		%o2, 1, %o3
+	sub		%o4, %o3, %o4
+	jmpl		%o4 + (13f - 14b), %g0
+	 add		%o0, %o2, %o0
+12:	ZERO_BLOCKS	(%o0, 0xc8, %g0)
+	ZERO_BLOCKS	(%o0, 0x88, %g0)
+	ZERO_BLOCKS	(%o0, 0x48, %g0)
+	ZERO_BLOCKS	(%o0, 0x08, %g0)
+13:	be,pn		%xcc, 8f
+	 andcc		%o1, 4, %g0
+	be,pn		%xcc, 1f
+	 andcc		%o1, 2, %g0
+	stw		%g0, [%o0]
+	add		%o0, 4, %o0
+1:	be,pn		%xcc, 1f
+	 andcc		%o1, 1, %g0
+	sth		%g0, [%o0]
+	add		%o0, 2, %o0
+1:	bne,a,pn	%xcc, 8f
+	 stb		%g0, [%o0]
+8:	retl
+	 mov		%g3, %o0
+17:	be,pn		%xcc, 13b
+	 orcc		%o1, 0, %g0
+	be,pn		%xcc, 0f
+8:	 add		%o0, 1, %o0
+	subcc		%o1, 1, %o1
+	bne,pt		%xcc, 8b
+	 stb		%g0, [%o0 - 1]
+0:	retl
+	 mov		%g3, %o0
+END(__bzero)
+
+weak_alias(__bzero, bzero)