Diffstat (limited to 'REORG.TODO/sysdeps/sparc/sparc32/sub_n.S')
-rw-r--r--	REORG.TODO/sysdeps/sparc/sparc32/sub_n.S	328
1 file changed, 328 insertions(+), 0 deletions(-)
diff --git a/REORG.TODO/sysdeps/sparc/sparc32/sub_n.S b/REORG.TODO/sysdeps/sparc/sparc32/sub_n.S
new file mode 100644
index 0000000000..22ca71fa93
--- /dev/null
+++ b/REORG.TODO/sysdeps/sparc/sparc32/sub_n.S
@@ -0,0 +1,328 @@
+! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+!
+! Copyright (C) 1995-2017 Free Software Foundation, Inc.
+!
+! This file is part of the GNU MP Library.
+!
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+!
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+!
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not,
+! see <http://www.gnu.org/licenses/>.
+
+
+! INPUT PARAMETERS
+#define RES_PTR	%o0
+#define S1_PTR	%o1
+#define S2_PTR	%o2
+#define SIZE	%o3
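+
+! A rough C sketch of what this routine computes (an illustration,
+! assuming a 32-bit unsigned mp_limb_t; not the actual GMP source):
+!
+!	mp_limb_t			/* returns borrow-out, 0 or 1 */
+!	__mpn_sub_n (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
+!		     const mp_limb_t *s2_ptr, mp_size_t size)
+!	{
+!	  mp_limb_t cy = 0;
+!	  for (mp_size_t i = 0; i < size; i++)
+!	    {
+!	      mp_limb_t a = s1_ptr[i], b = s2_ptr[i];
+!	      res_ptr[i] = a - b - cy;
+!	      cy = (a < b) | ((a == b) & cy);	/* borrow out */
+!	    }
+!	  return cy;
+!	}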
+
+#include <sysdep.h>
+
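+! Three variants follow, selected by the relative word alignment of
+! the pointers: V1a (RES_PTR and S2_PTR share alignment), V1b (RES_PTR
+! and S1_PTR share alignment), and V2 (S1_PTR and S2_PTR share
+! alignment but differ from RES_PTR).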
+ENTRY(__mpn_sub_n)
+	xor	S2_PTR,RES_PTR,%g1
+	andcc	%g1,4,%g0
+	bne	LOC(1)			! branch if alignment differs
+	nop
+! **  V1a  **
+	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
+	be	LOC(v1)			! if no, branch
+	nop
+/* Subtract least significant limb separately to align RES_PTR and S2_PTR */
+	ld	[S1_PTR],%g4
+	add	S1_PTR,4,S1_PTR
+	ld	[S2_PTR],%g2
+	add	S2_PTR,4,S2_PTR
+	add	SIZE,-1,SIZE
+	subcc	%g4,%g2,%o4
+	st	%o4,[RES_PTR]
+	add	RES_PTR,4,RES_PTR
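+
+! Borrow trick used throughout: addx %g0,%g0,reg materializes the
+! carry flag into a register before cmp/addcc clobber the condition
+! codes, and subcc %g0,reg,%g0 restores it (0 - cy sets the carry
+! flag exactly when cy is 1).  The restore often sits in a branch
+! delay slot, where it executes whether or not the branch is taken.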
+LOC(v1):
+	addx	%g0,%g0,%o4		! save cy in register
+	cmp	SIZE,2			! if SIZE < 2 ...
+	bl	LOC(end2)		! ... branch to tail code
+	subcc	%g0,%o4,%g0		! restore cy
+
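+! Software pipelining: two limbs of each operand are loaded ahead of
+! the loop, and SIZE-10 checks that the preloaded pair plus a full
+! 8-limb block are available.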
+	ld	[S1_PTR+0],%g4
+	addcc	SIZE,-10,SIZE
+	ld	[S1_PTR+4],%g1
+	ldd	[S2_PTR+0],%g2
+	blt	LOC(fin1)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 8 limbs until less than 8 limbs remain */
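+! Note: ldd loads a doubleword into an even/odd register pair (%g2
+! and %g3 here), and std stores the %o4/%o5 pair, which is why %g3
+! and %o5 appear without explicit ld or st instructions.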
+LOC(loop1):
+	subxcc	%g4,%g2,%o4
+	ld	[S1_PTR+8],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[S1_PTR+12],%g1
+	ldd	[S2_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
+	subxcc	%g4,%g2,%o4
+	ld	[S1_PTR+16],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[S1_PTR+20],%g1
+	ldd	[S2_PTR+16],%g2
+	std	%o4,[RES_PTR+8]
+	subxcc	%g4,%g2,%o4
+	ld	[S1_PTR+24],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[S1_PTR+28],%g1
+	ldd	[S2_PTR+24],%g2
+	std	%o4,[RES_PTR+16]
+	subxcc	%g4,%g2,%o4
+	ld	[S1_PTR+32],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[S1_PTR+36],%g1
+	ldd	[S2_PTR+32],%g2
+	std	%o4,[RES_PTR+24]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-8,SIZE
+	add	S1_PTR,32,S1_PTR
+	add	S2_PTR,32,S2_PTR
+	add	RES_PTR,32,RES_PTR
+	bge	LOC(loop1)
+	subcc	%g0,%o4,%g0		! restore cy
+
+LOC(fin1):
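+! Add back 8-2: rewind the 8-limb block step, but require two limbs
+! beyond the preloaded pair so the loads at +8/+12 stay in bounds.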
+	addcc	SIZE,8-2,SIZE
+	blt	LOC(end1)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 2 limbs until less than 2 limbs remain */
+LOC(loope1):
+	subxcc	%g4,%g2,%o4
+	ld	[S1_PTR+8],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[S1_PTR+12],%g1
+	ldd	[S2_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-2,SIZE
+	add	S1_PTR,8,S1_PTR
+	add	S2_PTR,8,S2_PTR
+	add	RES_PTR,8,RES_PTR
+	bge	LOC(loope1)
+	subcc	%g0,%o4,%g0		! restore cy
+LOC(end1):
+	subxcc	%g4,%g2,%o4
+	subxcc	%g1,%g3,%o5
+	std	%o4,[RES_PTR+0]
+	addx	%g0,%g0,%o4		! save cy in register
+
+	andcc	SIZE,1,%g0
+	be	LOC(ret1)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract last limb */
+	ld	[S1_PTR+8],%g4
+	ld	[S2_PTR+8],%g2
+	subxcc	%g4,%g2,%o4
+	st	%o4,[RES_PTR+8]
+
+LOC(ret1):
+	retl
+	addx	%g0,%g0,%o0	! return carry-out from most significant limb
+
+LOC(1):	xor	S1_PTR,RES_PTR,%g1
+	andcc	%g1,4,%g0
+	bne	LOC(2)
+	nop
+! **  V1b  **
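+! V1b mirrors V1a with the roles of S1_PTR and S2_PTR exchanged:
+! S2 limbs are fetched with ld, S1 limbs with ldd, and the subxcc
+! operands are swapped so that S1 - S2 is still computed.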
+	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
+	be	LOC(v1b)		! if no, branch
+	nop
+/* Subtract least significant limb separately to align RES_PTR and S1_PTR */
+	ld	[S2_PTR],%g4
+	add	S2_PTR,4,S2_PTR
+	ld	[S1_PTR],%g2
+	add	S1_PTR,4,S1_PTR
+	add	SIZE,-1,SIZE
+	subcc	%g2,%g4,%o4
+	st	%o4,[RES_PTR]
+	add	RES_PTR,4,RES_PTR
+LOC(v1b):
+	addx	%g0,%g0,%o4		! save cy in register
+	cmp	SIZE,2			! if SIZE < 2 ...
+	bl	LOC(end2)		! ... branch to tail code
+	subcc	%g0,%o4,%g0		! restore cy
+
+	ld	[S2_PTR+0],%g4
+	addcc	SIZE,-10,SIZE
+	ld	[S2_PTR+4],%g1
+	ldd	[S1_PTR+0],%g2
+	blt	LOC(fin1b)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 8 limbs until less than 8 limbs remain */
+LOC(loop1b):
+	subxcc	%g2,%g4,%o4
+	ld	[S2_PTR+8],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[S2_PTR+12],%g1
+	ldd	[S1_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
+	subxcc	%g2,%g4,%o4
+	ld	[S2_PTR+16],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[S2_PTR+20],%g1
+	ldd	[S1_PTR+16],%g2
+	std	%o4,[RES_PTR+8]
+	subxcc	%g2,%g4,%o4
+	ld	[S2_PTR+24],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[S2_PTR+28],%g1
+	ldd	[S1_PTR+24],%g2
+	std	%o4,[RES_PTR+16]
+	subxcc	%g2,%g4,%o4
+	ld	[S2_PTR+32],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[S2_PTR+36],%g1
+	ldd	[S1_PTR+32],%g2
+	std	%o4,[RES_PTR+24]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-8,SIZE
+	add	S1_PTR,32,S1_PTR
+	add	S2_PTR,32,S2_PTR
+	add	RES_PTR,32,RES_PTR
+	bge	LOC(loop1b)
+	subcc	%g0,%o4,%g0		! restore cy
+
+LOC(fin1b):
+	addcc	SIZE,8-2,SIZE
+	blt	LOC(end1b)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 2 limbs until less than 2 limbs remain */
+LOC(loope1b):
+	subxcc	%g2,%g4,%o4
+	ld	[S2_PTR+8],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[S2_PTR+12],%g1
+	ldd	[S1_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-2,SIZE
+	add	S1_PTR,8,S1_PTR
+	add	S2_PTR,8,S2_PTR
+	add	RES_PTR,8,RES_PTR
+	bge	LOC(loope1b)
+	subcc	%g0,%o4,%g0		! restore cy
+LOC(end1b):
+	subxcc	%g2,%g4,%o4
+	subxcc	%g3,%g1,%o5
+	std	%o4,[RES_PTR+0]
+	addx	%g0,%g0,%o4		! save cy in register
+
+	andcc	SIZE,1,%g0
+	be	LOC(ret1b)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract last limb */
+	ld	[S2_PTR+8],%g4
+	ld	[S1_PTR+8],%g2
+	subxcc	%g2,%g4,%o4
+	st	%o4,[RES_PTR+8]
+
+LOC(ret1b):
+	retl
+	addx	%g0,%g0,%o0	! return carry-out from most significant limb
+
+! **  V2  **
+/* If we come here, the alignment of S1_PTR and RES_PTR differs, and so
+   does the alignment of S2_PTR and RES_PTR.  Since there are only two
+   ways things can be aligned (that we care about), we now know that
+   S1_PTR and S2_PTR must have the same alignment.  */
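+
+! Both sources can therefore be fetched with ldd once S1_PTR is
+! aligned, but results are stored with two single st instructions,
+! since RES_PTR does not share the sources' doubleword alignment.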
+
+LOC(2):	cmp	SIZE,1
+	be	LOC(jone)
+	nop
+	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
+	be	LOC(v2)			! if no, branch
+	nop
+/* Subtract least significant limb separately to align S1_PTR and S2_PTR */
+	ld	[S1_PTR],%g4
+	add	S1_PTR,4,S1_PTR
+	ld	[S2_PTR],%g2
+	add	S2_PTR,4,S2_PTR
+	add	SIZE,-1,SIZE
+	subcc	%g4,%g2,%o4
+	st	%o4,[RES_PTR]
+	add	RES_PTR,4,RES_PTR
+
+LOC(v2):
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-8,SIZE
+	blt	LOC(fin2)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 8 limbs until less than 8 limbs remain */
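+! Unlike the V1 loops, nothing is preloaded here: each iteration
+! fetches its own operands, with ldd filling the %g2/%g3 and %o4/%o5
+! pairs.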
+LOC(loop2):
+	ldd	[S1_PTR+0],%g2
+	ldd	[S2_PTR+0],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[RES_PTR+0]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[RES_PTR+4]
+	ldd	[S1_PTR+8],%g2
+	ldd	[S2_PTR+8],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[RES_PTR+8]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[RES_PTR+12]
+	ldd	[S1_PTR+16],%g2
+	ldd	[S2_PTR+16],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[RES_PTR+16]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[RES_PTR+20]
+	ldd	[S1_PTR+24],%g2
+	ldd	[S2_PTR+24],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[RES_PTR+24]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[RES_PTR+28]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-8,SIZE
+	add	S1_PTR,32,S1_PTR
+	add	S2_PTR,32,S2_PTR
+	add	RES_PTR,32,RES_PTR
+	bge	LOC(loop2)
+	subcc	%g0,%o4,%g0		! restore cy
+
+LOC(fin2):
+	addcc	SIZE,8-2,SIZE
+	blt	LOC(end2)
+	subcc	%g0,%o4,%g0		! restore cy
+LOC(loope2):
+	ldd	[S1_PTR+0],%g2
+	ldd	[S2_PTR+0],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[RES_PTR+0]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[RES_PTR+4]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-2,SIZE
+	add	S1_PTR,8,S1_PTR
+	add	S2_PTR,8,S2_PTR
+	add	RES_PTR,8,RES_PTR
+	bge	LOC(loope2)
+	subcc	%g0,%o4,%g0		! restore cy
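+! LOC(end2) also serves as the tail for V1a and V1b when fewer than
+! two limbs remain after the alignment step.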
+LOC(end2):
+	andcc	SIZE,1,%g0
+	be	LOC(ret2)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract last limb */
+LOC(jone):
+	ld	[S1_PTR],%g4
+	ld	[S2_PTR],%g2
+	subxcc	%g4,%g2,%o4
+	st	%o4,[RES_PTR]
+
+LOC(ret2):
+	retl
+	addx	%g0,%g0,%o0	! return carry-out from most significant limb
+
+END(__mpn_sub_n)