! sysdeps/sparc/sparc32/sparcv8/mul_1.S

! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
! store the product in a second limb vector.

! Copyright (C) 1992-2017 Free Software Foundation, Inc.

! This file is part of the GNU MP Library.

! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.

! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
! License for more details.

! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not,
! see <http://www.gnu.org/licenses/>.


! INPUT PARAMETERS
! res_ptr	o0
! s1_ptr	o1
! size		o2
! s2_limb	o3

#include <sysdep.h>

! __mpn_mul_1 (res_ptr, s1_ptr, size, s2_limb)
!
! Multiply each of the size 32-bit limbs read from s1_ptr by s2_limb, store
! the low word of every running product at res_ptr, and return the final
! carry limb in %o0.
!
! Register roles inside the routine:
!   %o0  res_ptr, biased by the entry stub and advanced 16 bytes per pass
!   %o1  s1_ptr, biased by the entry stub and advanced 16 bytes per pass
!   %o2  remaining limb count, decremented by 4 at the bottom of the loop
!   %o3  s2_limb, never modified
!   %o4  source limb most recently loaded
!   %g1  (size mod 4) * 16, the byte offset of the entry stub to use
!   %g2  carry limb: high product word of the previous multiply
!   %g3  address of the stub table, afterwards the low product word
!   %g4  copy of the caller return address while call clobbers %o7
!
! The loop body is unrolled four times.  Which of the four positions is
! entered first depends on size mod 4, and is selected by a computed jump
! into a table of small stubs placed 16 bytes apart.  The code reads the
! first limb unconditionally and has no path for size == 0.
ENTRY(__mpn_mul_1)
	! %g1 = (size & 3) << 4: shift first, then keep bits 4 and 5 only.
	sll	%o2,4,%g1
	mov	%o7,%g4			! Save return address register
	and	%g1,(4-1)<<4,%g1
	! Position-independent way to obtain the address of label 3: the call
	! writes its own address (label 1) into %o7, and the delay slot adds
	! the assemble-time distance from 1 to 3.
1:	call	2f
	 add	%o7,3f-1b,%g3
2:	mov	%g4,%o7			! Restore return address register
	! Dispatch to the stub for size mod 4.  The delay slot preloads the
	! first source limb, so every stub starts with limb 0 in %o4.
	jmp	%g3+%g1
	 ld	[%o1+0],%o4	! 1

	.align	4
	! Entry stubs.  Each one biases %o0 and %o1 so that the fixed
	! displacements used at its loop entry point address limb 0, and its
	! delay slot (orcc of %g0 with %g0) both zeroes the carry limb %g2 and
	! clears the carry flag consumed by the first addxcc.
3:
LOC(00):
	add	%o0,-4,%o0
	add	%o1,-4,%o1
	b	LOC(loop00)		/* 4, 8, 12, ... */
	 orcc	%g0,%g0,%g2
LOC(01):
	b	LOC(loop01)		/* 1, 5, 9, ... */
	 orcc	%g0,%g0,%g2
	! Padding so that the next stub starts 16 bytes after this one.
	nop
	nop
LOC(10):
	add	%o0,-12,%o0	/* 2, 6, 10, ... */
	add	%o1,4,%o1
	b	LOC(loop10)
	 orcc	%g0,%g0,%g2
	! Counting the instructions as written, this nop is the fifth of the
	! stub and sits 48 bytes after label 3.  A dispatch with size mod 4
	! equal to 3 therefore lands here and falls through into the next stub.
	nop
LOC(11):
	add	%o0,-8,%o0	/* 3, 7, 11, ... */
	add	%o1,-8,%o1
	b	LOC(loop11)
	 orcc	%g0,%g0,%g2

	! Main loop, software pipelined: the multiply for a limb is issued one
	! step before its result is combined and stored.  The trailing numbers
	! in the comments name the limb (1 to 4) of the current group of four.
	! Each step adds the previous high word (%g2) and the pending carry
	! flag to the new low word, loads the next source limb, stores the
	! result and reads the new high word from %y.
LOC(loop):
	! Plain addcc here: the carry flag was already folded into %g2 at the
	! bottom of the previous pass.
	addcc	%g3,%g2,%g3	! 1
	ld	[%o1+4],%o4	! 2
	st	%g3,[%o0+0]	! 1
	rd	%y,%g2		! 1
LOC(loop00):
	umul	%o4,%o3,%g3	! 2
	addxcc	%g3,%g2,%g3	! 2
	ld	[%o1+8],%o4	! 3
	st	%g3,[%o0+4]	! 2
	rd	%y,%g2		! 2
LOC(loop11):
	umul	%o4,%o3,%g3	! 3
	addxcc	%g3,%g2,%g3	! 3
	ld	[%o1+12],%o4	! 4
	! Source pointer advances here, so the next load uses displacement 0.
	add	%o1,16,%o1
	st	%g3,[%o0+8]	! 3
	rd	%y,%g2		! 3
LOC(loop10):
	umul	%o4,%o3,%g3	! 4
	addxcc	%g3,%g2,%g3	! 4
	ld	[%o1+0],%o4	! 1
	st	%g3,[%o0+12]	! 4
	add	%o0,16,%o0
	rd	%y,%g2		! 4
	! Fold the pending carry flag into the carry limb, because the counter
	! update below overwrites the condition codes.
	addx	%g0,%g2,%g2
LOC(loop01):
	! Count down by four limbs and loop while the result is positive.  The
	! multiply in the delay slot runs on both the taken and the fall-through
	! path; on exit it is the multiply for the last limb.
	addcc	%o2,-4,%o2
	bg	LOC(loop)
	 umul	%o4,%o3,%g3	! 1

	! Tail: combine and store the last limb, then return the high word plus
	! the final carry flag in %o0 (computed in the retl delay slot).
	addcc	%g3,%g2,%g3	! 4
	st	%g3,[%o0+0]	! 4
	rd	%y,%g2		! 4
	retl
	 addx	%g0,%g2,%o0

END(__mpn_mul_1)