! sysdeps/sparc/sparc32/sparcv9/addmul_1.S

! SPARC v9 32-bit __mpn_addmul_1 -- Multiply a limb vector with a limb
! and add the result to a second limb vector.
!
! Copyright (C) 2013-2020 Free Software Foundation, Inc.
! This file is part of the GNU C Library.
! Contributed by David S. Miller <davem@davemloft.net>
!
! The GNU C Library is free software; you can redistribute it and/or
! modify it under the terms of the GNU Lesser General Public
! License as published by the Free Software Foundation; either
! version 2.1 of the License, or (at your option) any later version.
!
! The GNU C Library is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
! Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public
! License along with the GNU C Library; if not, see
! <https://www.gnu.org/licenses/>.

#include <sysdep.h>

! Register aliases used by the routine below.
!
! NOTE(review): every value that is held or tested as a full 64-bit
! quantity (sz, carry, s2_limb, tmp64_1, tmp64_2) lives in a %g or %o
! register, while the %l registers only ever receive zero-extended
! 32-bit loads.  Presumably this follows the 32-bit V8+ rule about
! which registers keep their upper halves -- confirm against the ABI.

! Incoming arguments, as seen in the %i registers after the save.
! res_ptr: vector that is both read (addend) and written (result).
#define res_ptr	%i0
! s1_ptr: vector that is only read (multiplicand limbs).
#define s1_ptr	%i1
! sz_arg: limb count as passed in.
#define sz_arg	%i2
! s2l_arg: multiplier limb as passed in.
#define s2l_arg	%i3
! sz: working limb counter, zero-extended copy of sz_arg.
#define sz	%o4
! carry: high 32 bits of the previous limb sum, fed into the next limb.
#define carry	%o5
! s2_limb: multiplier limb, zero-extended copy of s2l_arg.
#define s2_limb	%g1
! tmp1, tmp2: limbs loaded from s1_ptr.
#define tmp1	%l0
#define tmp2	%l1
! tmp3, tmp4: limbs loaded from res_ptr.
#define tmp3	%l2
#define tmp4	%l3
! tmp64_1, tmp64_2: 64-bit product, then product + addend + carry.
#define tmp64_1	%g3
#define tmp64_2	%o3

! __mpn_addmul_1 (res_ptr, s1_ptr, size, s2_limb)
!
! For each of the size limbs, forms the 64-bit value
!	s1_ptr[i] * s2_limb + res_ptr[i] + carry_in
! stores its low 32 bits back to res_ptr[i] and passes its high 32 bits
! on as the carry into the next limb.  The carry out of the last limb
! is returned in %o0 of the caller window.
!
! Limbs are 32-bit words (lduw/stw, 4 bytes apart).  Every operand is a
! zero-extended 32-bit value, so the sum is at most
! (2^32-1)^2 + 2*(2^32-1) = 2^64-1 and cannot overflow 64 bits.
!
! The main loop handles two limbs per pass; an odd trailing limb is
! handled at .Lfinal_limb.
! NOTE(review): size is presumably required to be at least 1 -- a size
! of 0 is not special-cased and would fall into the loop and process
! two limbs.
ENTRY(__mpn_addmul_1)
	save	%sp, -96, %sp		! new register window, 96-byte frame
	srl	sz_arg, 0, sz		! sz = size, upper 32 bits cleared
	srl	s2l_arg, 0, s2_limb	! multiplier, upper 32 bits cleared
	subcc	sz, 1, sz		! sz = size - 1, sets condition codes
	be,pn	%icc, .Lfinal_limb	! size == 1: skip the two-limb loop
	 clr	carry			! delay slot, always executed: carry = 0

! Two limbs per pass.  On entry sz = (limbs still to do) - 1.
.Lloop:
	lduw	[s1_ptr  + 0x00], tmp1	! first multiplicand limb
	lduw	[res_ptr + 0x00], tmp3	! first addend limb
	lduw	[s1_ptr  + 0x04], tmp2	! second multiplicand limb
	lduw	[res_ptr + 0x04], tmp4	! second addend limb
	mulx	tmp1, s2_limb, tmp64_1	! 64-bit product for the first limb
	add	s1_ptr, 8, s1_ptr	! advance source by two limbs
	mulx	tmp2, s2_limb, tmp64_2	! 64-bit product for the second limb
	sub	sz, 2, sz		! two limbs consumed
	add	res_ptr, 8, res_ptr	! advance early; stores below use -8/-4
	add	tmp3, tmp64_1, tmp64_1	! first product + addend
	add	carry, tmp64_1, tmp64_1	! ... + incoming carry
	stw	tmp64_1, [res_ptr - 0x08]	! low 32 bits are the result limb
	srlx	tmp64_1, 32, carry	! high 32 bits carry into second limb
	add	tmp4, tmp64_2, tmp64_2	! second product + addend
	add	carry, tmp64_2, tmp64_2	! ... + carry from the first limb
	stw	tmp64_2, [res_ptr - 0x04]	! low 32 bits are the result limb
	brgz	sz, .Lloop		! sz > 0: at least two limbs remain
	 srlx	tmp64_2, 32, carry	! delay slot: carry out of second limb

	! Here sz <= 0.  Negative means no limb is left; zero means
	! exactly one limb is left and control falls into .Lfinal_limb.
	brlz,pt	sz, .Lfinish
	 nop

! Single remaining limb; also the direct target when size == 1.
.Lfinal_limb:
	lduw	[s1_ptr + 0x00], tmp1	! last multiplicand limb
	lduw	[res_ptr + 0x00], tmp3	! last addend limb
	mulx	tmp1, s2_limb, tmp64_1	! 64-bit product
	add	tmp3, tmp64_1, tmp64_1	! + addend
	add	carry, tmp64_1, tmp64_1	! + incoming carry
	stw	tmp64_1, [res_ptr + 0x00]	! low 32 bits are the result limb
	srlx	tmp64_1, 32, carry	! high 32 bits are the final carry

.Lfinish:
	jmpl	%i7 + 0x8, %g0		! return to the caller
	 restore carry, 0, %o0		! delay slot: pop window, carry -> %o0 of caller
END(__mpn_addmul_1)