about summary refs log tree commit diff
path: root/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2013-01-11 23:31:06 -0800
committer David S. Miller <davem@davemloft.net> 2013-01-11 23:39:02 -0800
commit 32fcb36dbf4f729d0b0e6d5c386fd1d68486463d (patch)
tree c48301cc86c1e7342bcc7c38f5774c65a6b63963 /sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S
parent 4ae4244d4b63a8974af4b6ac28ff32941086781e (diff)
download glibc-32fcb36dbf4f729d0b0e6d5c386fd1d68486463d.tar.gz
glibc-32fcb36dbf4f729d0b0e6d5c386fd1d68486463d.tar.xz
glibc-32fcb36dbf4f729d0b0e6d5c386fd1d68486463d.zip
Add 64-bit VIS3 optimized GMP routines for sparc.
	* math/Makefile: Recognize gmp-sysdep_routines.
	* sysdeps/sparc/sparc64/multiarch/Makefile: Add VIS3 optimized GMP routines
	to sysdeps.
	* sysdeps/sparc/sparc64/multiarch/add_n-vis3.S: New file.
	* sysdeps/sparc/sparc64/multiarch/add_n.S: New file.
	* sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S: New file.
	* sysdeps/sparc/sparc64/multiarch/addmul_1.S: New file.
	* sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S: New file.
	* sysdeps/sparc/sparc64/multiarch/mul_1.S: New file.
	* sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S: New file.
	* sysdeps/sparc/sparc64/multiarch/sub_n.S: New file.
	* sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S: New file.
	* sysdeps/sparc/sparc64/multiarch/submul_1.S: New file.
Diffstat (limited to 'sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S')
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S 87
1 files changed, 87 insertions, 0 deletions
diff --git a/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S b/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S
new file mode 100644
index 0000000000..8f10f918aa
--- /dev/null
+++ b/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S
@@ -0,0 +1,87 @@
+! SPARC v9 64-bit VIS3 __mpn_submul_1 -- Multiply a limb vector with a
+! limb and subtract the result from a second limb vector.
+!
+! Copyright (C) 2013 Free Software Foundation, Inc.
+! This file is part of the GNU C Library.
+! Contributed by David S. Miller <davem@davemloft.net>
+!
+! The GNU C Library is free software; you can redistribute it and/or
+! modify it under the terms of the GNU Lesser General Public
+! License as published by the Free Software Foundation; either
+! version 2.1 of the License, or (at your option) any later version.
+!
+! The GNU C Library is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+! Lesser General Public License for more details.
+!
+! You should have received a copy of the GNU Lesser General Public
+! License along with the GNU C Library; if not, see
+! <http://www.gnu.org/licenses/>.
+
+#include <sysdep.h>
+
+#define res_ptr	%i0	/* vector that is loaded, reduced and stored back */
+#define s1_ptr	%i1	/* vector whose limbs are multiplied by s2_limb */
+#define sz	%i2	/* limb count; used as a biased down-counter */
+#define s2_limb	%i3	/* the single multiplier limb */
+#define carry	%o5	/* running carry/borrow limb; returned to the caller */
+#define tmp1	%g1	/* first limb loaded from s1_ptr */
+#define tmp2	%g2	/* second limb loaded from s1_ptr */
+#define tmp3	%g3	/* first limb loaded from res_ptr */
+#define tmp4	%o4	/* second limb loaded from res_ptr */
+#define tmp5	%l0	/* low product word, then first result limb */
+#define tmp6	%l1	/* high product word of the first limb */
+#define tmp7	%l2	/* low product word, then second result limb */
+#define tmp8	%l3	/* high product word of the second limb */
+
+	.register	%g2,#scratch	/* tell the assembler %g2 is used as scratch */
+	.register	%g3,#scratch	/* tell the assembler %g3 is used as scratch */
+ENTRY(__mpn_submul_1_vis3)
+	save	%sp, -176, %sp		! new register window; arguments are now in %i0-%i3
+	sub	sz, 1, sz		! bias the count: zero now means exactly one limb
+	brz	sz, .Lfinal_limb	! tests all 64 bits (plain be only looks at %icc)
+	 clr	carry			! delay slot: nothing carried into the first limb
+
+.Lloop:					! two limbs per iteration
+	ldx	[s1_ptr  + 0x00], tmp1	! first source limb
+	ldx	[res_ptr + 0x00], tmp3	! first destination limb
+	ldx	[s1_ptr  + 0x08], tmp2	! second source limb
+	ldx	[res_ptr + 0x08], tmp4	! second destination limb
+	mulx	tmp1, s2_limb, tmp5	! low 64 bits of the first product
+	add	s1_ptr, 0x10, s1_ptr
+	umulxhi	tmp1, s2_limb, tmp6	! high 64 bits of the first product
+	add	res_ptr, 0x10, res_ptr	! advanced early; the stores below use negative offsets
+	mulx	tmp2, s2_limb, tmp7	! low 64 bits of the second product
+	sub	sz, 2, sz		! two limbs consumed
+	umulxhi	tmp2, s2_limb, tmp8	! high 64 bits of the second product
+	addcc	carry, tmp5, tmp5	! low word plus incoming carry
+	addxc	%g0, tmp6, carry	! carry = high word + carry-out of that add
+	subcc	tmp3, tmp5, tmp5	! destination limb minus that sum
+	addxc	%g0, carry, carry	! fold the borrow into the outgoing carry
+	stx	tmp5, [res_ptr - 0x10]	! first result limb
+	addcc	carry, tmp7, tmp7	! same sequence for the second limb
+	addxc	%g0, tmp8, carry
+	subcc	tmp4, tmp7, tmp7
+	addxc	%g0, carry, carry
+	brgz	sz, .Lloop		! positive: at least two more limbs remain
+	 stx	tmp7, [res_ptr - 0x08]	! delay slot: second result limb
+
+	brlz,pt	sz, .Lfinish		! negative: no limb left over
+	 nop
+
+.Lfinal_limb:				! exactly one limb remains
+	ldx	[s1_ptr  + 0x00], tmp1
+	ldx	[res_ptr + 0x00], tmp3
+	mulx	tmp1, s2_limb, tmp5	! low 64 bits of the product
+	umulxhi	tmp1, s2_limb, tmp6	! high 64 bits of the product
+	addcc	carry, tmp5, tmp5	! low word plus incoming carry
+	addxc	%g0, tmp6, carry	! carry = high word + carry-out of that add
+	subcc	tmp3, tmp5, tmp5	! destination limb minus that sum
+	addxc	%g0, carry, carry	! fold the borrow into the returned carry
+	stx	tmp5, [res_ptr + 0x00]
+
+.Lfinish:
+	jmpl	%i7 + 8, %g0		! return to the caller
+	 restore carry, 0, %o0		! delay slot: pop the window, carry becomes the return value
+END(__mpn_submul_1_vis3)