about summary refs log tree commit diff
path: root/sysdeps
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2013-01-11 20:53:10 -0800
committerDavid S. Miller <davem@davemloft.net>2013-01-11 20:53:10 -0800
commit4ae4244d4b63a8974af4b6ac28ff32941086781e (patch)
treebc6e7fb632db08b0f0e3a03e967834cba36bb57f /sysdeps
parent8794a96418e19262d805cba44046482f41906356 (diff)
downloadglibc-4ae4244d4b63a8974af4b6ac28ff32941086781e.tar.gz
glibc-4ae4244d4b63a8974af4b6ac28ff32941086781e.tar.xz
glibc-4ae4244d4b63a8974af4b6ac28ff32941086781e.zip
Redo sparc 32-bit V9 GMP optimizations with fixed copyrights.
	* sysdeps/sparc/sparc32/sparcv9/mul_1.S: Properly optimize for 32-bit
	sparc V9 rather than using V8 code.
	* sysdeps/sparc/sparc32/sparcv9/addmul_1.S: Likewise.
	* sysdeps/sparc/sparc32/sparcv9/submul_1.S: Likewise.
Diffstat (limited to 'sysdeps')
-rw-r--r--sysdeps/sparc/sparc32/sparcv9/addmul_1.S82
-rw-r--r--sysdeps/sparc/sparc32/sparcv9/mul_1.S71
-rw-r--r--sysdeps/sparc/sparc32/sparcv9/submul_1.S83
3 files changed, 233 insertions, 3 deletions
diff --git a/sysdeps/sparc/sparc32/sparcv9/addmul_1.S b/sysdeps/sparc/sparc32/sparcv9/addmul_1.S
index 563bfb1c0a..7ba81d8a6c 100644
--- a/sysdeps/sparc/sparc32/sparcv9/addmul_1.S
+++ b/sysdeps/sparc/sparc32/sparcv9/addmul_1.S
@@ -1 +1,81 @@
-#include <sparcv8/addmul_1.S>
+! SPARC v9 32-bit __mpn_addmul_1 -- Multiply a limb vector with a limb
+! and add the result to a second limb vector.
+!
+! Copyright (C) 2013 Free Software Foundation, Inc.
+! This file is part of the GNU C Library.
+! Contributed by David S. Miller <davem@davemloft.net>
+!
+! The GNU C Library is free software; you can redistribute it and/or
+! modify it under the terms of the GNU Lesser General Public
+! License as published by the Free Software Foundation; either
+! version 2.1 of the License, or (at your option) any later version.
+!
+! The GNU C Library is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+! Lesser General Public License for more details.
+!
+! You should have received a copy of the GNU Lesser General Public
+! License along with the GNU C Library; if not, see
+! <http://www.gnu.org/licenses/>.
+
+#include <sysdep.h>
+
+#define res_ptr	%i0
+#define s1_ptr	%i1
+#define sz_arg	%i2
+#define s2l_arg	%i3
+#define sz	%o4
+#define carry	%o5
+#define s2_limb	%g1
+#define tmp1	%l0
+#define tmp2	%l1
+#define tmp3	%l2
+#define tmp4	%l3
+#define tmp64_1	%g3
+#define tmp64_2	%o3
+
+ENTRY(__mpn_addmul_1)
+	save	%sp, -96, %sp
+	srl	sz_arg, 0, sz
+	srl	s2l_arg, 0, s2_limb
+	subcc	sz, 1, sz
+	be,pn	%icc, .Lfinal_limb
+	 clr	carry
+
+.Lloop:
+	lduw	[s1_ptr  + 0x00], tmp1
+	lduw	[res_ptr + 0x00], tmp3
+	lduw	[s1_ptr  + 0x04], tmp2
+	lduw	[res_ptr + 0x04], tmp4
+	mulx	tmp1, s2_limb, tmp64_1
+	add	s1_ptr, 8, s1_ptr
+	mulx	tmp2, s2_limb, tmp64_2
+	sub	sz, 2, sz
+	add	res_ptr, 8, res_ptr
+	add	tmp3, tmp64_1, tmp64_1
+	add	carry, tmp64_1, tmp64_1
+	stw	tmp64_1, [res_ptr - 0x08]
+	srlx	tmp64_1, 32, carry
+	add	tmp4, tmp64_2, tmp64_2
+	add	carry, tmp64_2, tmp64_2
+	stw	tmp64_2, [res_ptr - 0x04]
+	brgz	sz, .Lloop
+	 srlx	tmp64_2, 32, carry
+
+	brlz,pt	sz, .Lfinish
+	 nop
+
+.Lfinal_limb:
+	lduw	[s1_ptr + 0x00], tmp1
+	lduw	[res_ptr + 0x00], tmp3
+	mulx	tmp1, s2_limb, tmp64_1
+	add	tmp3, tmp64_1, tmp64_1
+	add	carry, tmp64_1, tmp64_1
+	stw	tmp64_1, [res_ptr + 0x00]
+	srlx	tmp64_1, 32, carry
+
+.Lfinish:
+	jmpl	%i7 + 0x8, %g0
+	 restore carry, 0, %o0
+END(__mpn_addmul_1)
diff --git a/sysdeps/sparc/sparc32/sparcv9/mul_1.S b/sysdeps/sparc/sparc32/sparcv9/mul_1.S
index 42284eada6..e9a537196e 100644
--- a/sysdeps/sparc/sparc32/sparcv9/mul_1.S
+++ b/sysdeps/sparc/sparc32/sparcv9/mul_1.S
@@ -1 +1,70 @@
-#include <sparcv8/mul_1.S>
+! SPARC v9 32-bit __mpn_mul_1 -- Multiply a limb vector with a single
+! limb and store the product in a second limb vector.
+!
+! Copyright (C) 2013 Free Software Foundation, Inc.
+! This file is part of the GNU C Library.
+! Contributed by David S. Miller <davem@davemloft.net>
+!
+! The GNU C Library is free software; you can redistribute it and/or
+! modify it under the terms of the GNU Lesser General Public
+! License as published by the Free Software Foundation; either
+! version 2.1 of the License, or (at your option) any later version.
+!
+! The GNU C Library is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+! Lesser General Public License for more details.
+!
+! You should have received a copy of the GNU Lesser General Public
+! License along with the GNU C Library; if not, see
+! <http://www.gnu.org/licenses/>.
+
+#include <sysdep.h>
+
+#define res_ptr	%o0
+#define s1_ptr	%o1
+#define sz	%o2
+#define s2_limb	%o3
+#define carry	%o5
+#define tmp1	%g1
+#define tmp2	%g2
+#define tmp3	%g3
+#define tmp4	%o4
+
+ENTRY(__mpn_mul_1)
+	srl	sz, 0, sz
+	srl	s2_limb, 0, s2_limb
+	subcc	sz, 1, sz
+	be,pn	%icc, .Lfinal_limb
+	 clr	carry
+
+.Lloop:
+	lduw	[s1_ptr + 0x00], tmp1
+	lduw	[s1_ptr + 0x04], tmp2
+	mulx	tmp1, s2_limb, tmp3
+	add	s1_ptr, 8, s1_ptr
+	mulx	tmp2, s2_limb, tmp4
+	sub	sz, 2, sz
+	add	res_ptr, 8, res_ptr
+	add	carry, tmp3, tmp3
+	stw	tmp3, [res_ptr - 0x08]
+	srlx	tmp3, 32, carry
+	add	carry, tmp4, tmp4
+	stw	tmp4, [res_ptr - 0x04]
+	brgz	sz, .Lloop
+	 srlx	tmp4, 32, carry
+
+	brlz,pt	sz, .Lfinish
+	 nop
+
+.Lfinal_limb:
+	lduw	[s1_ptr + 0x00], tmp1
+	mulx	tmp1, s2_limb, tmp3
+	add	carry, tmp3, tmp3
+	stw	tmp3, [res_ptr + 0x00]
+	srlx	tmp3, 32, carry
+
+.Lfinish:
+	retl
+	 mov	carry, %o0
+END(__mpn_mul_1)
diff --git a/sysdeps/sparc/sparc32/sparcv9/submul_1.S b/sysdeps/sparc/sparc32/sparcv9/submul_1.S
index de69533f63..8985e2a4cb 100644
--- a/sysdeps/sparc/sparc32/sparcv9/submul_1.S
+++ b/sysdeps/sparc/sparc32/sparcv9/submul_1.S
@@ -1 +1,82 @@
-#include <sparcv8/submul_1.S>
+! SPARC v9 32-bit __mpn_submul_1 -- Multiply a limb vector with a limb
+! and subtract the result from a second limb vector.
+!
+! Copyright (C) 2013 Free Software Foundation, Inc.
+! This file is part of the GNU C Library.
+! Contributed by David S. Miller <davem@davemloft.net>
+!
+! The GNU C Library is free software; you can redistribute it and/or
+! modify it under the terms of the GNU Lesser General Public
+! License as published by the Free Software Foundation; either
+! version 2.1 of the License, or (at your option) any later version.
+!
+! The GNU C Library is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+! Lesser General Public License for more details.
+!
+! You should have received a copy of the GNU Lesser General Public
+! License along with the GNU C Library; if not, see
+! <http://www.gnu.org/licenses/>.
+
+#include <sysdep.h>
+
+#define res_ptr	%i0
+#define s1_ptr	%i1
+#define sz_arg	%i2
+#define s2l_arg	%i3
+#define sz	%o4
+#define carry	%o5
+#define s2_limb	%g1
+#define tmp1	%l0
+#define tmp2	%l1
+#define tmp3	%l2
+#define tmp4	%l3
+#define tmp64_1	%g3
+#define tmp64_2	%o3
+
+ENTRY(__mpn_submul_1)
+	save	%sp, -96, %sp
+	srl	sz_arg, 0, sz
+	srl	s2l_arg, 0, s2_limb
+	subcc	sz, 1, sz
+	be,pn	%icc, .Lfinal_limb
+	 subcc	%g0, 0, carry
+
+.Lloop:
+	lduw	[s1_ptr  + 0x00], tmp1
+	lduw	[res_ptr + 0x00], tmp3
+	lduw	[s1_ptr  + 0x04], tmp2
+	lduw	[res_ptr + 0x04], tmp4
+	mulx	tmp1, s2_limb, tmp64_1
+	add	s1_ptr, 8, s1_ptr
+	mulx	tmp2, s2_limb, tmp64_2
+	sub	sz, 2, sz
+	add	res_ptr, 8, res_ptr
+	addx	carry, tmp64_1, tmp64_1
+	srlx	tmp64_1, 32, carry
+	subcc	tmp3, tmp64_1, tmp64_1
+	stw	tmp64_1, [res_ptr - 0x08]
+	addx	carry, tmp64_2, tmp64_2
+	srlx	tmp64_2, 32, carry
+	subcc	tmp4, tmp64_2, tmp64_2
+	brgz	sz, .Lloop
+	 stw	tmp64_2, [res_ptr - 0x04]
+
+	brlz,pt	sz, .Lfinish
+	 nop
+
+.Lfinal_limb:
+	lduw	[s1_ptr + 0x00], tmp1
+	lduw	[res_ptr + 0x00], tmp3
+	mulx	tmp1, s2_limb, tmp64_1
+	addx	carry, tmp64_1, tmp64_1
+	srlx	tmp64_1, 32, carry
+	subcc	tmp3, tmp64_1, tmp64_1
+	stw	tmp64_1, [res_ptr + 0x00]
+
+.Lfinish:
+	addx	carry, 0, carry
+	jmpl	%i7 + 0x8, %g0
+	 restore carry, 0, %o0
+END(__mpn_submul_1)