diff options
Diffstat (limited to 'sysdeps/sparc/sparc32/mul_1.S')
-rw-r--r-- | sysdeps/sparc/sparc32/mul_1.S | 244 |
1 files changed, 74 insertions, 170 deletions
diff --git a/sysdeps/sparc/sparc32/mul_1.S b/sysdeps/sparc/sparc32/mul_1.S index acce57ec12..86619c71d6 100644 --- a/sysdeps/sparc/sparc32/mul_1.S +++ b/sysdeps/sparc/sparc32/mul_1.S @@ -1,198 +1,102 @@ -! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store -! the result in a second limb vector. -! +! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and +! store the product in a second limb vector. + ! Copyright (C) 1992-2019 Free Software Foundation, Inc. -! + ! This file is part of the GNU MP Library. -! + ! The GNU MP Library is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at your ! option) any later version. -! + ! The GNU MP Library is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public ! License for more details. -! + ! You should have received a copy of the GNU Lesser General Public License ! along with the GNU MP Library; see the file COPYING.LIB. If not, ! see <https://www.gnu.org/licenses/>. ! INPUT PARAMETERS -! RES_PTR o0 -! S1_PTR o1 -! SIZE o2 -! S2_LIMB o3 - -! ADD CODE FOR SMALL MULTIPLIERS! -!1: ld -! st -! -!2: ld ,a -! addxcc a,a,x -! st x, -! -!3_unrolled: -! ld ,a -! addxcc a,a,x1 ! 2a + cy -! addx %g0,%g0,x2 -! addcc a,x1,x ! 3a + c -! st x, -! -! ld ,a -! addxcc a,a,y1 -! addx %g0,%g0,y2 -! addcc a,y1,x -! st x, -! -!4_unrolled: -! ld ,a -! srl a,2,x1 ! 4a -! addxcc y2,x1,x -! sll a,30,x2 -! st x, -! -! ld ,a -! srl a,2,y1 -! addxcc x2,y1,y -! sll a,30,y2 -! st x, -! -!5_unrolled: -! ld ,a -! srl a,2,x1 ! 4a -! addxcc a,x1,x ! 5a + c -! sll a,30,x2 -! addx %g0,x2,x2 -! st x, -! -! ld ,a -! srl a,2,y1 -! addxcc a,y1,x -! sll a,30,y2 -! addx %g0,y2,y2 -! st x, -! -!8_unrolled: -! ld ,a -! srl a,3,x1 ! 8a -! addxcc y2,x1,x -! sll a,29,x2 -! st x, -! -! ld ,a -! srl a,3,y1 -! addxcc x2,y1,y -! sll a,29,y2 -! st x, +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 #include <sysdep.h> ENTRY(__mpn_mul_1) - ! Make S1_PTR and RES_PTR point at the end of their blocks - ! and put (- 4 x SIZE) in index/loop counter. - sll %o2,2,%o2 - add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval - add %o1,%o2,%o1 - sub %g0,%o2,%o2 + sll %o2,4,%g1 + mov %o7,%g4 ! Save return address register + and %g1,(4-1)<<4,%g1 +1: call 2f + add %o7,3f-1b,%g3 +2: mov %g4,%o7 ! Restore return address register + jmp %g3+%g1 + ld [%o1+0],%o4 ! 1 - cmp %o3,0xfff - bgu LOC(large) + .align 4 +3: +LOC(00): + add %o0,-4,%o0 + add %o1,-4,%o1 + b LOC(loop00) /* 4, 8, 12, ... */ + orcc %g0,%g0,%g2 +LOC(01): + b LOC(loop01) /* 1, 5, 9, ... */ + orcc %g0,%g0,%g2 nop + nop +LOC(10): + add %o0,-12,%o0 /* 2, 6, 10, ... */ + add %o1,4,%o1 + b LOC(loop10) + orcc %g0,%g0,%g2 + nop +LOC(11): + add %o0,-8,%o0 /* 3, 7, 11, ... */ + add %o1,-8,%o1 + b LOC(loop11) + orcc %g0,%g0,%g2 - ld [%o1+%o2],%o5 - mov 0,%o0 - b LOC(0) - add %o4,-4,%o4 -LOC(loop0): - st %g1,[%o4+%o2] -LOC(0): wr %g0,%o3,%y - sra %o5,31,%g2 - and %o3,%g2,%g2 - andcc %g1,0,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,%o5,%g1 - mulscc %g1,0,%g1 - sra %g1,20,%g4 - sll %g1,12,%g1 - rd %y,%g3 - srl %g3,20,%g3 - or %g1,%g3,%g1 - - addcc %g1,%o0,%g1 - addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb - addcc %o2,4,%o2 ! loop counter - bne,a LOC(loop0) - ld [%o1+%o2],%o5 - - retl - st %g1,[%o4+%o2] - - -LOC(large): - ld [%o1+%o2],%o5 - mov 0,%o0 - sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0 - b LOC(1) - add %o4,-4,%o4 LOC(loop): - st %g3,[%o4+%o2] -LOC(1): wr %g0,%o5,%y - and %o5,%g4,%g2 ! g2 = S1_LIMB iff S2_LIMB < 0, else 0 - andcc %g0,%g0,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%o3,%g1 - mulscc %g1,%g0,%g1 - rd %y,%g3 - addcc %g3,%o0,%g3 - addx %g2,%g1,%o0 ! add sign-compensation and cy to hi limb - addcc %o2,4,%o2 ! loop counter - bne,a LOC(loop) - ld [%o1+%o2],%o5 + addcc %g3,%g2,%g3 ! 1 + ld [%o1+4],%o4 ! 2 + st %g3,[%o0+0] ! 1 + rd %y,%g2 ! 1 +LOC(loop00): + umul %o4,%o3,%g3 ! 2 + addxcc %g3,%g2,%g3 ! 2 + ld [%o1+8],%o4 ! 3 + st %g3,[%o0+4] ! 2 + rd %y,%g2 ! 2 +LOC(loop11): + umul %o4,%o3,%g3 ! 3 + addxcc %g3,%g2,%g3 ! 3 + ld [%o1+12],%o4 ! 4 + add %o1,16,%o1 + st %g3,[%o0+8] ! 3 + rd %y,%g2 ! 3 +LOC(loop10): + umul %o4,%o3,%g3 ! 4 + addxcc %g3,%g2,%g3 ! 4 + ld [%o1+0],%o4 ! 1 + st %g3,[%o0+12] ! 4 + add %o0,16,%o0 + rd %y,%g2 ! 4 + addx %g0,%g2,%g2 +LOC(loop01): + addcc %o2,-4,%o2 + bg LOC(loop) + umul %o4,%o3,%g3 ! 1 + addcc %g3,%g2,%g3 ! 4 + st %g3,[%o0+0] ! 4 + rd %y,%g2 ! 4 retl - st %g3,[%o4+%o2] + addx %g0,%g2,%o0 END(__mpn_mul_1) |