Diffstat (limited to 'REORG.TODO/sysdeps/sparc/sparc32/sub_n.S')
-rw-r--r--	REORG.TODO/sysdeps/sparc/sparc32/sub_n.S	328
1 file changed, 328 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/sparc/sparc32/sub_n.S b/REORG.TODO/sysdeps/sparc/sparc32/sub_n.S
new file mode 100644
index 0000000000..22ca71fa93
--- /dev/null
+++ b/REORG.TODO/sysdeps/sparc/sparc32/sub_n.S
@@ -0,0 +1,328 @@
+! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+!
+! Copyright (C) 1995-2017 Free Software Foundation, Inc.
+!
+! This file is part of the GNU MP Library.
+!
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+!
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+!
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not,
+! see <http://www.gnu.org/licenses/>.
+
+
+! INPUT PARAMETERS
+#define RES_PTR	%o0
+#define S1_PTR	%o1
+#define S2_PTR	%o2
+#define SIZE	%o3
+
+#include <sysdep.h>
+
+ENTRY(__mpn_sub_n)
+	xor	S2_PTR,RES_PTR,%g1
+	andcc	%g1,4,%g0
+	bne	LOC(1)			! branch if alignment differs
+	nop
+! **  V1a  **
+	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
+	be	LOC(v1)			! if no, branch
+	nop
+/* Subtract least significant limb separately to align RES_PTR and S2_PTR */
+	ld	[S1_PTR],%g4
+	add	S1_PTR,4,S1_PTR
+	ld	[S2_PTR],%g2
+	add	S2_PTR,4,S2_PTR
+	add	SIZE,-1,SIZE
+	subcc	%g4,%g2,%o4
+	st	%o4,[RES_PTR]
+	add	RES_PTR,4,RES_PTR
+LOC(v1):
+	addx	%g0,%g0,%o4		! save cy in register
+	cmp	SIZE,2			! if SIZE < 2 ...
+	bl	LOC(end2)		! ... branch to tail code
+	subcc	%g0,%o4,%g0		! restore cy
+
+	ld	[S1_PTR+0],%g4
+	addcc	SIZE,-10,SIZE
+	ld	[S1_PTR+4],%g1
+	ldd	[S2_PTR+0],%g2
+	blt	LOC(fin1)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 8 limbs until less than 8 limbs remain */
+LOC(loop1):
+	subxcc	%g4,%g2,%o4
+	ld	[S1_PTR+8],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[S1_PTR+12],%g1
+	ldd	[S2_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
+	subxcc	%g4,%g2,%o4
+	ld	[S1_PTR+16],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[S1_PTR+20],%g1
+	ldd	[S2_PTR+16],%g2
+	std	%o4,[RES_PTR+8]
+	subxcc	%g4,%g2,%o4
+	ld	[S1_PTR+24],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[S1_PTR+28],%g1
+	ldd	[S2_PTR+24],%g2
+	std	%o4,[RES_PTR+16]
+	subxcc	%g4,%g2,%o4
+	ld	[S1_PTR+32],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[S1_PTR+36],%g1
+	ldd	[S2_PTR+32],%g2
+	std	%o4,[RES_PTR+24]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-8,SIZE
+	add	S1_PTR,32,S1_PTR
+	add	S2_PTR,32,S2_PTR
+	add	RES_PTR,32,RES_PTR
+	bge	LOC(loop1)
+	subcc	%g0,%o4,%g0		! restore cy
+
+LOC(fin1):
+	addcc	SIZE,8-2,SIZE
+	blt	LOC(end1)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 2 limbs until less than 2 limbs remain */
+LOC(loope1):
+	subxcc	%g4,%g2,%o4
+	ld	[S1_PTR+8],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[S1_PTR+12],%g1
+	ldd	[S2_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-2,SIZE
+	add	S1_PTR,8,S1_PTR
+	add	S2_PTR,8,S2_PTR
+	add	RES_PTR,8,RES_PTR
+	bge	LOC(loope1)
+	subcc	%g0,%o4,%g0		! restore cy
+LOC(end1):
+	subxcc	%g4,%g2,%o4
+	subxcc	%g1,%g3,%o5
+	std	%o4,[RES_PTR+0]
+	addx	%g0,%g0,%o4		! save cy in register
+
+	andcc	SIZE,1,%g0
+	be	LOC(ret1)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract last limb */
+	ld	[S1_PTR+8],%g4
+	ld	[S2_PTR+8],%g2
+	subxcc	%g4,%g2,%o4
+	st	%o4,[RES_PTR+8]
+
+LOC(ret1):
+	retl
+	addx	%g0,%g0,%o0	! return carry-out from most significant limb
+
+LOC(1):	xor	S1_PTR,RES_PTR,%g1
+	andcc	%g1,4,%g0
+	bne	LOC(2)
+	nop
+! **  V1b  **
+	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
+	be	LOC(v1b)		! if no, branch
+	nop
+/* Subtract least significant limb separately to align RES_PTR and S1_PTR */
+	ld	[S2_PTR],%g4
+	add	S2_PTR,4,S2_PTR
+	ld	[S1_PTR],%g2
+	add	S1_PTR,4,S1_PTR
+	add	SIZE,-1,SIZE
+	subcc	%g2,%g4,%o4
+	st	%o4,[RES_PTR]
+	add	RES_PTR,4,RES_PTR
+LOC(v1b):
+	addx	%g0,%g0,%o4		! save cy in register
+	cmp	SIZE,2			! if SIZE < 2 ...
+	bl	LOC(end2)		! ... branch to tail code
+	subcc	%g0,%o4,%g0		! restore cy
+
+	ld	[S2_PTR+0],%g4
+	addcc	SIZE,-10,SIZE
+	ld	[S2_PTR+4],%g1
+	ldd	[S1_PTR+0],%g2
+	blt	LOC(fin1b)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 8 limbs until less than 8 limbs remain */
+LOC(loop1b):
+	subxcc	%g2,%g4,%o4
+	ld	[S2_PTR+8],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[S2_PTR+12],%g1
+	ldd	[S1_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
+	subxcc	%g2,%g4,%o4
+	ld	[S2_PTR+16],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[S2_PTR+20],%g1
+	ldd	[S1_PTR+16],%g2
+	std	%o4,[RES_PTR+8]
+	subxcc	%g2,%g4,%o4
+	ld	[S2_PTR+24],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[S2_PTR+28],%g1
+	ldd	[S1_PTR+24],%g2
+	std	%o4,[RES_PTR+16]
+	subxcc	%g2,%g4,%o4
+	ld	[S2_PTR+32],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[S2_PTR+36],%g1
+	ldd	[S1_PTR+32],%g2
+	std	%o4,[RES_PTR+24]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-8,SIZE
+	add	S1_PTR,32,S1_PTR
+	add	S2_PTR,32,S2_PTR
+	add	RES_PTR,32,RES_PTR
+	bge	LOC(loop1b)
+	subcc	%g0,%o4,%g0		! restore cy
+
+LOC(fin1b):
+	addcc	SIZE,8-2,SIZE
+	blt	LOC(end1b)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 2 limbs until less than 2 limbs remain */
+LOC(loope1b):
+	subxcc	%g2,%g4,%o4
+	ld	[S2_PTR+8],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[S2_PTR+12],%g1
+	ldd	[S1_PTR+8],%g2
+	std	%o4,[RES_PTR+0]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-2,SIZE
+	add	S1_PTR,8,S1_PTR
+	add	S2_PTR,8,S2_PTR
+	add	RES_PTR,8,RES_PTR
+	bge	LOC(loope1b)
+	subcc	%g0,%o4,%g0		! restore cy
+LOC(end1b):
+	subxcc	%g2,%g4,%o4
+	subxcc	%g3,%g1,%o5
+	std	%o4,[RES_PTR+0]
+	addx	%g0,%g0,%o4		! save cy in register
+
+	andcc	SIZE,1,%g0
+	be	LOC(ret1b)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract last limb */
+	ld	[S2_PTR+8],%g4
+	ld	[S1_PTR+8],%g2
+	subxcc	%g2,%g4,%o4
+	st	%o4,[RES_PTR+8]
+
+LOC(ret1b):
+	retl
+	addx	%g0,%g0,%o0	! return carry-out from most significant limb
+
+! **  V2  **
+/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
+   alignment of S2_PTR and RES_PTR differ.  Since there are only two ways
+   things can be aligned (that we care about) we now know that the alignment
+   of S1_PTR and S2_PTR is the same.  */
+
+LOC(2):	cmp	SIZE,1
+	be	LOC(jone)
+	nop
+	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
+	be	LOC(v2)			! if no, branch
+	nop
+/* Subtract least significant limb separately to align S1_PTR and S2_PTR */
+	ld	[S1_PTR],%g4
+	add	S1_PTR,4,S1_PTR
+	ld	[S2_PTR],%g2
+	add	S2_PTR,4,S2_PTR
+	add	SIZE,-1,SIZE
+	subcc	%g4,%g2,%o4
+	st	%o4,[RES_PTR]
+	add	RES_PTR,4,RES_PTR
+
+LOC(v2):
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-8,SIZE
+	blt	LOC(fin2)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 8 limbs until less than 8 limbs remain */
+LOC(loop2):
+	ldd	[S1_PTR+0],%g2
+	ldd	[S2_PTR+0],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[RES_PTR+0]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[RES_PTR+4]
+	ldd	[S1_PTR+8],%g2
+	ldd	[S2_PTR+8],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[RES_PTR+8]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[RES_PTR+12]
+	ldd	[S1_PTR+16],%g2
+	ldd	[S2_PTR+16],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[RES_PTR+16]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[RES_PTR+20]
+	ldd	[S1_PTR+24],%g2
+	ldd	[S2_PTR+24],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[RES_PTR+24]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[RES_PTR+28]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-8,SIZE
+	add	S1_PTR,32,S1_PTR
+	add	S2_PTR,32,S2_PTR
+	add	RES_PTR,32,RES_PTR
+	bge	LOC(loop2)
+	subcc	%g0,%o4,%g0		! restore cy
+
+LOC(fin2):
+	addcc	SIZE,8-2,SIZE
+	blt	LOC(end2)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract blocks of 2 limbs until less than 2 limbs remain */
+LOC(loope2):
+	ldd	[S1_PTR+0],%g2
+	ldd	[S2_PTR+0],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[RES_PTR+0]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[RES_PTR+4]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	SIZE,-2,SIZE
+	add	S1_PTR,8,S1_PTR
+	add	S2_PTR,8,S2_PTR
+	add	RES_PTR,8,RES_PTR
+	bge	LOC(loope2)
+	subcc	%g0,%o4,%g0		! restore cy
+LOC(end2):
+	andcc	SIZE,1,%g0
+	be	LOC(ret2)
+	subcc	%g0,%o4,%g0		! restore cy
+/* Subtract last limb */
+LOC(jone):
+	ld	[S1_PTR],%g4
+	ld	[S2_PTR],%g2
+	subxcc	%g4,%g2,%o4
+	st	%o4,[RES_PTR]
+
+LOC(ret2):
+	retl
+	addx	%g0,%g0,%o0	! return carry-out from most significant limb
+
+END(__mpn_sub_n)
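
For readers not fluent in SPARC v7/v8 assembly, the operation being unrolled above is ordinary multi-limb subtraction with borrow propagation: each 32-bit limb of S2_PTR is subtracted from the corresponding limb of S1_PTR together with the incoming borrow, least significant limb first, and the final borrow is the return value. Below is a minimal C sketch of those semantics, assuming a 32-bit limb as on sparc32; it is a reference model for illustration, not glibc's implementation, and ref_mpn_sub_n is a hypothetical name.

#include <stddef.h>
#include <stdint.h>

typedef uint32_t mp_limb_t;	/* 32-bit limb, as on sparc32 (assumption) */

/* Reference model of __mpn_sub_n: res[i] = s1[i] - s2[i] - borrow for
   i = 0 .. size-1, limbs stored least significant first.  Returns the
   borrow (0 or 1) out of the most significant limb.  */
static mp_limb_t
ref_mpn_sub_n (mp_limb_t *res, const mp_limb_t *s1,
	       const mp_limb_t *s2, size_t size)
{
  mp_limb_t cy = 0;
  for (size_t i = 0; i < size; i++)
    {
      mp_limb_t a = s1[i], b = s2[i];
      res[i] = a - b - cy;	/* wraps modulo 2^32, like subxcc */
      /* A borrow propagates when a < b, or when a == b and one was
	 already owed from the previous limb.  */
      cy = (a < b) || (a == b && cy);
    }
  return cy;
}

This model also explains the recurring addx %g0,%g0,%o4 / subcc %g0,%o4,%g0 pairs in the assembly: the addcc that updates SIZE clobbers the carry (borrow) flag, so each unrolled block first materializes the flag into %o4 (addx of two zeros plus carry), does its pointer and counter bookkeeping, and then re-injects the borrow into the condition codes in the branch delay slot by computing 0 - %o4.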