diff options
Diffstat (limited to 'sysdeps/alpha/alphaev5/add_n.s')
-rw-r--r-- | sysdeps/alpha/alphaev5/add_n.s | 175 |
1 files changed, 102 insertions, 73 deletions
diff --git a/sysdeps/alpha/alphaev5/add_n.s b/sysdeps/alpha/alphaev5/add_n.s index 2aaf041774..66cf82b3c3 100644 --- a/sysdeps/alpha/alphaev5/add_n.s +++ b/sysdeps/alpha/alphaev5/add_n.s @@ -35,84 +35,113 @@ __mpn_add_n: .frame $30,0,$26,0 - ldq $3,0($17) - ldq $4,0($18) - - subq $19,1,$19 - and $19,4-1,$2 # number of limbs in first loop - bis $31,$31,$0 - beq $2,.L0 # if multiple of 4 limbs, skip first loop - - subq $19,$2,$19 - -.Loop0: subq $2,1,$2 + or $31,$31,$25 # clear cy + subq $19,4,$19 # decr loop cnt + blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop + # Start software pipeline for 1st loop + ldq $0,0($18) + ldq $1,8($18) + ldq $4,0($17) ldq $5,8($17) - addq $4,$0,$4 - ldq $6,8($18) - cmpult $4,$0,$1 - addq $3,$4,$4 - cmpult $4,$3,$0 - stq $4,0($16) - or $0,$1,$0 - - addq $17,8,$17 - addq $18,8,$18 - bis $5,$5,$3 - bis $6,$6,$4 - addq $16,8,$16 - bne $2,.Loop0 - -.L0: beq $19,.Lend - + addq $17,32,$17 # update s1_ptr + ldq $2,16($18) + addq $0,$4,$20 # 1st main add + ldq $3,24($18) + subq $19,4,$19 # decr loop cnt + ldq $6,-16($17) + cmpult $20,$0,$25 # compute cy from last add + ldq $7,-8($17) + addq $1,$25,$28 # cy add + addq $18,32,$18 # update s2_ptr + addq $5,$28,$21 # 2nd main add + cmpult $28,$25,$8 # compute cy from last add + blt $19,.Lend1 # if less than 4 limbs remain, jump + # 1st loop handles groups of 4 limbs in a software pipeline .align 4 -.Loop: subq $19,4,$19 - unop - - ldq $6,8($18) - addq $4,$0,$0 +.Loop: cmpult $21,$28,$25 # compute cy from last add + ldq $0,0($18) + or $8,$25,$25 # combine cy from the two adds + ldq $1,8($18) + addq $2,$25,$28 # cy add + ldq $4,0($17) + addq $28,$6,$22 # 3rd main add ldq $5,8($17) - cmpult $0,$4,$1 - ldq $4,16($18) - addq $3,$0,$20 - cmpult $20,$3,$0 - ldq $3,16($17) - or $0,$1,$0 - addq $6,$0,$0 - cmpult $0,$6,$1 - ldq $6,24($18) - addq $5,$0,$21 - cmpult $21,$5,$0 - ldq $5,24($17) - or $0,$1,$0 - addq $4,$0,$0 - cmpult $0,$4,$1 - ldq $4,32($18) - addq $3,$0,$22 - cmpult $22,$3,$0 - ldq $3,32($17) - or $0,$1,$0 - addq $6,$0,$0 - cmpult $0,$6,$1 - addq $5,$0,$23 - cmpult $23,$5,$0 - or $0,$1,$0 - + cmpult $28,$25,$8 # compute cy from last add + cmpult $22,$28,$25 # compute cy from last add stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds stq $21,8($16) - stq $22,16($16) - stq $23,24($16) - - addq $17,32,$17 - addq $18,32,$18 - addq $16,32,$16 - bne $19,.Loop + addq $3,$25,$28 # cy add + addq $28,$7,$23 # 4th main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $23,$28,$25 # compute cy from last add + addq $17,32,$17 # update s1_ptr + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + addq $0,$25,$28 # cy add + ldq $2,16($18) + addq $4,$28,$20 # 1st main add + ldq $3,24($18) + cmpult $28,$25,$8 # compute cy from last add + ldq $6,-16($17) + cmpult $20,$28,$25 # compute cy from last add + ldq $7,-8($17) + or $8,$25,$25 # combine cy from the two adds + subq $19,4,$19 # decr loop cnt + stq $22,-16($16) + addq $1,$25,$28 # cy add + stq $23,-8($16) + addq $5,$28,$21 # 2nd main add + addq $18,32,$18 # update s2_ptr + cmpult $28,$25,$8 # compute cy from last add + bge $19,.Loop + # Finish software pipeline for 1st loop +.Lend1: cmpult $21,$28,$25 # compute cy from last add + or $8,$25,$25 # combine cy from the two adds + addq $2,$25,$28 # cy add + addq $28,$6,$22 # 3rd main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $22,$28,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + stq $21,8($16) + addq $3,$25,$28 # cy add + addq $28,$7,$23 # 4th main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $23,$28,$25 # compute cy from last add + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + stq $22,-16($16) + stq $23,-8($16) +.Lend2: addq $19,4,$19 # restore loop cnt + beq $19,.Lret + # Start software pipeline for 2nd loop + ldq $0,0($18) + ldq $4,0($17) + subq $19,1,$19 + beq $19,.Lend0 + # 2nd loop handles remaining 1-3 limbs + .align 4 +.Loop0: addq $0,$25,$28 # cy add + ldq $0,8($18) + addq $4,$28,$20 # main add + ldq $4,8($17) + addq $18,8,$18 + cmpult $28,$25,$8 # compute cy from last add + addq $17,8,$17 + stq $20,0($16) + cmpult $20,$28,$25 # compute cy from last add + subq $19,1,$19 # decr loop cnt + or $8,$25,$25 # combine cy from the two adds + addq $16,8,$16 + bne $19,.Loop0 +.Lend0: addq $0,$25,$28 # cy add + addq $4,$28,$20 # main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $20,$28,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds -.Lend: addq $4,$0,$4 - cmpult $4,$0,$1 - addq $3,$4,$4 - cmpult $4,$3,$0 - stq $4,0($16) - or $0,$1,$0 +.Lret: or $25,$31,$0 # return cy ret $31,($26),1 - .end __mpn_add_n |