diff options
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | sysdeps/alpha/div.S | 102 | ||||
-rw-r--r-- | sysdeps/alpha/divq.S | 12 | ||||
-rw-r--r-- | sysdeps/alpha/ldiv.S | 228 | ||||
-rw-r--r-- | sysdeps/alpha/remq.S | 12 |
5 files changed, 223 insertions, 138 deletions
diff --git a/ChangeLog b/ChangeLog index 15a0012898..38000c9e9d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2004-07-30 Richard Henderson <rth@redhat.com> + + * sysdeps/alpha/divq.S: Save t3 before it gets clobbered. + * sysdeps/alpha/remq.S: Likewise. + * sysdeps/alpha/div.S, sysdeps/alpha/ldiv.S: Rewrite with the + new division algorithms in divl.S and divq.S respectively. + 2004-07-28 GOTO Masanori <gotom@debian.or.jp> * timezone/asia: Update from tzdata2004b. diff --git a/sysdeps/alpha/div.S b/sysdeps/alpha/div.S index 2bc3d56a11..e0eb7e9796 100644 --- a/sysdeps/alpha/div.S +++ b/sysdeps/alpha/div.S @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 2004 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Richard Henderson <rth@tamu.edu>. @@ -17,13 +17,13 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ -#include <sysdep.h> +#include "div_libc.h" -#ifdef __linux__ -# include <asm/gentrap.h> -# include <asm/pal.h> +#undef FRAME +#ifdef __alpha_fix__ +#define FRAME 0 #else -# include <machine/pal.h> +#define FRAME 16 #endif .set noat @@ -32,78 +32,54 @@ .globl div .ent div div: - .frame sp, 0, ra + .frame sp, FRAME, ra +#if FRAME > 0 + lda sp, -FRAME(sp) +#endif #ifdef PROF + .set macro ldgp gp, 0(pv) lda AT, _mcount jsr AT, (AT), _mcount + .set nomacro .prologue 1 #else .prologue 0 #endif -#define divisor t1 -#define mask t2 -#define quotient t3 -#define modulus t4 -#define tmp1 t5 -#define tmp2 t6 -#define compare t7 - - /* find correct sign for input to unsigned divide loop. */ - negl a1, modulus # e0 : - negl a2, divisor # .. e1 : - sextl a1, a1 # e0 : - sextl a2, a2 # .. e1 : - mov zero, quotient # e0 : - mov 1, mask # .. e1 : - cmovge a1, a1, modulus # e0 : - cmovge a2, a2, divisor # .. e1 : - beq a2, $divbyzero # e1 : - unop # : - - /* shift divisor left, using 3-bit shifts for 32-bit divides as we - can't overflow. Three-bit shifts will result in looping three - times less here, but can result in two loops more later. Thus - using a large shift isn't worth it (and s8addq pairs better than - a shift). */ - -1: cmpult divisor, modulus, compare # e0 : - s8addq divisor, zero, divisor # .. e1 : - s8addq mask, zero, mask # e0 : - bne compare, 1b # .. e1 : - - /* start to go right again. */ -2: addq quotient, mask, tmp2 # e1 : - srl mask, 1, mask # .. e0 : - cmpule divisor, modulus, compare # e0 : - subq modulus, divisor, tmp1 # .. e1 : - cmovne compare, tmp2, quotient # e1 : - srl divisor, 1, divisor # .. e0 : - cmovne compare, tmp1, modulus # e0 : - bne mask, 2b # .. e1 : - - /* find correct sign for result. */ - xor a1, a2, compare # e0 : - negl quotient, tmp1 # .. e1 : - negl modulus, tmp2 # e0 : - cmovlt compare, tmp1, quotient # .. e1 : - cmovlt a1, tmp2, modulus # e1 : - - /* and store it away in the structure. */ - stl quotient, 0(a0) # .. e0 : - mov a0, v0 # e1 : - stl modulus, 4(a0) # .. e0 : - ret # e1 : + beq $18, $divbyzero + + _ITOFT2 $17, $f0, 0, $18, $f1, 8 + + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + cvttq/c $f0, $f0 + + _FTOIT $f0, $0, 0 + + mull $0, $18, $1 + subl $17, $1, $1 + + stl $0, 0(a0) + stl $1, 4(a0) + mov a0, v0 + +#if FRAME > 0 + lda sp, FRAME(sp) +#endif + ret $divbyzero: mov a0, v0 - ldiq a0, GEN_INTDIV + lda a0, GEN_INTDIV call_pal PAL_gentrap - - /* if trap returns, return zero. */ stl zero, 0(v0) stl zero, 4(v0) + +#if FRAME > 0 + lda sp, FRAME(sp) +#endif ret .end div diff --git a/sysdeps/alpha/divq.S b/sysdeps/alpha/divq.S index 4df79829f9..cab6c34ad1 100644 --- a/sysdeps/alpha/divq.S +++ b/sysdeps/alpha/divq.S @@ -115,16 +115,16 @@ $fix_sign_in_ret1: _FTOIT $f0, Q, 8 .align 3 $fix_sign_in_ret2: + ldt $f0, 0(sp) + stq t3, 0(sp) + cfi_restore ($f0) + cfi_rel_offset (t3, 0) + mulq Q, Y, QY + unop stq t4, 8(sp) - - ldt $f0, 0(sp) unop cfi_rel_offset (t4, 8) - cfi_restore ($f0) - stq t3, 0(sp) - unop - cfi_rel_offset (t3, 0) subq QY, X, R mov Y, SY diff --git a/sysdeps/alpha/ldiv.S b/sysdeps/alpha/ldiv.S index 81b48cd559..c90edfb784 100644 --- a/sysdeps/alpha/ldiv.S +++ b/sysdeps/alpha/ldiv.S @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997, 2001 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 2001, 2004 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Richard Henderson <rth@tamu.edu>. @@ -17,93 +17,195 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ -#include <sysdep.h> +#include "div_libc.h" -#ifdef __linux__ -# include <asm/gentrap.h> -# include <asm/pal.h> +#undef FRAME +#ifdef __alpha_fix__ +#define FRAME 0 #else -# include <machine/pal.h> +#define FRAME 16 #endif +#undef X +#undef Y +#define X $17 +#define Y $18 + .set noat .align 4 .globl ldiv .ent ldiv ldiv: - .frame sp, 0, ra + .frame sp, FRAME, ra +#if FRAME > 0 + lda sp, -FRAME(sp) +#endif #ifdef PROF + .set macro ldgp gp, 0(pv) lda AT, _mcount jsr AT, (AT), _mcount + .set nomacro .prologue 1 #else .prologue 0 #endif -#define divisor t1 -#define mask t2 -#define quotient t3 -#define modulus t4 -#define tmp1 t5 -#define tmp2 t6 -#define compare t7 - - /* find correct sign for input to unsigned divide loop. */ - mov a1, modulus # e0 : - mov a2, divisor # .. e1 : - negq a1, tmp1 # e0 : - negq a2, tmp2 # .. e1 : - mov zero, quotient # e0 : - mov 1, mask # .. e1 : - cmovlt a1, tmp1, modulus # e0 : - cmovlt a2, tmp2, divisor # .. e1 : - beq a2, $divbyzero # e1 : - unop # : - - /* shift divisor left. */ -1: cmpult divisor, modulus, compare # e0 : - blt divisor, 2f # .. e1 : - addq divisor, divisor, divisor # e0 : - addq mask, mask, mask # .. e1 : - bne compare, 1b # e1 : - unop # : - - /* start to go right again. */ -2: addq quotient, mask, tmp2 # e1 : - srl mask, 1, mask # .. e0 : - cmpule divisor, modulus, compare # e0 : - subq modulus, divisor, tmp1 # .. e1 : - cmovne compare, tmp2, quotient # e1 : - srl divisor, 1, divisor # .. e0 : - cmovne compare, tmp1, modulus # e0 : - bne mask, 2b # .. e1 : - - /* find correct sign for result. */ - xor a1, a2, compare # e0 : - negq quotient, tmp1 # .. e1 : - negq modulus, tmp2 # e0 : - cmovlt compare, tmp1, quotient # .. e1 : - cmovlt a1, tmp2, modulus # e1 : - - /* and store it away in the structure. */ -9: stq quotient, 0(a0) # .. e0 : - mov a0, v0 # e1 : - stq modulus, 8(a0) # .. e0 : - ret # e1 : + beq Y, $divbyzero + + _ITOFT2 X, $f0, 0, Y, $f1, 8 + + .align 4 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + unop + + /* Check to see if X fit in the double as an exact value. */ + sll X, (64-53), AT + sra AT, (64-53), AT + cmpeq X, AT, AT + beq AT, $x_big + + /* If we get here, we're expecting exact results from the division. + Do nothing else besides convert and clean up. */ + cvttq/c $f0, $f0 + _FTOIT $f0, $0, 0 + +$egress: + mulq $0, Y, $1 + subq X, $1, $1 + + stq $0, 0($16) + stq $1, 8($16) + mov $16, $0 + +#if FRAME > 0 + lda sp, FRAME(sp) +#endif + ret + + .align 4 +$x_big: + /* If we get here, X is large enough that we don't expect exact + results, and neither X nor Y got mis-translated for the fp + division. Our task is to take the fp result, figure out how + far it's off from the correct result and compute a fixup. */ + +#define Q v0 /* quotient */ +#define R t0 /* remainder */ +#define SY t1 /* scaled Y */ +#define S t2 /* scalar */ +#define QY t3 /* Q*Y */ + + /* The fixup code below can only handle unsigned values. */ + or X, Y, AT + mov $31, t5 + blt AT, $fix_sign_in +$fix_sign_in_ret1: + cvttq/c $f0, $f0 + + _FTOIT $f0, Q, 8 + .align 3 +$fix_sign_in_ret2: + mulq Q, Y, QY + + .align 4 + subq QY, X, R + mov Y, SY + mov 1, S + bgt R, $q_high + +$q_high_ret: + subq X, QY, R + mov Y, SY + mov 1, S + bgt R, $q_low + +$q_low_ret: + negq Q, t4 + cmovlbs t5, t4, Q + br $egress + + .align 4 + /* The quotient that we computed was too large. We need to reduce + it by S such that Y*S >= R. Obviously the closer we get to the + correct value the better, but overshooting high is ok, as we'll + fix that up later. */ +0: + addq SY, SY, SY + addq S, S, S +$q_high: + cmpult SY, R, AT + bne AT, 0b + + subq Q, S, Q + unop + subq QY, SY, QY + br $q_high_ret + + .align 4 + /* The quotient that we computed was too small. Divide Y by the + current remainder (R) and add that to the existing quotient (Q). + The expectation, of course, is that R is much smaller than X. */ + /* Begin with a shift-up loop. Compute S such that Y*S >= R. We + already have a copy of Y in SY and the value 1 in S. */ +0: + addq SY, SY, SY + addq S, S, S +$q_low: + cmpult SY, R, AT + bne AT, 0b + + /* Shift-down and subtract loop. Each iteration compares our scaled + Y (SY) with the remainder (R); if SY <= R then X is divisible by + Y's scalar (S) so add it to the quotient (Q). */ +2: addq Q, S, t3 + srl S, 1, S + cmpule SY, R, AT + subq R, SY, t4 + + cmovne AT, t3, Q + cmovne AT, t4, R + srl SY, 1, SY + bne S, 2b + + br $q_low_ret + + .align 4 +$fix_sign_in: + /* If we got here, then X|Y is negative. Need to adjust everything + such that we're doing unsigned division in the fixup loop. */ + /* T5 is true if result should be negative. */ + xor X, Y, AT + cmplt AT, 0, t5 + cmplt X, 0, AT + negq X, t0 + + cmovne AT, t0, X + cmplt Y, 0, AT + negq Y, t0 + + cmovne AT, t0, Y + blbc t5, $fix_sign_in_ret1 + + cvttq/c $f0, $f0 + _FTOIT $f0, Q, 8 + .align 3 + negq Q, Q + br $fix_sign_in_ret2 $divbyzero: mov a0, v0 lda a0, GEN_INTDIV call_pal PAL_gentrap - - /* if trap returns, return zero. */ stq zero, 0(v0) stq zero, 8(v0) - ret - .end ldiv +#if FRAME > 0 + lda sp, FRAME(sp) +#endif + ret -weak_alias(ldiv, lldiv) -weak_alias(ldiv, imaxdiv) + .end ldiv diff --git a/sysdeps/alpha/remq.S b/sysdeps/alpha/remq.S index a8795c8d24..40c68d7f10 100644 --- a/sysdeps/alpha/remq.S +++ b/sysdeps/alpha/remq.S @@ -116,16 +116,16 @@ $fix_sign_in_ret1: _FTOIT $f0, Q, 8 .align 3 $fix_sign_in_ret2: + ldt $f0, 0(sp) + stq t3, 0(sp) + cfi_restore ($f0) + cfi_rel_offset (t3, 0) + mulq Q, Y, QY + unop stq t4, 8(sp) - - ldt $f0, 0(sp) unop cfi_rel_offset (t4, 8) - cfi_restore ($f0) - stq t3, 0(sp) - unop - cfi_rel_offset (t3, 0) subq QY, X, R mov Y, SY |