diff options
Diffstat (limited to 'sysdeps/sparc/sparc32/divrem.m4')
-rw-r--r-- | sysdeps/sparc/sparc32/divrem.m4 | 238 |
1 files changed, 238 insertions, 0 deletions
diff --git a/sysdeps/sparc/sparc32/divrem.m4 b/sysdeps/sparc/sparc32/divrem.m4 new file mode 100644 index 0000000000..665abf11ae --- /dev/null +++ b/sysdeps/sparc/sparc32/divrem.m4 @@ -0,0 +1,238 @@ +/* + * Division and remainder, from Appendix E of the Sparc Version 8 + * Architecture Manual, with fixes from Gordon Irlam. + */ + +/* + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * m4 parameters: + * NAME name of function to generate + * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1 + * S S=true => signed; S=false => unsigned + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top `decade' of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ + +define(N, `4')dnl +define(WORDSIZE, `32')dnl +define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl +dnl +define(dividend, `%o0')dnl +define(divisor, `%o1')dnl +define(Q, `%o2')dnl +define(R, `%o3')dnl +define(ITER, `%o4')dnl +define(V, `%o5')dnl +dnl +dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d +define(T, `%g1')dnl +define(SC, `%g7')dnl +ifelse(S, `true', `define(SIGN, `%g6')')dnl + +dnl +dnl This is the recursive definition for developing quotient digits. +dnl +dnl Parameters: +dnl $1 the current depth, 1 <= $1 <= N +dnl $2 the current accumulation of quotient bits +dnl N max depth +dnl +dnl We add a new bit to $2 and either recurse or insert the bits in +dnl the quotient. R, Q, and V are inputs and outputs as defined above; +dnl the condition codes are expected to reflect the input R, and are +dnl modified to reflect the output R. +dnl +define(DEVELOP_QUOTIENT_BITS, +` ! depth $1, accumulated bits $2 + bl L.$1.eval(2**N+$2) + srl V,1,V + ! remainder is positive + subcc R,V,R + ifelse($1, N, + ` b 9f + add Q, ($2*2+1), Q + ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')') +L.$1.eval(2**N+$2): + ! remainder is negative + addcc R,V,R + ifelse($1, N, + ` b 9f + add Q, ($2*2-1), Q + ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')') + ifelse($1, 1, `9:')')dnl + +#include "sysdep.h" +#ifdef __linux__ +#include <asm/traps.h> +#else +#ifdef __svr4__ +#include <sys/trap.h> +#else +#include <machine/trap.h> +#endif +#endif + +FUNC(NAME) +ifelse(S, `true', +` ! compute sign of result; if neither is negative, no problem + orcc divisor, dividend, %g0 ! either negative? + bge 2f ! no, go do the divide +ifelse(OP, `div', +` xor divisor, dividend, SIGN ! compute sign in any case', +` mov dividend, SIGN ! sign of remainder matches dividend') + tst divisor + bge 1f + tst dividend + ! divisor is definitely negative; dividend might also be negative + bge 2f ! if dividend not negative... + sub %g0, divisor, divisor ! in any case, make divisor nonneg +1: ! dividend is negative, divisor is nonnegative + sub %g0, dividend, dividend ! make dividend nonnegative +2: +') + ! Ready to divide. Compute size of quotient; scale comparand. + orcc divisor, %g0, V + bne 1f + mov dividend, R + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta ST_DIV0 + retl + clr %o0 + +1: + cmp R, V ! if divisor exceeds dividend, done + blu Lgot_result ! (and algorithm fails otherwise) + clr Q + sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T + cmp R, T + blu Lnot_really_big + clr ITER + + ! `Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R.' + 1: + cmp V, T + bgeu 3f + mov 1, SC + sll V, N, V + b 1b + add ITER, 1, ITER + + ! Now compute SC. + 2: addcc V, V, V + bcc Lnot_too_big + add SC, 1, SC + + ! We get here if the divisor overflowed while shifting. + ! This means that R has the high-order bit set. + ! Restore V and subtract from R. + sll T, TOPBITS, T ! high order bit + srl V, 1, V ! rest of V + add V, T, V + b Ldo_single_div + sub SC, 1, SC + + Lnot_too_big: + 3: cmp V, R + blu 2b + nop + be Ldo_single_div + nop + /* NB: these are commented out in the V8-Sparc manual as well */ + /* (I do not understand this) */ + ! V > R: went too far: back up 1 step + ! srl V, 1, V + ! dec SC + ! do single-bit divide steps + ! + ! We have to be careful here. We know that R >= V, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if R >= 0. Because both R and V may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + Ldo_single_div: + subcc SC, 1, SC + bl Lend_regular_divide + nop + sub R, V, R + mov 1, Q + b Lend_single_divloop + nop + Lsingle_divloop: + sll Q, 1, Q + bl 1f + srl V, 1, V + ! R >= 0 + sub R, V, R + b 2f + add Q, 1, Q + 1: ! R < 0 + add R, V, R + sub Q, 1, Q + 2: + Lend_single_divloop: + subcc SC, 1, SC + bge Lsingle_divloop + tst R + b,a Lend_regular_divide + +Lnot_really_big: +1: + sll V, N, V + cmp V, R + bleu 1b + addcc ITER, 1, ITER + be Lgot_result + sub ITER, 1, ITER + + tst R ! set up for initial iteration +Ldivloop: + sll Q, N, Q + DEVELOP_QUOTIENT_BITS(1, 0) +Lend_regular_divide: + subcc ITER, 1, ITER + bge Ldivloop + tst R + bl,a Lgot_result + ! non-restoring fixup here (one instruction only!) +ifelse(OP, `div', +` sub Q, 1, Q +', ` add R, divisor, R +') + +Lgot_result: +ifelse(S, `true', +` ! check to see if answer should be < 0 + tst SIGN + bl,a 1f + ifelse(OP, `div', `sub %g0, Q, Q', `sub %g0, R, R') +1:') + retl + ifelse(OP, `div', `mov Q, %o0', `mov R, %o0') |