From 5046dbb4a7eba5eccfd258f92f4735c9ffc8d069 Mon Sep 17 00:00:00 2001 From: Zack Weinberg Date: Thu, 8 Jun 2017 15:39:03 -0400 Subject: Prepare for radical source tree reorganization. All top-level files and directories are moved into a temporary storage directory, REORG.TODO, except for files that will certainly still exist in their current form at top level when we're done (COPYING, COPYING.LIB, LICENSES, NEWS, README), all old ChangeLog files (which are moved to the new directory OldChangeLogs, instead), and the generated file INSTALL (which is just deleted; in the new order, there will be no generated files checked into version control). --- REORG.TODO/sysdeps/sparc/sparc32/rem.S | 363 +++++++++++++++++++++++++++++++++ 1 file changed, 363 insertions(+) create mode 100644 REORG.TODO/sysdeps/sparc/sparc32/rem.S (limited to 'REORG.TODO/sysdeps/sparc/sparc32/rem.S') diff --git a/REORG.TODO/sysdeps/sparc/sparc32/rem.S b/REORG.TODO/sysdeps/sparc/sparc32/rem.S new file mode 100644 index 0000000000..349d7c0115 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc32/rem.S @@ -0,0 +1,363 @@ + /* This file is generated from divrem.m4; DO NOT EDIT! */ +/* + * Division and remainder, from Appendix E of the Sparc Version 8 + * Architecture Manual, with fixes from Gordon Irlam. + */ + +/* + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * m4 parameters: + * .rem name of function to generate + * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1 + * true true=true => signed; true=false => unsigned + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ + + + +#include +#include + +ENTRY(.rem) + ! compute sign of result; if neither is negative, no problem + orcc %o1, %o0, %g0 ! either negative? + bge 2f ! no, go do the divide + mov %o0, %g3 ! sign of remainder matches %o0 + tst %o1 + bge 1f + tst %o0 + ! %o1 is definitely negative; %o0 might also be negative + bge 2f ! if %o0 not negative... + sub %g0, %o1, %o1 ! in any case, make %o1 nonneg +1: ! %o0 is negative, %o1 is nonnegative + sub %g0, %o0, %o0 ! make %o0 nonnegative +2: + + ! Ready to divide. Compute size of quotient; scale comparand. + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu LOC(got_result) ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu LOC(not_really_big) + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g2 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g2. + 2: addcc %o5, %o5, %o5 + bcc LOC(not_too_big) + add %g2, 1, %g2 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b LOC(do_single_div) + sub %g2, 1, %g2 + + LOC(not_too_big): + 3: cmp %o5, %o3 + blu 2b + nop + be LOC(do_single_div) + nop + /* NB: these are commented out in the V8-Sparc manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g2 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + LOC(do_single_div): + subcc %g2, 1, %g2 + bl LOC(end_regular_divide) + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b LOC(end_single_divloop) + nop + LOC(single_divloop): + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + LOC(end_single_divloop): + subcc %g2, 1, %g2 + bge LOC(single_divloop) + tst %o3 + b,a LOC(end_regular_divide) + +LOC(not_really_big): +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be LOC(got_result) + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +LOC(divloop): + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl LOC(1.16) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl LOC(2.17) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl LOC(3.19) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl LOC(4.23) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 + +LOC(4.23): + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + + +LOC(3.19): + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl LOC(4.21) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +LOC(4.21): + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + + + +LOC(2.17): + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl LOC(3.17) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl LOC(4.19) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +LOC(4.19): + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + + +LOC(3.17): + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl LOC(4.17) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +LOC(4.17): + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + + + + +LOC(1.16): + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl LOC(2.15) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl LOC(3.15) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl LOC(4.15) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +LOC(4.15): + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + + +LOC(3.15): + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl LOC(4.13) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +LOC(4.13): + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + + + +LOC(2.15): + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl LOC(3.13) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl LOC(4.11) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +LOC(4.11): + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + + +LOC(3.13): + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl LOC(4.9) + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +LOC(4.9): + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + + + + 9: +LOC(end_regular_divide): + subcc %o4, 1, %o4 + bge LOC(divloop) + tst %o3 + bl,a LOC(got_result) + ! non-restoring fixup here (one instruction only!) + add %o3, %o1, %o3 + + +LOC(got_result): + ! check to see if answer should be < 0 + tst %g3 + bl,a 1f + sub %g0, %o3, %o3 +1: + retl + mov %o3, %o0 + +END(.rem) -- cgit 1.4.1