diff options
Diffstat (limited to 'sysdeps/sparc')
42 files changed, 1564 insertions, 1494 deletions
diff --git a/sysdeps/sparc/configure b/sysdeps/sparc/configure deleted file mode 100755 index a8f815a577..0000000000 --- a/sysdeps/sparc/configure +++ /dev/null @@ -1,4 +0,0 @@ - # Local configure fragment for sysdeps/sparc. - -# The assembler on SPARC needs the -fPIC flag even when it's assembler code. -ASFLAGS_SO=-fPIC diff --git a/sysdeps/sparc/configure.in b/sysdeps/sparc/configure.in deleted file mode 100644 index e71a7e30fa..0000000000 --- a/sysdeps/sparc/configure.in +++ /dev/null @@ -1,6 +0,0 @@ -sinclude(./aclocal.m4)dnl Autoconf lossage -GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. -# Local configure fragment for sysdeps/sparc. - -# The assembler on SPARC needs the -fPIC flag even when it's assembler code. -ASFLAGS_SO=-fPIC diff --git a/sysdeps/sparc/fpu/bits/mathdef.h b/sysdeps/sparc/fpu/bits/mathdef.h index 0bc9c94ecc..505d724fd0 100644 --- a/sysdeps/sparc/fpu/bits/mathdef.h +++ b/sysdeps/sparc/fpu/bits/mathdef.h @@ -62,3 +62,7 @@ typedef double double_t; #define INFINITY HUGE_VAL #endif + +/* The values returned by `ilogb' for 0 and NaN respectively. */ +#define FP_ILOGB0 0x80000001 +#define FP_ILOGBNAN 0x7fffffff diff --git a/sysdeps/sparc/fpu/fraiseexcpt.c b/sysdeps/sparc/fpu/fraiseexcpt.c index 308c9b2e7f..28db8b360a 100644 --- a/sysdeps/sparc/fpu/fraiseexcpt.c +++ b/sysdeps/sparc/fpu/fraiseexcpt.c @@ -20,14 +20,16 @@ #include <fenv.h> #include <math.h> -static void -ignore_me(double foo) -{ -} - void feraiseexcept (int excepts) { + static volatile double sink; + static const struct { + double zero, one, max, min, sixteen, pi; + } c = { + 0.0, 1.0, DBL_MAX, DBL_MIN, 16.0, M_PI + }; + /* Raise exceptions represented by EXPECTS. But we must raise only one signal at a time. It is important the if the overflow/underflow exception and the inexact exception are given at the same time, @@ -37,30 +39,30 @@ feraiseexcept (int excepts) if ((FE_INVALID & excepts) != 0) { /* One example of a invalid operation is 0/0. */ - ignore_me (0.0 / 0.0); + sink = c.zero / c.zero; } /* Next: division by zero. */ if ((FE_DIVBYZERO & excepts) != 0) { - ignore_me (1.0 / 0.0); + sink = c.one / c.zero; } /* Next: overflow. */ if ((FE_OVERFLOW & excepts) != 0) { - ignore_me (LDBL_MAX * LDBL_MAX); + sink = c.max * c.max; } /* Next: underflow. */ if ((FE_UNDERFLOW & excepts) != 0) { - ignore_me (LDBL_MIN / 16.0); + sink = c.min / c.sixteen; } /* Last: inexact. */ if ((FE_INEXACT & excepts) != 0) { - ignore_me (1.0 / M_PI); + sink = c.one / c.pi; } } diff --git a/sysdeps/sparc/sparc32/Makefile b/sysdeps/sparc/sparc32/Makefile index deec2f8145..c7c867ef22 100644 --- a/sysdeps/sparc/sparc32/Makefile +++ b/sysdeps/sparc/sparc32/Makefile @@ -37,7 +37,7 @@ divrem := sdiv udiv rem urem +divrem-S-rem := true +divrem-S-udiv := false +divrem-S-urem := false -$(divrem:%=$(sysdep_dir)/sparc/%.S): $(sysdep_dir)/sparc/divrem.m4 +$(divrem:%=$(sysdep_dir)/sparc/sparc32/%.S): $(sysdep_dir)/sparc/sparc32/divrem.m4 (echo "define(NAME,\`.$(+divrem-NAME)')\ define(OP,\`$(+divrem-OP-$(+divrem-NAME))')\ define(S,\`$(+divrem-S-$(+divrem-NAME))')\ @@ -48,4 +48,4 @@ $(divrem:%=$(sysdep_dir)/sparc/%.S): $(sysdep_dir)/sparc/divrem.m4 mv -f $@-tmp $@ test ! -d CVS || cvs commit -m'Regenerated from $<' $@ -sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/%.S) +sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/sparc32/%.S) diff --git a/sysdeps/sparc/sparc32/__longjmp.S b/sysdeps/sparc/sparc32/__longjmp.S index 36e1c170dd..651ede4c15 100644 --- a/sysdeps/sparc/sparc32/__longjmp.S +++ b/sysdeps/sparc/sparc32/__longjmp.S @@ -22,38 +22,43 @@ #include <bits/setjmp.h> #define ENV(reg) [%g1 + (reg * 4)] -ENTRY (__longjmp) +ENTRY(__longjmp) /* Store our arguments in global registers so we can still use them while unwinding frames and their register windows. */ mov %o0, %g1 /* ENV in %g1 */ - orcc %o1, %g0, %g6 /* VAL in %g6 */ + orcc %o1, %g0, %g2 /* VAL in %g2 */ be,a 0f /* Branch if zero; else skip delay slot. */ - mov 1, %g6 /* Delay slot only hit if zero: VAL = 1. */ + mov 1, %g2 /* Delay slot only hit if zero: VAL = 1. */ 0: - - /* Cache target FP in register %g7. */ - ld ENV (JB_FP), %g7 + /* Cache target FP in register %g3. */ + ld ENV(JB_FP), %g3 /* Now we will loop, unwinding the register windows up the stack - until the restored %fp value matches the target value in %g7. */ + until the restored %fp value matches the target value in %g3. */ -loop: cmp %fp, %g7 /* Have we reached the target frame? */ - bl,a loop /* Loop while current fp is below target. */ +LOC(loop): + cmp %fp, %g3 /* Have we reached the target frame? */ + bl,a LOC(loop) /* Loop while current fp is below target. */ restore /* Unwind register window in delay slot. */ - be,a found /* Better have hit it exactly. */ - ld ENV (JB_SP), %o0 /* Delay slot: extract target SP. */ + be,a LOC(found) /* Better have hit it exactly. */ + ld ENV(JB_SP), %o0 /* Delay slot: extract target SP. */ -bogus: /* Get here only if the jmp_buf or stack is clobbered. */ - call C_SYMBOL_NAME (abort) - nop +LOC(bogus): + /* Get here only if the jmp_buf or stack is clobbered. */ + call C_SYMBOL_NAME(abort) + nop unimp 0 -found: /* We have unwound register windows so %fp matches the target. */ +LOC(found): + /* We have unwound register windows so %fp matches the target. */ cmp %o0, %sp /* Check jmp_buf SP vs register window. */ - bge,a sp_ok /* Saved must not be deeper than register. */ + bge,a LOC(sp_ok) /* Saved must not be deeper than register. */ mov %o0, %sp /* OK, install new SP. */ - b,a bogus /* Bogus, we lose. */ + b,a LOC(bogus) /* Bogus, we lose. */ -sp_ok: ld ENV (JB_PC), %o0 /* Extract target return PC. */ +LOC(sp_ok): + ld ENV(JB_PC), %o0 /* Extract target return PC. */ jmp %o0 + 8 /* Return there. */ - mov %g6, %o0 /* Delay slot: set return value. */ + mov %g2, %o0 /* Delay slot: set return value. */ + +END(__longjmp) diff --git a/sysdeps/sparc/sparc32/add_n.S b/sysdeps/sparc/sparc32/add_n.S index 9852c256aa..5a6fccbbee 100644 --- a/sysdeps/sparc/sparc32/add_n.S +++ b/sysdeps/sparc/sparc32/add_n.S @@ -1,20 +1,20 @@ ! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store ! sum in a third limb vector. - -! Copyright (C) 1995, 1996 Free Software Foundation, Inc. - +! +! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. +! ! This file is part of the GNU MP Library. - +! ! The GNU MP Library is free software; you can redistribute it and/or modify ! it under the terms of the GNU Library General Public License as published by ! the Free Software Foundation; either version 2 of the License, or (at your ! option) any later version. - +! ! The GNU MP Library is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ! License for more details. - +! ! You should have received a copy of the GNU Library General Public License ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, @@ -22,205 +22,217 @@ ! INPUT PARAMETERS -#define res_ptr %o0 -#define s1_ptr %o1 -#define s2_ptr %o2 -#define size %o3 +#define RES_PTR %o0 +#define S1_PTR %o1 +#define S2_PTR %o2 +#define SIZE %o3 -#include "sysdep.h" +#include <sysdep.h> - .text - .align 4 - .global C_SYMBOL_NAME(__mpn_add_n) -C_SYMBOL_NAME(__mpn_add_n): - xor s2_ptr,res_ptr,%g1 +ENTRY(__mpn_add_n) + xor S2_PTR,RES_PTR,%g1 andcc %g1,4,%g0 - bne L1 ! branch if alignment differs - nop + bne LOC(1) ! branch if alignment differs + nop ! ** V1a ** -L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0 - be L_v1 ! if no, branch - nop -/* Add least significant limb separately to align res_ptr and s2_ptr */ - ld [s1_ptr],%g4 - add s1_ptr,4,s1_ptr - ld [s2_ptr],%g2 - add s2_ptr,4,s2_ptr - add size,-1,size +LOC(0): andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0 + be LOC(v1) ! if no, branch + nop +/* Add least significant limb separately to align RES_PTR and S2_PTR */ + ld [S1_PTR],%g4 + add S1_PTR,4,S1_PTR + ld [S2_PTR],%g2 + add S2_PTR,4,S2_PTR + add SIZE,-1,SIZE addcc %g4,%g2,%o4 - st %o4,[res_ptr] - add res_ptr,4,res_ptr -L_v1: addx %g0,%g0,%o4 ! save cy in register - cmp size,2 ! if size < 2 ... - bl Lend2 ! ... branch to tail code + st %o4,[RES_PTR] + add RES_PTR,4,RES_PTR +LOC(v1): + addx %g0,%g0,%o4 ! save cy in register + cmp SIZE,2 ! if SIZE < 2 ... + bl LOC(end2) ! ... branch to tail code subcc %g0,%o4,%g0 ! restore cy - ld [s1_ptr+0],%g4 - addcc size,-10,size - ld [s1_ptr+4],%g1 - ldd [s2_ptr+0],%g2 - blt Lfin1 + ld [S1_PTR+0],%g4 + addcc SIZE,-10,SIZE + ld [S1_PTR+4],%g1 + ldd [S2_PTR+0],%g2 + blt LOC(fin1) subcc %g0,%o4,%g0 ! restore cy /* Add blocks of 8 limbs until less than 8 limbs remain */ -Loop1: addxcc %g4,%g2,%o4 - ld [s1_ptr+8],%g4 +LOC(loop1): + addxcc %g4,%g2,%o4 + ld [S1_PTR+8],%g4 addxcc %g1,%g3,%o5 - ld [s1_ptr+12],%g1 - ldd [s2_ptr+8],%g2 - std %o4,[res_ptr+0] + ld [S1_PTR+12],%g1 + ldd [S2_PTR+8],%g2 + std %o4,[RES_PTR+0] addxcc %g4,%g2,%o4 - ld [s1_ptr+16],%g4 + ld [S1_PTR+16],%g4 addxcc %g1,%g3,%o5 - ld [s1_ptr+20],%g1 - ldd [s2_ptr+16],%g2 - std %o4,[res_ptr+8] + ld [S1_PTR+20],%g1 + ldd [S2_PTR+16],%g2 + std %o4,[RES_PTR+8] addxcc %g4,%g2,%o4 - ld [s1_ptr+24],%g4 + ld [S1_PTR+24],%g4 addxcc %g1,%g3,%o5 - ld [s1_ptr+28],%g1 - ldd [s2_ptr+24],%g2 - std %o4,[res_ptr+16] + ld [S1_PTR+28],%g1 + ldd [S2_PTR+24],%g2 + std %o4,[RES_PTR+16] addxcc %g4,%g2,%o4 - ld [s1_ptr+32],%g4 + ld [S1_PTR+32],%g4 addxcc %g1,%g3,%o5 - ld [s1_ptr+36],%g1 - ldd [s2_ptr+32],%g2 - std %o4,[res_ptr+24] + ld [S1_PTR+36],%g1 + ldd [S2_PTR+32],%g2 + std %o4,[RES_PTR+24] addx %g0,%g0,%o4 ! save cy in register - addcc size,-8,size - add s1_ptr,32,s1_ptr - add s2_ptr,32,s2_ptr - add res_ptr,32,res_ptr - bge Loop1 + addcc SIZE,-8,SIZE + add S1_PTR,32,S1_PTR + add S2_PTR,32,S2_PTR + add RES_PTR,32,RES_PTR + bge LOC(loop1) subcc %g0,%o4,%g0 ! restore cy -Lfin1: addcc size,8-2,size - blt Lend1 +LOC(fin1): + addcc SIZE,8-2,SIZE + blt LOC(end1) subcc %g0,%o4,%g0 ! restore cy /* Add blocks of 2 limbs until less than 2 limbs remain */ -Loope1: addxcc %g4,%g2,%o4 - ld [s1_ptr+8],%g4 +LOC(loope1): + addxcc %g4,%g2,%o4 + ld [S1_PTR+8],%g4 addxcc %g1,%g3,%o5 - ld [s1_ptr+12],%g1 - ldd [s2_ptr+8],%g2 - std %o4,[res_ptr+0] + ld [S1_PTR+12],%g1 + ldd [S2_PTR+8],%g2 + std %o4,[RES_PTR+0] addx %g0,%g0,%o4 ! save cy in register - addcc size,-2,size - add s1_ptr,8,s1_ptr - add s2_ptr,8,s2_ptr - add res_ptr,8,res_ptr - bge Loope1 + addcc SIZE,-2,SIZE + add S1_PTR,8,S1_PTR + add S2_PTR,8,S2_PTR + add RES_PTR,8,RES_PTR + bge LOC(loope1) subcc %g0,%o4,%g0 ! restore cy -Lend1: addxcc %g4,%g2,%o4 +LOC(end1): + addxcc %g4,%g2,%o4 addxcc %g1,%g3,%o5 - std %o4,[res_ptr+0] + std %o4,[RES_PTR+0] addx %g0,%g0,%o4 ! save cy in register - andcc size,1,%g0 - be Lret1 + andcc SIZE,1,%g0 + be LOC(ret1) subcc %g0,%o4,%g0 ! restore cy /* Add last limb */ - ld [s1_ptr+8],%g4 - ld [s2_ptr+8],%g2 + ld [S1_PTR+8],%g4 + ld [S2_PTR+8],%g2 addxcc %g4,%g2,%o4 - st %o4,[res_ptr+8] + st %o4,[RES_PTR+8] -Lret1: retl +LOC(ret1): + retl addx %g0,%g0,%o0 ! return carry-out from most sign. limb -L1: xor s1_ptr,res_ptr,%g1 +LOC(1): xor S1_PTR,RES_PTR,%g1 andcc %g1,4,%g0 - bne L2 + bne LOC(2) nop ! ** V1b ** - mov s2_ptr,%g1 - mov s1_ptr,s2_ptr - b L0 - mov %g1,s1_ptr + mov S2_PTR,%g1 + mov S1_PTR,S2_PTR + b LOC(0) + mov %g1,S1_PTR ! ** V2 ** -/* If we come here, the alignment of s1_ptr and res_ptr as well as the - alignment of s2_ptr and res_ptr differ. Since there are only two ways +/* If we come here, the alignment of S1_PTR and RES_PTR as well as the + alignment of S2_PTR and RES_PTR differ. Since there are only two ways things can be aligned (that we care about) we now know that the alignment - of s1_ptr and s2_ptr are the same. */ + of S1_PTR and S2_PTR are the same. */ -L2: cmp size,1 - be Ljone +LOC(2): cmp SIZE,1 + be LOC(jone) nop - andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0 - be L_v2 ! if no, branch + andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0 + be LOC(v2) ! if no, branch nop -/* Add least significant limb separately to align s1_ptr and s2_ptr */ - ld [s1_ptr],%g4 - add s1_ptr,4,s1_ptr - ld [s2_ptr],%g2 - add s2_ptr,4,s2_ptr - add size,-1,size +/* Add least significant limb separately to align S1_PTR and S2_PTR */ + ld [S1_PTR],%g4 + add S1_PTR,4,S1_PTR + ld [S2_PTR],%g2 + add S2_PTR,4,S2_PTR + add SIZE,-1,SIZE addcc %g4,%g2,%o4 - st %o4,[res_ptr] - add res_ptr,4,res_ptr + st %o4,[RES_PTR] + add RES_PTR,4,RES_PTR -L_v2: addx %g0,%g0,%o4 ! save cy in register - addcc size,-8,size - blt Lfin2 +LOC(v2): + addx %g0,%g0,%o4 ! save cy in register + addcc SIZE,-8,SIZE + blt LOC(fin2) subcc %g0,%o4,%g0 ! restore cy /* Add blocks of 8 limbs until less than 8 limbs remain */ -Loop2: ldd [s1_ptr+0],%g2 - ldd [s2_ptr+0],%o4 +LOC(loop2): + ldd [S1_PTR+0],%g2 + ldd [S2_PTR+0],%o4 addxcc %g2,%o4,%g2 - st %g2,[res_ptr+0] + st %g2,[RES_PTR+0] addxcc %g3,%o5,%g3 - st %g3,[res_ptr+4] - ldd [s1_ptr+8],%g2 - ldd [s2_ptr+8],%o4 + st %g3,[RES_PTR+4] + ldd [S1_PTR+8],%g2 + ldd [S2_PTR+8],%o4 addxcc %g2,%o4,%g2 - st %g2,[res_ptr+8] + st %g2,[RES_PTR+8] addxcc %g3,%o5,%g3 - st %g3,[res_ptr+12] - ldd [s1_ptr+16],%g2 - ldd [s2_ptr+16],%o4 + st %g3,[RES_PTR+12] + ldd [S1_PTR+16],%g2 + ldd [S2_PTR+16],%o4 addxcc %g2,%o4,%g2 - st %g2,[res_ptr+16] + st %g2,[RES_PTR+16] addxcc %g3,%o5,%g3 - st %g3,[res_ptr+20] - ldd [s1_ptr+24],%g2 - ldd [s2_ptr+24],%o4 + st %g3,[RES_PTR+20] + ldd [S1_PTR+24],%g2 + ldd [S2_PTR+24],%o4 addxcc %g2,%o4,%g2 - st %g2,[res_ptr+24] + st %g2,[RES_PTR+24] addxcc %g3,%o5,%g3 - st %g3,[res_ptr+28] + st %g3,[RES_PTR+28] addx %g0,%g0,%o4 ! save cy in register - addcc size,-8,size - add s1_ptr,32,s1_ptr - add s2_ptr,32,s2_ptr - add res_ptr,32,res_ptr - bge Loop2 + addcc SIZE,-8,SIZE + add S1_PTR,32,S1_PTR + add S2_PTR,32,S2_PTR + add RES_PTR,32,RES_PTR + bge LOC(loop2) subcc %g0,%o4,%g0 ! restore cy -Lfin2: addcc size,8-2,size - blt Lend2 +LOC(fin2): + addcc SIZE,8-2,SIZE + blt LOC(end2) subcc %g0,%o4,%g0 ! restore cy -Loope2: ldd [s1_ptr+0],%g2 - ldd [s2_ptr+0],%o4 +LOC(loope2): + ldd [S1_PTR+0],%g2 + ldd [S2_PTR+0],%o4 addxcc %g2,%o4,%g2 - st %g2,[res_ptr+0] + st %g2,[RES_PTR+0] addxcc %g3,%o5,%g3 - st %g3,[res_ptr+4] + st %g3,[RES_PTR+4] addx %g0,%g0,%o4 ! save cy in register - addcc size,-2,size - add s1_ptr,8,s1_ptr - add s2_ptr,8,s2_ptr - add res_ptr,8,res_ptr - bge Loope2 + addcc SIZE,-2,SIZE + add S1_PTR,8,S1_PTR + add S2_PTR,8,S2_PTR + add RES_PTR,8,RES_PTR + bge LOC(loope2) subcc %g0,%o4,%g0 ! restore cy -Lend2: andcc size,1,%g0 - be Lret2 +LOC(end2): + andcc SIZE,1,%g0 + be LOC(ret2) subcc %g0,%o4,%g0 ! restore cy /* Add last limb */ -Ljone: ld [s1_ptr],%g4 - ld [s2_ptr],%g2 +LOC(jone): + ld [S1_PTR],%g4 + ld [S2_PTR],%g2 addxcc %g4,%g2,%o4 - st %o4,[res_ptr] + st %o4,[RES_PTR] -Lret2: retl +LOC(ret2): + retl addx %g0,%g0,%o0 ! return carry-out from most sign. limb + +END(__mpn_add_n) diff --git a/sysdeps/sparc/sparc32/addmul_1.S b/sysdeps/sparc/sparc32/addmul_1.S index 375d25db6b..5e0c381f4f 100644 --- a/sysdeps/sparc/sparc32/addmul_1.S +++ b/sysdeps/sparc/sparc32/addmul_1.S @@ -1,20 +1,20 @@ ! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add ! the result to a second limb vector. - -! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. - +! +! Copyright (C) 1992, 1993, 1994, 1997 Free Software Foundation, Inc. +! ! This file is part of the GNU MP Library. - +! ! The GNU MP Library is free software; you can redistribute it and/or modify ! it under the terms of the GNU Library General Public License as published by ! the Free Software Foundation; either version 2 of the License, or (at your ! option) any later version. - +! ! The GNU MP Library is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ! License for more details. - +! ! You should have received a copy of the GNU Library General Public License ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, @@ -22,17 +22,14 @@ ! INPUT PARAMETERS -! res_ptr o0 -! s1_ptr o1 -! size o2 -! s2_limb o3 +! RES_PTR o0 +! S1_PTR o1 +! SIZE o2 +! S2_LIMB o3 -#include "sysdep.h" +#include <sysdep.h> -.text - .align 4 - .global C_SYMBOL_NAME(__mpn_addmul_1) -C_SYMBOL_NAME(__mpn_addmul_1): +ENTRY(__mpn_addmul_1) ! Make S1_PTR and RES_PTR point at the end of their blocks ! and put (- 4 x SIZE) in index/loop counter. sll %o2,2,%o2 @@ -41,19 +38,19 @@ C_SYMBOL_NAME(__mpn_addmul_1): sub %g0,%o2,%o2 cmp %o3,0xfff - bgu Large + bgu LOC(large) nop ld [%o1+%o2],%o5 mov 0,%o0 - b L0 + b LOC(0) add %o4,-4,%o4 -Loop0: +LOC(loop0): addcc %o5,%g1,%g1 ld [%o1+%o2],%o5 addx %o0,%g0,%o0 st %g1,[%o4+%o2] -L0: wr %g0,%o3,%y +LOC(0): wr %g0,%o3,%y sra %o5,31,%g2 and %o3,%g2,%g2 andcc %g1,0,%g1 @@ -79,7 +76,7 @@ L0: wr %g0,%o3,%y addcc %g1,%o0,%g1 addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb addcc %o2,4,%o2 ! loop counter - bne Loop0 + bne LOC(loop0) ld [%o4+%o2],%o5 addcc %o5,%g1,%g1 @@ -88,17 +85,18 @@ L0: wr %g0,%o3,%y st %g1,[%o4+%o2] -Large: ld [%o1+%o2],%o5 +LOC(large): + ld [%o1+%o2],%o5 mov 0,%o0 sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0 - b L1 + b LOC(1) add %o4,-4,%o4 -Loop: +LOC(loop): addcc %o5,%g3,%g3 ld [%o1+%o2],%o5 addx %o0,%g0,%o0 st %g3,[%o4+%o2] -L1: wr %g0,%o5,%y +LOC(1): wr %g0,%o5,%y and %o5,%g4,%g2 andcc %g0,%g0,%g1 mulscc %g1,%o3,%g1 @@ -138,10 +136,12 @@ L1: wr %g0,%o5,%y addcc %g3,%o0,%g3 addx %g2,%g1,%o0 addcc %o2,4,%o2 - bne Loop + bne LOC(loop) ld [%o4+%o2],%o5 addcc %o5,%g3,%g3 addx %o0,%g0,%o0 retl st %g3,[%o4+%o2] + +END(__mpn_addmul_1) diff --git a/sysdeps/sparc/sparc32/alloca.S b/sysdeps/sparc/sparc32/alloca.S index dcbd171163..1eb755afab 100644 --- a/sysdeps/sparc/sparc32/alloca.S +++ b/sysdeps/sparc/sparc32/alloca.S @@ -16,7 +16,7 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include "sysdep.h" +#include <sysdep.h> /* Code produced by Sun's C compiler calls this function with two extra arguments which it makes relocatable symbols but seem always to be @@ -30,3 +30,4 @@ ENTRY (__builtin_alloca) sub %sp, %o0, %sp /* Push some stack space. */ retl /* Return; the returned buffer leaves 96 */ add %sp, 96, %o0 /* bytes of register save area at the top. */ +END (__builtin_alloca) diff --git a/sysdeps/sparc/sparc32/bsd-_setjmp.S b/sysdeps/sparc/sparc32/bsd-_setjmp.S index bf49765f4d..4e6a2da560 100644 --- a/sysdeps/sparc/sparc32/bsd-_setjmp.S +++ b/sysdeps/sparc/sparc32/bsd-_setjmp.S @@ -1,40 +1 @@ -/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. Sparc version. - Copyright (C) 1994, 1997 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -#include <sysdep.h> - -ENTRY (_setjmp) - -#ifdef PIC - save %sp, -64, %sp -1: call 2f - sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1 -2: or %l7, %lo(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1 - add %g1, %o7, %g1 - sethi %hi(C_SYMBOL_NAME(__sigsetjmp)), %g2 - restore - or %g2, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g2 - ld [%g1+%g2], %g1 -#else - sethi %hi(C_SYMBOL_NAME(__sigsetjmp)), %g1 - or %g1, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g1 -#endif - - jmp %g1 - mov %g0, %o1 /* Pass second argument of zero. */ +/* _setjmp is in setjmp.S */ diff --git a/sysdeps/sparc/sparc32/bsd-setjmp.S b/sysdeps/sparc/sparc32/bsd-setjmp.S index 16dc260765..1da848d2f1 100644 --- a/sysdeps/sparc/sparc32/bsd-setjmp.S +++ b/sysdeps/sparc/sparc32/bsd-setjmp.S @@ -1,40 +1 @@ -/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. Sparc version. - Copyright (C) 1994, 1997 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -#include <sysdep.h> - -ENTRY (setjmp) - -#ifdef PIC - save %sp, -64, %sp -1: call 2f - sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1 -2: or %l7, %lo(_GLOBAL_OFFSET_TABLE_-(1b-.)), %g1 - add %g1, %o7, %g1 - sethi %hi(C_SYMBOL_NAME(__sigsetjmp)), %g2 - restore - or %g2, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g2 - ld [%g1+%g2], %g1 -#else - sethi %hi(C_SYMBOL_NAME(__sigsetjmp)), %g1 - or %g1, %lo(C_SYMBOL_NAME(__sigsetjmp)), %g1 -#endif - - jmp %g1 - mov 1, %o1 /* Pass second argument of one. */ +/* setjmp is in setjmp.S */ diff --git a/sysdeps/sparc/sparc32/divrem.m4 b/sysdeps/sparc/sparc32/divrem.m4 index 665abf11ae..30d532ad77 100644 --- a/sysdeps/sparc/sparc32/divrem.m4 +++ b/sysdeps/sparc/sparc32/divrem.m4 @@ -47,8 +47,8 @@ define(V, `%o5')dnl dnl dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d define(T, `%g1')dnl -define(SC, `%g7')dnl -ifelse(S, `true', `define(SIGN, `%g6')')dnl +define(SC, `%g2')dnl +ifelse(S, `true', `define(SIGN, `%g3')')dnl dnl dnl This is the recursive definition for developing quotient digits. @@ -65,7 +65,7 @@ dnl modified to reflect the output R. dnl define(DEVELOP_QUOTIENT_BITS, ` ! depth $1, accumulated bits $2 - bl L.$1.eval(2**N+$2) + bl LOC($1.eval(2**N+$2)) srl V,1,V ! remainder is positive subcc R,V,R @@ -73,7 +73,7 @@ define(DEVELOP_QUOTIENT_BITS, ` b 9f add Q, ($2*2+1), Q ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')') -L.$1.eval(2**N+$2): +LOC($1.eval(2**N+$2)): ! remainder is negative addcc R,V,R ifelse($1, N, @@ -82,18 +82,10 @@ L.$1.eval(2**N+$2): ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')') ifelse($1, 1, `9:')')dnl -#include "sysdep.h" -#ifdef __linux__ -#include <asm/traps.h> -#else -#ifdef __svr4__ +#include <sysdep.h> #include <sys/trap.h> -#else -#include <machine/trap.h> -#endif -#endif -FUNC(NAME) +ENTRY(NAME) ifelse(S, `true', ` ! compute sign of result; if neither is negative, no problem orcc divisor, dividend, %g0 ! either negative? @@ -124,11 +116,11 @@ ifelse(OP, `div', 1: cmp R, V ! if divisor exceeds dividend, done - blu Lgot_result ! (and algorithm fails otherwise) + blu LOC(got_result) ! (and algorithm fails otherwise) clr Q sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T cmp R, T - blu Lnot_really_big + blu LOC(not_really_big) clr ITER ! `Here the dividend is >= 2**(31-N) or so. We must be careful here, @@ -146,7 +138,7 @@ ifelse(OP, `div', ! Now compute SC. 2: addcc V, V, V - bcc Lnot_too_big + bcc LOC(not_too_big) add SC, 1, SC ! We get here if the divisor overflowed while shifting. @@ -155,14 +147,14 @@ ifelse(OP, `div', sll T, TOPBITS, T ! high order bit srl V, 1, V ! rest of V add V, T, V - b Ldo_single_div + b LOC(do_single_div) sub SC, 1, SC - Lnot_too_big: + LOC(not_too_big): 3: cmp V, R blu 2b nop - be Ldo_single_div + be LOC(do_single_div) nop /* NB: these are commented out in the V8-Sparc manual as well */ /* (I do not understand this) */ @@ -177,15 +169,15 @@ ifelse(OP, `div', ! order bit set in the first step, just falling into the regular ! division loop will mess up the first time around. ! So we unroll slightly... - Ldo_single_div: + LOC(do_single_div): subcc SC, 1, SC - bl Lend_regular_divide + bl LOC(end_regular_divide) nop sub R, V, R mov 1, Q - b Lend_single_divloop + b LOC(end_single_divloop) nop - Lsingle_divloop: + LOC(single_divloop): sll Q, 1, Q bl 1f srl V, 1, V @@ -197,37 +189,37 @@ ifelse(OP, `div', add R, V, R sub Q, 1, Q 2: - Lend_single_divloop: + LOC(end_single_divloop): subcc SC, 1, SC - bge Lsingle_divloop + bge LOC(single_divloop) tst R - b,a Lend_regular_divide + b,a LOC(end_regular_divide) -Lnot_really_big: +LOC(not_really_big): 1: sll V, N, V cmp V, R bleu 1b addcc ITER, 1, ITER - be Lgot_result + be LOC(got_result) sub ITER, 1, ITER tst R ! set up for initial iteration -Ldivloop: +LOC(divloop): sll Q, N, Q DEVELOP_QUOTIENT_BITS(1, 0) -Lend_regular_divide: +LOC(end_regular_divide): subcc ITER, 1, ITER - bge Ldivloop + bge LOC(divloop) tst R - bl,a Lgot_result + bl,a LOC(got_result) ! non-restoring fixup here (one instruction only!) ifelse(OP, `div', ` sub Q, 1, Q ', ` add R, divisor, R ') -Lgot_result: +LOC(got_result): ifelse(S, `true', ` ! check to see if answer should be < 0 tst SIGN @@ -236,3 +228,5 @@ ifelse(S, `true', 1:') retl ifelse(OP, `div', `mov Q, %o0', `mov R, %o0') + +END(NAME) diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h index f7bcba0c2f..7c547f24ba 100644 --- a/sysdeps/sparc/sparc32/dl-machine.h +++ b/sysdeps/sparc/sparc32/dl-machine.h @@ -30,7 +30,7 @@ #define OPCODE_CALL 0x40000000 /* call ?; add PC-rel word address */ #define OPCODE_SETHI_G1 0x03000000 /* sethi ?, %g1; add value>>10 */ #define OPCODE_JMP_G1 0x81c06000 /* jmp %g1+?; add lo 10 bits of value */ -#define OPCODE_SAVE_SP64 0x9de3bfc0 /* save %sp, -64, %sp */ +#define OPCODE_SAVE_SP 0x9de3bfa8 /* save %sp, -(16+6)*4, %sp */ /* Return nonzero iff E_MACHINE is compatible with the running host. */ @@ -55,7 +55,7 @@ elf_machine_dynamic (void) static inline Elf32_Addr elf_machine_load_address (void) { - register Elf32_Addr pc __asm("%o7"), got; + register Elf32_Addr pc __asm("%o7"), pic __asm("%l7"), got; /* Utilize the fact that a local .got entry will be partially initialized at startup awaiting its RELATIVE fixup. */ @@ -64,50 +64,187 @@ elf_machine_load_address (void) ".Load_address:\n\t" "call 1f\n\t" "or %1,%%lo(.Load_address),%1\n" - "1:\tld [%%l7+%1],%1" - : "=r"(pc), "=r"(got)); + "1:\tld [%2+%1],%1" + : "=r"(pc), "=r"(got) : "r"(pic)); return pc - got; } - Elf32_Addr addr; - - asm ( - "add %%fp,0x44,%%o2\n\t" /* o2 = point to argc */ - "ld [%%o2 - 4],%%o0\n\t" /* o0 = load argc */ - "sll %%o0, 2, %%o0\n\t" /* o0 = argc * sizeof (int) */ - "add %%o2,%%o0,%%o2\n\t" /* o2 = skip over argv */ - "add %%o2,4,%%o2\n\t" /* skip over null after argv */ - - /* Now %o2 is pointing to env, skip over that as well. */ - "1:\n\t" - "ld [%%o2],%%o0\n\t" - "cmp %%o0,0\n\t" - "bnz 1b\n\t" - "add %%o2,4,%%o2\n\t" - - /* Note that above, we want to advance the NULL after envp so - we always add 4. */ - - /* Now, search for the AT_BASE property. */ - "2:\n\t" - "ld [%%o2],%%o0\n\t" - "cmp %%o0,0\n\t" - "be,a 3f\n\t" - "or %%g0,%%g0,%0\n\t" - "cmp %%o0,7\n\t" /* AT_BASE = 7 */ - "be,a 3f\n\t" - "ld [%%o2+4],%0\n\t" - "b 2b\n\t" - "add %%o2,8,%%o2\n\t" - /* At this point %0 has the load address for the interpreter */ - "3:\n\t" - : "=r" (addr) - : /* no inputs */ - : "o0", "o2"); - return addr; +/* Set up the loaded object described by L so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. */ + +static inline int +elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) +{ + Elf32_Addr *plt; + extern void _dl_runtime_resolve (Elf32_Word); + + if (l->l_info[DT_JMPREL] && lazy) + { + /* The entries for functions in the PLT have not yet been filled in. + Their initial contents will arrange when called to set the high 22 + bits of %g1 with an offset into the .rela.plt section and jump to + the beginning of the PLT. */ + plt = (Elf32_Addr *) (l->l_addr + l->l_info[DT_PLTGOT]->d_un.d_ptr); + + /* The beginning of the PLT does: + + save %sp, -64, %sp + pltpc: call _dl_runtime_resolve + nop + .word MAP + + This saves the register window containing the arguments, and the + PC value (pltpc) implicitly saved in %o7 by the call points near the + location where we store the link_map pointer for this object. */ + + plt[0] = OPCODE_SAVE_SP; + /* Construct PC-relative word address. */ + plt[1] = OPCODE_CALL | (((Elf32_Addr) &_dl_runtime_resolve - + (Elf32_Addr) &plt[1]) >> 2); + plt[2] = OPCODE_NOP; /* Fill call delay slot. */ + plt[3] = (Elf32_Addr) l; + } + + return lazy; } +/* This code is used in dl-runtime.c to call the `fixup' function + and then redirect to the address it returns. */ +#define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\ + .globl _dl_runtime_resolve + .type _dl_runtime_resolve, @function +_dl_runtime_resolve: + /* Set up the arguments to fixup -- + %o0 = link_map out of plt0 + %o1 = offset of reloc entry */ + ld [%o7 + 8], %o0 + srl %g1, 10, %o1 + call fixup + sub %o1, 4*12, %o1 + jmp %o0 + restore + .size _dl_runtime_resolve, . - _dl_runtime_resolve"); + +/* The address of the JMP_SLOT reloc is the .plt entry, thus we don't + dereference the reloc's addr to get the final destination. Ideally + there would be a generic way to return the value of the symbol from + elf_machine_relplt, but as it is, the address of the .plt entry is + good enough. */ +#define ELF_FIXUP_RETURN_VALUE(map, result) ((Elf32_Addr) &(result)) + +/* Nonzero iff TYPE should not be allowed to resolve to one of + the main executable's symbols, as for a COPY reloc. */ +#define elf_machine_lookup_noexec_p(type) ((type) == R_SPARC_COPY) + +/* Nonzero iff TYPE describes relocation of a PLT entry, so + PLT entries should not be allowed to define the value. */ +#define elf_machine_lookup_noplt_p(type) ((type) == R_SPARC_JMP_SLOT) + +/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ +#define ELF_MACHINE_RELOC_NOPLT R_SPARC_JMP_SLOT + +/* The SPARC never uses Elf32_Rel relocations. */ +#define ELF_MACHINE_NO_REL 1 + +/* The SPARC overlaps DT_RELA and DT_PLTREL. */ +#define ELF_MACHINE_PLTREL_OVERLAP 1 + +/* The PLT uses Elf32_Rela relocs. */ +#define elf_machine_relplt elf_machine_rela + +/* Initial entry point code for the dynamic linker. + The C function `_dl_start' is the real entry point; + its return value is the user program's entry point. */ + +#define RTLD_START __asm__ ("\ +.text + .globl _start + .type _start,@function +_start: + /* Allocate space for functions to drop their arguments. */ + sub %sp, 6*4, %sp + /* Pass pointer to argument block to _dl_start. */ + call _dl_start + add %sp, 22*4, %o0 + /* FALTHRU */ + .globl _dl_start_user + .type _dl_start_user,@function +_dl_start_user: + /* Load the PIC register. */ +1: call 2f + sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)), %l7 +2: or %l7, %lo(_GLOBAL_OFFSET_TABLE_-(1b-.)), %l7 + add %l7, %o7, %l7 + /* Save the user entry point address in %l0 */ + mov %o0, %l0 + /* See if we were run as a command with the executable file name as an + extra leading argument. If so, adjust the contents of the stack. */ + sethi %hi(_dl_skip_args), %g2 + or %g2, %lo(_dl_skip_args), %g2 + ld [%l7+%g2], %i0 + ld [%i0], %i0 + tst %i0 + beq 3f + nop + /* Find out how far to shift. */ + ld [%sp+22*4], %i1 /* load argc */ + sub %i1, %i0, %i1 + sll %i0, 2, %i2 + st %i1, [%sp+22*4] + add %sp, 23*4, %i1 + add %i1, %i2, %i2 + /* Copy down argv */ +21: ld [%i2], %i3 + add %i2, 4, %i2 + tst %i3 + st %i3, [%i1] + bne 21b + add %i1, 4, %i1 + /* Copy down env */ +22: ld [%i2], %i3 + add %i2, 4, %i2 + tst %i3 + st %i3, [%i1] + bne 22b + add %i1, 4, %i1 + /* Copy down auxiliary table. */ +23: ld [%i2], %i3 + ld [%i2+4], %i4 + add %i2, 8, %i2 + tst %i3 + st %i3, [%i1] + st %i4, [%i1+4] + bne 23b + add %i1, 8, %i1 + /* Load _dl_default_scope[2] to pass to _dl_init_next. */ +3: sethi %hi(_dl_default_scope), %g1 + or %g1, %lo(_dl_default_scope), %g1 + ld [%l7+%g1], %l1 + ld [%l1+2*4], %l1 + /* Call _dl_init_next to return the address of an initializer to run. */ +4: call _dl_init_next + mov %l1, %o0 + tst %o0 + beq 5f + nop + jmpl %o0, %o7 + nop + ba,a 4b + /* Clear the startup flag. */ +5: sethi %hi(_dl_starting_up), %g1 + or %g1, %lo(_dl_starting_up), %g1 + ld [%l7+%g1], %g1 + st %g0, [%g1] + /* Pass our finalizer function to the user in %g1. */ + sethi %hi(_dl_fini), %g1 + or %g1, %lo(_dl_fini), %g1 + ld [%l7+%g1], %g1 + /* Jump to the user's entry point and deallocate the extra stack we got. */ + jmp %l0 + add %sp, 6*4, %sp + .size _dl_start_user,.-_dl_start_user"); + #ifdef RESOLVE /* Perform the relocation specified by RELOC and SYM (which is fully resolved). MAP is the object containing the reloc. */ @@ -117,7 +254,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, const Elf32_Sym *sym, const struct r_found_version *version, Elf32_Addr *const reloc_addr) { - Elf32_Addr loadbase; + extern unsigned long _dl_hwcap; if (ELF32_R_TYPE (reloc->r_info) == R_SPARC_RELATIVE) { @@ -144,6 +281,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, switch (ELF32_R_TYPE (reloc->r_info)) { case R_SPARC_COPY: +#ifndef RTLD_BOOTSTRAP if (sym->st_size > refsym->st_size || (_dl_verbose && sym->st_size < refsym->st_size)) { @@ -159,14 +297,21 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, } memcpy (reloc_addr, (void *) value, MIN (sym->st_size, refsym->st_size)); +#endif break; case R_SPARC_GLOB_DAT: case R_SPARC_32: *reloc_addr = value; break; case R_SPARC_JMP_SLOT: - reloc_addr[1] = OPCODE_SETHI_G1 | (value >> 10); + /* For thread safety, write the instructions from the bottom and + flush before we overwrite the critical "b,a". */ reloc_addr[2] = OPCODE_JMP_G1 | (value & 0x3ff); + if (1 || (_dl_hwcap & 1)) /* HWCAP_SPARC_FLUSH */ + __asm __volatile ("flush %0+8" : : "r"(reloc_addr)); + reloc_addr[1] = OPCODE_SETHI_G1 | (value >> 10); + if (1 || (_dl_hwcap & 1)) /* HWCAP_SPARC_FLUSH */ + __asm __volatile ("flush %0+4" : : "r"(reloc_addr)); break; case R_SPARC_8: *(char *) reloc_addr = value; @@ -218,146 +363,3 @@ elf_machine_lazy_rel (struct link_map *map, const Elf32_Rela *reloc) } #endif /* RESOLVE */ - -/* Nonzero iff TYPE should not be allowed to resolve to one of - the main executable's symbols, as for a COPY reloc. */ -#define elf_machine_lookup_noexec_p(type) ((type) == R_SPARC_COPY) - -/* Nonzero iff TYPE describes relocation of a PLT entry, so - PLT entries should not be allowed to define the value. */ -#define elf_machine_lookup_noplt_p(type) ((type) == R_SPARC_JMP_SLOT) - -/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ -#define ELF_MACHINE_RELOC_NOPLT R_SPARC_JMP_SLOT - -/* The SPARC never uses Elf32_Rel relocations. */ -#define ELF_MACHINE_NO_REL 1 - -/* The SPARC overlaps DT_RELA and DT_PLTREL. */ -#define ELF_MACHINE_PLTREL_OVERLAP 1 - -/* Set up the loaded object described by L so its unrelocated PLT - entries will jump to the on-demand fixup code in dl-runtime.c. */ - -static inline int -elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) -{ - Elf32_Addr *plt; - extern void _dl_runtime_resolve (Elf32_Word); - - if (l->l_info[DT_JMPREL] && lazy) - { - /* The entries for functions in the PLT have not yet been filled in. - Their initial contents will arrange when called to set the high 22 - bits of %g1 with an offset into the .rela.plt section and jump to - the beginning of the PLT. */ - plt = (Elf32_Addr *) (l->l_addr + l->l_info[DT_PLTGOT]->d_un.d_ptr); - - /* The beginning of the PLT does: - - save %sp, -64, %sp - pltpc: call _dl_runtime_resolve - nop - .word MAP - - This saves the register window containing the arguments, and the - PC value (pltpc) implicitly saved in %o7 by the call points near the - location where we store the link_map pointer for this object. */ - - plt[0] = OPCODE_SAVE_SP64; /* save %sp, -64, %sp */ - /* Construct PC-relative word address. */ - plt[1] = OPCODE_CALL | (((Elf32_Addr) &_dl_runtime_resolve - - (Elf32_Addr) &plt[1]) >> 2); - plt[2] = OPCODE_NOP; /* Fill call delay slot. */ - plt[3] = (Elf32_Addr *) l; - } - - return lazy; -} - -/* This code is used in dl-runtime.c to call the `fixup' function - and then redirect to the address it returns. */ -#define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\ -# Trampoline for _dl_runtime_resolver - .globl _dl_runtime_resolve - .type _dl_runtime_resolve, @function -_dl_runtime_resolve: - t 1 - #call %g0 - # Pass two args to fixup: the PLT address computed from the PC saved - # in the PLT's call insn, and the reloc offset passed in %g1. - #ld [%o7 + 8], %o1 | Second arg, loaded from PLTPC[2]. - #call fixup - #shrl %g1, 22, %o0 | First arg, set in delay slot of call. - # Jump to the real function. - #jmpl %o0, %g0 - # In the delay slot of that jump, restore the register window - # saved by the first insn of the PLT. - #restore - .size _dl_runtime_resolve, . - _dl_runtime_resolve -"); - -/* The PLT uses Elf32_Rela relocs. */ -#define elf_machine_relplt elf_machine_rela - - -/* Mask identifying addresses reserved for the user program, - where the dynamic linker should not map anything. */ -#define ELF_MACHINE_USER_ADDRESS_MASK ??? - -/* Initial entry point code for the dynamic linker. - The C function `_dl_start' is the real entry point; - its return value is the user program's entry point. */ - -#define RTLD_START __asm__ ( \ -".text\n\ - .globl _start\n\ - .type _start,@function\n\ -_start:\n\ - /* Pass pointer to argument block to _dl_start. */\n\ - add %sp,64,%o0\n\ - call _dl_start\n\ - nop\n\ - \n\ - mov %o0,%l0\n\ - \n\ -2:\n\ - call 1f\n\ - nop\n\ -1:\n\ - sethi %hi(_GLOBAL_OFFSET_TABLE_-(2b-.)),%l2\n\ - sethi %hi(_dl_default_scope),%l3\n\ - or %l2,%lo(_GLOBAL_OFFSET_TABLE_-(2b-.)),%l2\n\ - or %l3,%lo(_dl_default_scope),%l3\n\ - add %o7,%l2,%l1\n\ - # %l1 has the GOT. %l3 has _dl_default_scope GOT offset\n\ - ld [%l1+%l3],%l4\n\ - # %l4 has pointer to _dl_default_scope. Now, load _dl_default_scope [2]\n\ - ld [%l4+8],%l4\n\ - # %l4 has _dl_default_scope [2]\n\ - # call _dl_init_next until it returns 0, pass _dl_default_scope [2]\n\ -3:\n\ - call _dl_init_next\n\ - mov %l4,%o0\n\ - cmp %o0,%g0\n\ - bz,a 4f\n\ - nop\n\ - call %o0\n\ - /* Pass pointer to argument block to this init function */\n\ - add %sp,64,%o0\n\ - b,a 3b\n\ -4:\n\ - # Clear the _dl_starting_up variable and pass _dl_fini in %g1 as per ELF ABI.\n\ - sethi %hi(_dl_starting_up),%l4\n\ - sethi %hi(_dl_fini),%l3\n\ - or %l4,%lo(_dl_starting_up),%l4\n\ - or %l3,%lo(_dl_fini),%l3\n\ - # clear _dl_starting_up\n\ - ld [%l1+%l4],%l5\n\ - st %g0,[%l5]\n\ - # load out fini function for atexit in %g1\n\ - ld [%l3+%l1],%g1\n\ - # jump to the user program entry point.\n\ - jmpl %l0,%g0\n\ - nop\n\ -"); diff --git a/sysdeps/sparc/sparc32/dotmul.S b/sysdeps/sparc/sparc32/dotmul.S index 7ce695cb7a..821aa8bdb4 100644 --- a/sysdeps/sparc/sparc32/dotmul.S +++ b/sysdeps/sparc/sparc32/dotmul.S @@ -10,11 +10,13 @@ * This code optimizes short (less than 13-bit) multiplies. */ -#include "sysdep.h" +#include <sysdep.h> + + ENTRY(.mul) mov %o0, %y ! multiplier -> Y andncc %o0, 0xfff, %g0 ! test bits 12..31 - be Lmul_shortway ! if zero, can do it the short way + be LOC(mul_shortway) ! if zero, can do it the short way andcc %g0, %g0, %o4 ! zero the partial product and clear N and V /* @@ -81,7 +83,7 @@ ENTRY(.mul) ! and put upper half in place #endif -Lmul_shortway: +LOC(mul_shortway): /* * Short multiply. 12 steps, followed by a final shift step. * The resulting bits are off by 12 and (32-12) = 20 bit positions, @@ -121,3 +123,5 @@ Lmul_shortway: or %o5, %o0, %o0 ! construct low part of result retl sra %o4, 20, %o1 ! ... and extract high part of result + +END(.mul) diff --git a/sysdeps/sparc/sparc32/elf/start.S b/sysdeps/sparc/sparc32/elf/start.S new file mode 100644 index 0000000000..8e01b30fc7 --- /dev/null +++ b/sysdeps/sparc/sparc32/elf/start.S @@ -0,0 +1,86 @@ +/* Startup code for elf32-sparc + Copyright (C) 1997 Free Software Foundation, Inc. + Contributed by Richard Henderson <richard@gnu.ai.mit.edu>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <sysdep.h> + + + .section ".text" + .align 4 + .global _start + .type _start,#function +_start: + + /* Terminate the stack frame, and reserve space for functions to + drop their arguments. */ + mov %g0, %fp + sub %sp, 6*4, %sp + + /* Save %g1. When starting a binary via the dynamic linker, %g1 + contains the address of the shared library termination function, + which we will register below with atexit() to be called by exit(). + If we are statically linked, this will be NULL. */ + + /* Do essential libc initialization (sp points to argc, argv, and envp) */ + call __libc_init_first + mov %g1, %l0 + + /* Now that we have the proper stack frame, register library termination + function, if there is any: */ + + cmp %l0, 0 + beq 1f + nop + call atexit + mov %l0, %o0 +1: + + /* Extract the arguments and environment as encoded on the stack. The + argument info starts after one register window (16 words) past the SP. */ + ld [%sp+22*4], %o0 + add %sp, 23*4, %o1 + sll %o0, 4, %o2 + add %o2, %o1, %o2 + sethi %hi(__environ), %g2 + add %o2, 4, %o2 + st %o2, [%g2+%lo(__environ)] + + mov %o0, %l0 /* tuck them away */ + mov %o1, %l1 + + /* Call _init, the entry point to our own .init section. */ + call _init + mov %o2, %l2 + + /* Register our .fini section with atexit. */ + sethi %hi(_fini), %o0 + call atexit + add %o0, %lo(_fini), %o0 + + /* Call the user's main and exit with its return value. */ + mov %l0, %o0 + mov %l1, %o1 + call main + mov %l2, %o2 + call exit + nop + + /* Die very horribly if exit returns. */ + unimp + + .size _start,.-_start diff --git a/sysdeps/sparc/sparc32/elf/start.c b/sysdeps/sparc/sparc32/elf/start.c deleted file mode 100644 index f1e80195ca..0000000000 --- a/sysdeps/sparc/sparc32/elf/start.c +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (C) 1991, 1992, 1993, 1994, 1997 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -#include <errno.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> - -extern char **__environ; - -extern void __libc_init_first __P ((int argc, char **argv, char **envp)); -extern int main __P ((int argc, char **argv, char **envp)); - -register long int sp asm("%sp"), fp asm("%fp"); - -void -_start (void) -{ - /* It is important that these be declared `register'. - Otherwise, when compiled without optimization, they are put on the - stack, which loses completely after we zero the FP. */ - register int argc; - register char **argv, **envp; - register long int g1 asm ("%g1"); - unsigned long int copy_g1 = g1; - - /* Unwind the frame built when we entered the function. */ - asm("restore"); - if (copy_g1) - atexit (copy_g1); - - /* And clear the frame pointer. */ - fp = 0; - - /* The argument info starts after one register - window (64 bytes) past the SP. */ - argc = ((int *) sp)[16]; - argv = (char **) &((int *) sp)[17]; - envp = &argv[argc + 1]; - __environ = envp; - - /* Allocate 24 bytes of stack space for the register save area. */ - sp -= 24; - __libc_init_first (argc, argv, envp); -#ifdef ELF_INIT_FINI - { - extern void _fini (void); - _init (); - atexit (_fini); - } -#endif - exit (main (argc, argv, envp)); -} diff --git a/sysdeps/sparc/sparc32/fpu/bits/fenv.h b/sysdeps/sparc/sparc32/fpu/bits/fenv.h index 0560dd99b8..6a76795d6a 100644 --- a/sysdeps/sparc/sparc32/fpu/bits/fenv.h +++ b/sysdeps/sparc/sparc32/fpu/bits/fenv.h @@ -72,5 +72,5 @@ typedef unsigned int fenv_t; #endif /* For internal use only: access the fp state register. */ -#define __fenv_stfsr(X) __asm__("stfsr %0" : "=m"(X)) -#define __fenv_ldfsr(X) __asm__ __volatile__("ldfsr %0" : : "m"(X)) +#define __fenv_stfsr(X) __asm__("st %%fsr,%0" : "=m"(X)) +#define __fenv_ldfsr(X) __asm__ __volatile__("ld %0,%%fsr" : : "m"(X)) diff --git a/sysdeps/sparc/sparc32/fpu/fpu_control.h b/sysdeps/sparc/sparc32/fpu/fpu_control.h index 5ef3824702..c069318970 100644 --- a/sysdeps/sparc/sparc32/fpu/fpu_control.h +++ b/sysdeps/sparc/sparc32/fpu/fpu_control.h @@ -41,13 +41,11 @@ /* Now two recommended cw */ -/* Linux default: +/* Linux and IEEE default: - extended precision - rounding to nearest - - exceptions on overflow, zero divide and NaN */ -#define _FPU_DEFAULT 0x1e - -/* IEEE: same as above, but exceptions */ + - no exceptions. */ +#define _FPU_DEFAULT 0x0 #define _FPU_IEEE 0x0 /* Type of the control word. */ diff --git a/sysdeps/sparc/sparc32/lshift.S b/sysdeps/sparc/sparc32/lshift.S index 4f0595f2fb..35aa9378b3 100644 --- a/sysdeps/sparc/sparc32/lshift.S +++ b/sysdeps/sparc/sparc32/lshift.S @@ -1,19 +1,19 @@ -! sparc __mpn_lshift -- - -! Copyright (C) 1995, 1996 Free Software Foundation, Inc. - +! Sparc __mpn_lshift -- +! +! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. +! ! This file is part of the GNU MP Library. - +! ! The GNU MP Library is free software; you can redistribute it and/or modify ! it under the terms of the GNU Library General Public License as published by ! the Free Software Foundation; either version 2 of the License, or (at your ! option) any later version. - +! ! The GNU MP Library is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ! License for more details. - +! ! You should have received a copy of the GNU Library General Public License ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, @@ -21,17 +21,14 @@ ! INPUT PARAMETERS -! res_ptr %o0 -! src_ptr %o1 -! size %o2 -! cnt %o3 +! RES_PTR %o0 +! SRC_PTR %o1 +! SIZE %o2 +! CNT %o3 -#include "sysdep.h" +#include <sysdep.h> - .text - .align 4 - .global C_SYMBOL_NAME(__mpn_lshift) -C_SYMBOL_NAME(__mpn_lshift): +ENTRY(__mpn_lshift) sll %o2,2,%g1 add %o1,%g1,%o1 ! make %o1 point at end of src ld [%o1-4],%g2 ! load first limb @@ -40,12 +37,13 @@ C_SYMBOL_NAME(__mpn_lshift): add %o2,-1,%o2 andcc %o2,4-1,%g4 ! number of limbs in first loop srl %g2,%o5,%g1 ! compute function result - be L0 ! if multiple of 4 limbs, skip first loop + be LOC(0) ! if multiple of 4 limbs, skip first loop st %g1,[%sp+80] sub %o2,%g4,%o2 ! adjust count for main loop -Loop0: ld [%o1-8],%g3 +LOC(loop0): + ld [%o1-8],%g3 add %o0,-4,%o0 add %o1,-4,%o1 addcc %g4,-1,%g4 @@ -53,14 +51,15 @@ Loop0: ld [%o1-8],%g3 srl %g3,%o5,%g1 mov %g3,%g2 or %o4,%g1,%o4 - bne Loop0 + bne LOC(loop0) st %o4,[%o0+0] -L0: tst %o2 - be Lend +LOC(0): tst %o2 + be LOC(end) nop -Loop: ld [%o1-8],%g3 +LOC(loop): + ld [%o1-8],%g3 add %o0,-16,%o0 addcc %o2,-4,%o2 sll %g2,%o3,%o4 @@ -86,10 +85,13 @@ Loop: ld [%o1-8],%g3 add %o1,-16,%o1 or %g4,%g1,%g4 - bne Loop + bne LOC(loop) st %g4,[%o0+0] -Lend: sll %g2,%o3,%g2 +LOC(end): + sll %g2,%o3,%g2 st %g2,[%o0-4] retl ld [%sp+80],%o0 + +END(__mpn_lshift) diff --git a/sysdeps/sparc/sparc32/mul_1.S b/sysdeps/sparc/sparc32/mul_1.S index 142fd8ba2a..d39ec61028 100644 --- a/sysdeps/sparc/sparc32/mul_1.S +++ b/sysdeps/sparc/sparc32/mul_1.S @@ -1,20 +1,20 @@ ! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store ! the result in a second limb vector. - -! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. - +! +! Copyright (C) 1992, 1993, 1994, 1997 Free Software Foundation, Inc. +! ! This file is part of the GNU MP Library. - +! ! The GNU MP Library is free software; you can redistribute it and/or modify ! it under the terms of the GNU Library General Public License as published by ! the Free Software Foundation; either version 2 of the License, or (at your ! option) any later version. - +! ! The GNU MP Library is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ! License for more details. - +! ! You should have received a copy of the GNU Library General Public License ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, @@ -22,10 +22,10 @@ ! INPUT PARAMETERS -! res_ptr o0 -! s1_ptr o1 -! size o2 -! s2_limb o3 +! RES_PTR o0 +! S1_PTR o1 +! SIZE o2 +! S2_LIMB o3 ! ADD CODE FOR SMALL MULTIPLIERS! !1: ld @@ -89,12 +89,9 @@ ! sll a,29,y2 ! st x, -#include "sysdep.h" +#include <sysdep.h> -.text - .align 4 - .global C_SYMBOL_NAME(__mpn_mul_1) -C_SYMBOL_NAME(__mpn_mul_1): +ENTRY(__mpn_mul_1) ! Make S1_PTR and RES_PTR point at the end of their blocks ! and put (- 4 x SIZE) in index/loop counter. sll %o2,2,%o2 @@ -103,16 +100,16 @@ C_SYMBOL_NAME(__mpn_mul_1): sub %g0,%o2,%o2 cmp %o3,0xfff - bgu Large + bgu LOC(large) nop ld [%o1+%o2],%o5 mov 0,%o0 - b L0 + b LOC(0) add %o4,-4,%o4 -Loop0: +LOC(loop0): st %g1,[%o4+%o2] -L0: wr %g0,%o3,%y +LOC(0): wr %g0,%o3,%y sra %o5,31,%g2 and %o3,%g2,%g2 andcc %g1,0,%g1 @@ -138,21 +135,22 @@ L0: wr %g0,%o3,%y addcc %g1,%o0,%g1 addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb addcc %o2,4,%o2 ! loop counter - bne,a Loop0 + bne,a LOC(loop0) ld [%o1+%o2],%o5 retl st %g1,[%o4+%o2] -Large: ld [%o1+%o2],%o5 +LOC(large): + ld [%o1+%o2],%o5 mov 0,%o0 sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0 - b L1 + b LOC(1) add %o4,-4,%o4 -Loop: +LOC(loop): st %g3,[%o4+%o2] -L1: wr %g0,%o5,%y +LOC(1): wr %g0,%o5,%y and %o5,%g4,%g2 ! g2 = S1_LIMB iff S2_LIMB < 0, else 0 andcc %g0,%g0,%g1 mulscc %g1,%o3,%g1 @@ -192,8 +190,10 @@ L1: wr %g0,%o5,%y addcc %g3,%o0,%g3 addx %g2,%g1,%o0 ! add sign-compensation and cy to hi limb addcc %o2,4,%o2 ! loop counter - bne,a Loop + bne,a LOC(loop) ld [%o1+%o2],%o5 retl st %g3,[%o4+%o2] + +END(__mpn_mul_1) diff --git a/sysdeps/sparc/sparc32/rem.S b/sysdeps/sparc/sparc32/rem.S index d50f1af630..effacee0d0 100644 --- a/sysdeps/sparc/sparc32/rem.S +++ b/sysdeps/sparc/sparc32/rem.S @@ -37,22 +37,14 @@ -#include "sysdep.h" -#ifdef __linux__ -#include <asm/traps.h> -#else -#ifdef __svr4__ +#include <sysdep.h> #include <sys/trap.h> -#else -#include <machine/trap.h> -#endif -#endif ENTRY(.rem) ! compute sign of result; if neither is negative, no problem orcc %o1, %o0, %g0 ! either negative? bge 2f ! no, go do the divide - mov %o0, %g6 ! sign of remainder matches %o0 + mov %o0, %g3 ! sign of remainder matches %o0 tst %o1 bge 1f tst %o0 @@ -76,11 +68,11 @@ ENTRY(.rem) 1: cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) + blu LOC(got_result) ! (and algorithm fails otherwise) clr %o2 sethi %hi(1 << (32 - 4 - 1)), %g1 cmp %o3, %g1 - blu Lnot_really_big + blu LOC(not_really_big) clr %o4 ! Here the dividend is >= 2**(31-N) or so. We must be careful here, @@ -91,15 +83,15 @@ ENTRY(.rem) 1: cmp %o5, %g1 bgeu 3f - mov 1, %g7 + mov 1, %g2 sll %o5, 4, %o5 b 1b add %o4, 1, %o4 - ! Now compute %g7. + ! Now compute %g2. 2: addcc %o5, %o5, %o5 - bcc Lnot_too_big - add %g7, 1, %g7 + bcc LOC(not_too_big) + add %g2, 1, %g2 ! We get here if the %o1 overflowed while shifting. ! This means that %o3 has the high-order bit set. @@ -107,20 +99,20 @@ ENTRY(.rem) sll %g1, 4, %g1 ! high order bit srl %o5, 1, %o5 ! rest of %o5 add %o5, %g1, %o5 - b Ldo_single_div - sub %g7, 1, %g7 + b LOC(do_single_div) + sub %g2, 1, %g2 - Lnot_too_big: + LOC(not_too_big): 3: cmp %o5, %o3 blu 2b nop - be Ldo_single_div + be LOC(do_single_div) nop /* NB: these are commented out in the V8-Sparc manual as well */ /* (I do not understand this) */ ! %o5 > %o3: went too far: back up 1 step ! srl %o5, 1, %o5 - ! dec %g7 + ! dec %g2 ! do single-bit divide steps ! ! We have to be careful here. We know that %o3 >= %o5, so we can do the @@ -129,15 +121,15 @@ ENTRY(.rem) ! order bit set in the first step, just falling into the regular ! division loop will mess up the first time around. ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide + LOC(do_single_div): + subcc %g2, 1, %g2 + bl LOC(end_regular_divide) nop sub %o3, %o5, %o3 mov 1, %o2 - b Lend_single_divloop + b LOC(end_single_divloop) nop - Lsingle_divloop: + LOC(single_divloop): sll %o2, 1, %o2 bl 1f srl %o5, 1, %o5 @@ -149,221 +141,223 @@ ENTRY(.rem) add %o3, %o5, %o3 sub %o2, 1, %o2 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop + LOC(end_single_divloop): + subcc %g2, 1, %g2 + bge LOC(single_divloop) tst %o3 - b,a Lend_regular_divide + b,a LOC(end_regular_divide) -Lnot_really_big: +LOC(not_really_big): 1: sll %o5, 4, %o5 cmp %o5, %o3 bleu 1b addcc %o4, 1, %o4 - be Lgot_result + be LOC(got_result) sub %o4, 1, %o4 tst %o3 ! set up for initial iteration -Ldivloop: +LOC(divloop): sll %o2, 4, %o2 ! depth 1, accumulated bits 0 - bl L.1.16 + bl LOC(1.16) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 2, accumulated bits 1 - bl L.2.17 + bl LOC(2.17) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 3, accumulated bits 3 - bl L.3.19 + bl LOC(3.19) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits 7 - bl L.4.23 + bl LOC(4.23) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (7*2+1), %o2 - -L.4.23: + +LOC(4.23): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (7*2-1), %o2 - - -L.3.19: + + +LOC(3.19): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits 5 - bl L.4.21 + bl LOC(4.21) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (5*2+1), %o2 - -L.4.21: + +LOC(4.21): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (5*2-1), %o2 - - - -L.2.17: + + + +LOC(2.17): ! remainder is negative addcc %o3,%o5,%o3 ! depth 3, accumulated bits 1 - bl L.3.17 + bl LOC(3.17) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits 3 - bl L.4.19 + bl LOC(4.19) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (3*2+1), %o2 - -L.4.19: + +LOC(4.19): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (3*2-1), %o2 - - -L.3.17: + + +LOC(3.17): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits 1 - bl L.4.17 + bl LOC(4.17) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (1*2+1), %o2 - -L.4.17: + +LOC(4.17): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (1*2-1), %o2 - - - - -L.1.16: + + + + +LOC(1.16): ! remainder is negative addcc %o3,%o5,%o3 ! depth 2, accumulated bits -1 - bl L.2.15 + bl LOC(2.15) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 3, accumulated bits -1 - bl L.3.15 + bl LOC(3.15) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits -1 - bl L.4.15 + bl LOC(4.15) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-1*2+1), %o2 - -L.4.15: + +LOC(4.15): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-1*2-1), %o2 - - -L.3.15: + + +LOC(3.15): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits -3 - bl L.4.13 + bl LOC(4.13) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-3*2+1), %o2 - -L.4.13: + +LOC(4.13): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-3*2-1), %o2 - - - -L.2.15: + + + +LOC(2.15): ! remainder is negative addcc %o3,%o5,%o3 ! depth 3, accumulated bits -3 - bl L.3.13 + bl LOC(3.13) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits -5 - bl L.4.11 + bl LOC(4.11) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-5*2+1), %o2 - -L.4.11: + +LOC(4.11): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-5*2-1), %o2 - - -L.3.13: + + +LOC(3.13): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits -7 - bl L.4.9 + bl LOC(4.9) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-7*2+1), %o2 - -L.4.9: + +LOC(4.9): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-7*2-1), %o2 - - - - + + + + 9: -Lend_regular_divide: +LOC(end_regular_divide): subcc %o4, 1, %o4 - bge Ldivloop + bge LOC(divloop) tst %o3 - bl,a Lgot_result + bl,a LOC(got_result) ! non-restoring fixup here (one instruction only!) add %o3, %o1, %o3 -Lgot_result: +LOC(got_result): ! check to see if answer should be < 0 - tst %g6 + tst %g3 bl,a 1f sub %g0, %o3, %o3 1: retl mov %o3, %o0 + +END(.rem) diff --git a/sysdeps/sparc/sparc32/rshift.S b/sysdeps/sparc/sparc32/rshift.S index fea4f3b926..db1d2526cd 100644 --- a/sysdeps/sparc/sparc32/rshift.S +++ b/sysdeps/sparc/sparc32/rshift.S @@ -1,19 +1,19 @@ ! sparc __mpn_rshift -- - -! Copyright (C) 1995, 1996 Free Software Foundation, Inc. - +! +! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. +! ! This file is part of the GNU MP Library. - +! ! The GNU MP Library is free software; you can redistribute it and/or modify ! it under the terms of the GNU Library General Public License as published by ! the Free Software Foundation; either version 2 of the License, or (at your ! option) any later version. - +! ! The GNU MP Library is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ! License for more details. - +! ! You should have received a copy of the GNU Library General Public License ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, @@ -21,28 +21,26 @@ ! INPUT PARAMETERS -! res_ptr %o0 -! src_ptr %o1 -! size %o2 -! cnt %o3 +! RES_PTR %o0 +! SRC_PTR %o1 +! SIZE %o2 +! CNT %o3 -#include "sysdep.h" +#include <sysdep.h> - .text - .align 4 - .global C_SYMBOL_NAME(__mpn_rshift) -C_SYMBOL_NAME(__mpn_rshift): +ENTRY(__mpn_rshift) ld [%o1],%g2 ! load first limb sub %g0,%o3,%o5 ! negate shift count add %o2,-1,%o2 andcc %o2,4-1,%g4 ! number of limbs in first loop sll %g2,%o5,%g1 ! compute function result - be L0 ! if multiple of 4 limbs, skip first loop + be LOC(0) ! if multiple of 4 limbs, skip first loop st %g1,[%sp+80] sub %o2,%g4,%o2 ! adjust count for main loop -Loop0: ld [%o1+4],%g3 +LOC(loop0): + ld [%o1+4],%g3 add %o0,4,%o0 add %o1,4,%o1 addcc %g4,-1,%g4 @@ -50,14 +48,15 @@ Loop0: ld [%o1+4],%g3 sll %g3,%o5,%g1 mov %g3,%g2 or %o4,%g1,%o4 - bne Loop0 + bne LOC(loop0) st %o4,[%o0-4] -L0: tst %o2 - be Lend +LOC(0): tst %o2 + be LOC(end) nop -Loop: ld [%o1+4],%g3 +LOC(loop): + ld [%o1+4],%g3 add %o0,16,%o0 addcc %o2,-4,%o2 srl %g2,%o3,%o4 @@ -83,10 +82,13 @@ Loop: ld [%o1+4],%g3 add %o1,16,%o1 or %g4,%g1,%g4 - bne Loop + bne LOC(loop) st %g4,[%o0-4] -Lend: srl %g2,%o3,%g2 +LOC(end): + srl %g2,%o3,%g2 st %g2,[%o0-0] retl ld [%sp+80],%o0 + +END(__mpn_rshift) diff --git a/sysdeps/sparc/sparc32/sdiv.S b/sysdeps/sparc/sparc32/sdiv.S index 02ed2e973c..4b325476d7 100644 --- a/sysdeps/sparc/sparc32/sdiv.S +++ b/sysdeps/sparc/sparc32/sdiv.S @@ -37,22 +37,14 @@ -#include "sysdep.h" -#ifdef __linux__ -#include <asm/traps.h> -#else -#ifdef __svr4__ +#include <sysdep.h> #include <sys/trap.h> -#else -#include <machine/trap.h> -#endif -#endif ENTRY(.div) ! compute sign of result; if neither is negative, no problem orcc %o1, %o0, %g0 ! either negative? bge 2f ! no, go do the divide - xor %o1, %o0, %g6 ! compute sign in any case + xor %o1, %o0, %g3 ! compute sign in any case tst %o1 bge 1f tst %o0 @@ -76,11 +68,11 @@ ENTRY(.div) 1: cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) + blu LOC(got_result) ! (and algorithm fails otherwise) clr %o2 sethi %hi(1 << (32 - 4 - 1)), %g1 cmp %o3, %g1 - blu Lnot_really_big + blu LOC(not_really_big) clr %o4 ! Here the dividend is >= 2**(31-N) or so. We must be careful here, @@ -91,15 +83,15 @@ ENTRY(.div) 1: cmp %o5, %g1 bgeu 3f - mov 1, %g7 + mov 1, %g2 sll %o5, 4, %o5 b 1b add %o4, 1, %o4 - ! Now compute %g7. + ! Now compute %g2. 2: addcc %o5, %o5, %o5 - bcc Lnot_too_big - add %g7, 1, %g7 + bcc LOC(not_too_big) + add %g2, 1, %g2 ! We get here if the %o1 overflowed while shifting. ! This means that %o3 has the high-order bit set. @@ -107,20 +99,20 @@ ENTRY(.div) sll %g1, 4, %g1 ! high order bit srl %o5, 1, %o5 ! rest of %o5 add %o5, %g1, %o5 - b Ldo_single_div - sub %g7, 1, %g7 + b LOC(do_single_div) + sub %g2, 1, %g2 - Lnot_too_big: + LOC(not_too_big): 3: cmp %o5, %o3 blu 2b nop - be Ldo_single_div + be LOC(do_single_div) nop /* NB: these are commented out in the V8-Sparc manual as well */ /* (I do not understand this) */ ! %o5 > %o3: went too far: back up 1 step ! srl %o5, 1, %o5 - ! dec %g7 + ! dec %g2 ! do single-bit divide steps ! ! We have to be careful here. We know that %o3 >= %o5, so we can do the @@ -129,15 +121,15 @@ ENTRY(.div) ! order bit set in the first step, just falling into the regular ! division loop will mess up the first time around. ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide + LOC(do_single_div): + subcc %g2, 1, %g2 + bl LOC(end_regular_divide) nop sub %o3, %o5, %o3 mov 1, %o2 - b Lend_single_divloop + b LOC(end_single_divloop) nop - Lsingle_divloop: + LOC(single_divloop): sll %o2, 1, %o2 bl 1f srl %o5, 1, %o5 @@ -149,221 +141,223 @@ ENTRY(.div) add %o3, %o5, %o3 sub %o2, 1, %o2 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop + LOC(end_single_divloop): + subcc %g2, 1, %g2 + bge LOC(single_divloop) tst %o3 - b,a Lend_regular_divide + b,a LOC(end_regular_divide) -Lnot_really_big: +LOC(not_really_big): 1: sll %o5, 4, %o5 cmp %o5, %o3 bleu 1b addcc %o4, 1, %o4 - be Lgot_result + be LOC(got_result) sub %o4, 1, %o4 tst %o3 ! set up for initial iteration -Ldivloop: +LOC(divloop): sll %o2, 4, %o2 ! depth 1, accumulated bits 0 - bl L.1.16 + bl LOC(1.16) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 2, accumulated bits 1 - bl L.2.17 + bl LOC(2.17) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 3, accumulated bits 3 - bl L.3.19 + bl LOC(3.19) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits 7 - bl L.4.23 + bl LOC(4.23) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (7*2+1), %o2 - -L.4.23: + +LOC(4.23): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (7*2-1), %o2 - - -L.3.19: + + +LOC(3.19): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits 5 - bl L.4.21 + bl LOC(4.21) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (5*2+1), %o2 - -L.4.21: + +LOC(4.21): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (5*2-1), %o2 - - - -L.2.17: + + + +LOC(2.17): ! remainder is negative addcc %o3,%o5,%o3 ! depth 3, accumulated bits 1 - bl L.3.17 + bl LOC(3.17) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits 3 - bl L.4.19 + bl LOC(4.19) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (3*2+1), %o2 - -L.4.19: + +LOC(4.19): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (3*2-1), %o2 - - -L.3.17: + + +LOC(3.17): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits 1 - bl L.4.17 + bl LOC(4.17) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (1*2+1), %o2 - -L.4.17: + +LOC(4.17): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (1*2-1), %o2 - - - - -L.1.16: + + + + +LOC(1.16): ! remainder is negative addcc %o3,%o5,%o3 ! depth 2, accumulated bits -1 - bl L.2.15 + bl LOC(2.15) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 3, accumulated bits -1 - bl L.3.15 + bl LOC(3.15) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits -1 - bl L.4.15 + bl LOC(4.15) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-1*2+1), %o2 - -L.4.15: + +LOC(4.15): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-1*2-1), %o2 - - -L.3.15: + + +LOC(3.15): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits -3 - bl L.4.13 + bl LOC(4.13) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-3*2+1), %o2 - -L.4.13: + +LOC(4.13): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-3*2-1), %o2 - - - -L.2.15: + + + +LOC(2.15): ! remainder is negative addcc %o3,%o5,%o3 ! depth 3, accumulated bits -3 - bl L.3.13 + bl LOC(3.13) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits -5 - bl L.4.11 + bl LOC(4.11) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-5*2+1), %o2 - -L.4.11: + +LOC(4.11): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-5*2-1), %o2 - - -L.3.13: + + +LOC(3.13): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits -7 - bl L.4.9 + bl LOC(4.9) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-7*2+1), %o2 - -L.4.9: + +LOC(4.9): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-7*2-1), %o2 - - - - + + + + 9: -Lend_regular_divide: +LOC(end_regular_divide): subcc %o4, 1, %o4 - bge Ldivloop + bge LOC(divloop) tst %o3 - bl,a Lgot_result + bl,a LOC(got_result) ! non-restoring fixup here (one instruction only!) sub %o2, 1, %o2 -Lgot_result: +LOC(got_result): ! check to see if answer should be < 0 - tst %g6 + tst %g3 bl,a 1f sub %g0, %o2, %o2 1: retl mov %o2, %o0 + +END(.div) diff --git a/sysdeps/sparc/sparc32/setjmp.S b/sysdeps/sparc/sparc32/setjmp.S index 772ec723a3..58d94119ef 100644 --- a/sysdeps/sparc/sparc32/setjmp.S +++ b/sysdeps/sparc/sparc32/setjmp.S @@ -22,33 +22,29 @@ #define _ASM 1 #include <bits/setjmp.h> +ENTRY(_setjmp) + b 1f + set 0, %o1 +END(_setjmp) + +ENTRY(setjmp) + set 1, %o1 +END(setjmp) + ENTRY (__sigsetjmp) - /* Save our SP and FP; in the delay slot of the jump, save our - return PC. Save the signal mask if requested with a tail-call - for simplicity; it always returns zero. */ - ta ST_FLUSH_WINDOWS -#ifdef PIC - mov %o7,%g1 -2: - call 1f - nop 1: - sethi %hi(_GLOBAL_OFFSET_TABLE_-(2b-.)),%g2 - or %g2,%lo(_GLOBAL_OFFSET_TABLE_-(2b-.)),%g2 - add %g2,%o7,%g2 - sethi %hi(C_SYMBOL_NAME (__sigjmp_save)), %g3 - or %g3,%lo(C_SYMBOL_NAME (__sigjmp_save)), %g3 - st %sp, [%o0 + (JB_SP * 4)] - st %fp, [%o0 + (JB_FP * 4)] - mov %g1,%o7 - ld [%g2+%g3],%g1 - jmp %g1 - st %o7, [%o0+(JB_PC*4)] -#else - sethi %hi(C_SYMBOL_NAME (__sigjmp_save)), %g1 - st %sp, [%o0 + (JB_SP*4)] - or %lo(C_SYMBOL_NAME (__sigjmp_save)), %g1, %g1 - st %fp, [%o0 + (JB_FP*4)] - jmp %g1 - st %o7, [%o0 + (JB_PC*4)] -#endif /* PIC */ + /* Save our PC, SP and FP. Save the signal mask if requested with + a tail-call for simplicity; it always returns zero. */ + ta ST_FLUSH_WINDOWS + + st %o7, [%o0 + (JB_PC * 4)] + st %sp, [%o0 + (JB_SP * 4)] + st %fp, [%o0 + (JB_FP * 4)] + + mov %o7, %g1 + call __sigjmp_save + mov %g1, %o7 +END(__sigsetjmp) + +weak_extern(_setjmp) +weak_extern(setjmp) diff --git a/sysdeps/sparc/sparc32/sparcv8/addmul_1.S b/sysdeps/sparc/sparc32/sparcv8/addmul_1.S index fb9ea7cf0e..7b871b8e8b 100644 --- a/sysdeps/sparc/sparc32/sparcv8/addmul_1.S +++ b/sysdeps/sparc/sparc32/sparcv8/addmul_1.S @@ -27,54 +27,52 @@ ! size o2 ! s2_limb o3 -#include "sysdep.h" +#include <sysdep.h> -.text - .align 4 - .global C_SYMBOL_NAME(__mpn_addmul_1) -C_SYMBOL_NAME(__mpn_addmul_1): - orcc %g0,%g0,%g2 +ENTRY(__mpn_addmul_1) ld [%o1+0],%o4 ! 1 - sll %o2,4,%g1 - and %g1,(4-1)<<4,%g1 -#if PIC + orcc %g0,%g0,%g2 mov %o7,%g4 ! Save return address register - call 1f - add %o7,LL-1f,%g3 -1: mov %g4,%o7 ! Restore return address register -#else - sethi %hi(LL),%g3 - or %g3,%lo(LL),%g3 -#endif - jmp %g3+%g1 - nop -LL: -LL00: add %o0,-4,%o0 - b Loop00 /* 4, 8, 12, ... */ - add %o1,-4,%o1 - nop -LL01: b Loop01 /* 1, 5, 9, ... */ + and %g1,(4-1)<<4,%g1 +1: call 2f + add %o7,3f-1b,%g3 +2: jmp %g3+%g1 + mov %g4,%o7 ! Restore return address register + + .align 4 +3: +LOC(00): + add %o0,-4,%o0 + b LOC(loop00) /* 4, 8, 12, ... */ + add %o1,-4,%o1 nop +LOC(01): + b LOC(loop01) /* 1, 5, 9, ... */ + nop nop nop -LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */ - b Loop10 - add %o1,4,%o1 +LOC(10): + add %o0,-12,%o0 /* 2, 6, 10, ... */ + b LOC(loop10) + add %o1,4,%o1 nop -LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */ - b Loop11 - add %o1,-8,%o1 +LOC(11): + add %o0,-8,%o0 /* 3, 7, 11, ... */ + b LOC(loop11) + add %o1,-8,%o1 nop -1: addcc %g3,%g2,%g3 ! 1 +LOC(loop): + addcc %g3,%g2,%g3 ! 1 ld [%o1+4],%o4 ! 2 rd %y,%g2 ! 1 addx %g0,%g2,%g2 ld [%o0+0],%g1 ! 2 addcc %g1,%g3,%g3 st %g3,[%o0+0] ! 1 -Loop00: umul %o4,%o3,%g3 ! 2 +LOC(loop00): + umul %o4,%o3,%g3 ! 2 ld [%o0+4],%g1 ! 2 addxcc %g3,%g2,%g3 ! 2 ld [%o1+8],%o4 ! 3 @@ -83,7 +81,8 @@ Loop00: umul %o4,%o3,%g3 ! 2 nop addcc %g1,%g3,%g3 st %g3,[%o0+4] ! 2 -Loop11: umul %o4,%o3,%g3 ! 3 +LOC(loop11): + umul %o4,%o3,%g3 ! 3 addxcc %g3,%g2,%g3 ! 3 ld [%o1+12],%o4 ! 4 rd %y,%g2 ! 3 @@ -92,7 +91,8 @@ Loop11: umul %o4,%o3,%g3 ! 3 ld [%o0+8],%g1 ! 2 addcc %g1,%g3,%g3 st %g3,[%o0+8] ! 3 -Loop10: umul %o4,%o3,%g3 ! 4 +LOC(loop10): + umul %o4,%o3,%g3 ! 4 addxcc %g3,%g2,%g3 ! 4 ld [%o1+0],%o4 ! 1 rd %y,%g2 ! 4 @@ -102,9 +102,10 @@ Loop10: umul %o4,%o3,%g3 ! 4 st %g3,[%o0+12] ! 4 add %o0,16,%o0 addx %g0,%g2,%g2 -Loop01: addcc %o2,-4,%o2 - bg 1b - umul %o4,%o3,%g3 ! 1 +LOC(loop01): + addcc %o2,-4,%o2 + bg LOC(loop) + umul %o4,%o3,%g3 ! 1 addcc %g3,%g2,%g3 ! 4 rd %y,%g2 ! 4 @@ -112,13 +113,7 @@ Loop01: addcc %o2,-4,%o2 ld [%o0+0],%g1 ! 2 addcc %g1,%g3,%g3 st %g3,[%o0+0] ! 4 - addx %g0,%g2,%o0 - retl - nop - - -! umul, ld, addxcc, rd, st - -! umul, ld, addxcc, rd, ld, addcc, st, addx + addx %g0,%g2,%o0 +END(__mpn_addmul_1) diff --git a/sysdeps/sparc/sparc32/sparcv8/dotmul.S b/sysdeps/sparc/sparc32/sparcv8/dotmul.S new file mode 100644 index 0000000000..9b20cc3684 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv8/dotmul.S @@ -0,0 +1,13 @@ +/* + * Sparc v8 has multiply. + */ + +#include <sysdep.h> + +ENTRY(.mul) + + smul %o0, %o1, %o0 + retl + rd %y, %o1 + +END(.mul) diff --git a/sysdeps/sparc/sparc32/sparcv8/mul_1.S b/sysdeps/sparc/sparc32/sparcv8/mul_1.S index b641feb453..801e224c15 100644 --- a/sysdeps/sparc/sparc32/sparcv8/mul_1.S +++ b/sysdeps/sparc/sparc32/sparcv8/mul_1.S @@ -27,73 +27,77 @@ ! size o2 ! s2_limb o3 -#include "sysdep.h" +#include <sysdep.h> -.text - .align 8 - .global C_SYMBOL_NAME(__mpn_mul_1) -C_SYMBOL_NAME(__mpn_mul_1): +ENTRY(__mpn_mul_1) sll %o2,4,%g1 - and %g1,(4-1)<<4,%g1 -#if PIC mov %o7,%g4 ! Save return address register - call 1f - add %o7,LL-1f,%g3 -1: mov %g4,%o7 ! Restore return address register -#else - sethi %hi(LL),%g3 - or %g3,%lo(LL),%g3 -#endif + and %g1,(4-1)<<4,%g1 +1: call 2f + add %o7,3f-1b,%g3 +2: mov %g4,%o7 ! Restore return address register jmp %g3+%g1 - ld [%o1+0],%o4 ! 1 -LL: -LL00: add %o0,-4,%o0 + ld [%o1+0],%o4 ! 1 + + .align 4 +3: +LOC(00): + add %o0,-4,%o0 add %o1,-4,%o1 - b Loop00 /* 4, 8, 12, ... */ - orcc %g0,%g0,%g2 -LL01: b Loop01 /* 1, 5, 9, ... */ - orcc %g0,%g0,%g2 + b LOC(loop00) /* 4, 8, 12, ... */ + orcc %g0,%g0,%g2 +LOC(01): + b LOC(loop01) /* 1, 5, 9, ... */ + orcc %g0,%g0,%g2 nop nop -LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */ +LOC(10): + add %o0,-12,%o0 /* 2, 6, 10, ... */ add %o1,4,%o1 - b Loop10 - orcc %g0,%g0,%g2 + b LOC(loop10) + orcc %g0,%g0,%g2 nop -LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */ +LOC(11): + add %o0,-8,%o0 /* 3, 7, 11, ... */ add %o1,-8,%o1 - b Loop11 - orcc %g0,%g0,%g2 + b LOC(loop11) + orcc %g0,%g0,%g2 -Loop: addcc %g3,%g2,%g3 ! 1 +LOC(loop): + addcc %g3,%g2,%g3 ! 1 ld [%o1+4],%o4 ! 2 st %g3,[%o0+0] ! 1 rd %y,%g2 ! 1 -Loop00: umul %o4,%o3,%g3 ! 2 +LOC(loop00): + umul %o4,%o3,%g3 ! 2 addxcc %g3,%g2,%g3 ! 2 ld [%o1+8],%o4 ! 3 st %g3,[%o0+4] ! 2 rd %y,%g2 ! 2 -Loop11: umul %o4,%o3,%g3 ! 3 +LOC(loop11): + umul %o4,%o3,%g3 ! 3 addxcc %g3,%g2,%g3 ! 3 ld [%o1+12],%o4 ! 4 add %o1,16,%o1 st %g3,[%o0+8] ! 3 rd %y,%g2 ! 3 -Loop10: umul %o4,%o3,%g3 ! 4 +LOC(loop10): + umul %o4,%o3,%g3 ! 4 addxcc %g3,%g2,%g3 ! 4 ld [%o1+0],%o4 ! 1 st %g3,[%o0+12] ! 4 add %o0,16,%o0 rd %y,%g2 ! 4 addx %g0,%g2,%g2 -Loop01: addcc %o2,-4,%o2 - bg Loop - umul %o4,%o3,%g3 ! 1 +LOC(loop01): + addcc %o2,-4,%o2 + bg LOC(loop) + umul %o4,%o3,%g3 ! 1 addcc %g3,%g2,%g3 ! 4 st %g3,[%o0+0] ! 4 rd %y,%g2 ! 4 - retl - addx %g0,%g2,%o0 + addx %g0,%g2,%o0 + +END(__mpn_mul_1) diff --git a/sysdeps/sparc/sparc32/sparcv8/rem.S b/sysdeps/sparc/sparc32/sparcv8/rem.S new file mode 100644 index 0000000000..061e80093c --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv8/rem.S @@ -0,0 +1,18 @@ +/* + * Sparc v8 has divide. + */ + +#include <sysdep.h> + +ENTRY(.rem) + + sra %o0, 31, %o2 + wr %o2, 0, %y + sdivcc %o0, %o1, %o2 + bvs,a 1f + xnor %o2, %g0, %o2 +1: smul %o2, %o1, %o2 + retl + sub %o0, %o2, %o0 + +END(.rem) diff --git a/sysdeps/sparc/sparc32/sparcv8/sdiv.S b/sysdeps/sparc/sparc32/sparcv8/sdiv.S new file mode 100644 index 0000000000..81c3ac1c97 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv8/sdiv.S @@ -0,0 +1,14 @@ +/* + * Sparc v8 has divide. + */ + +#include <sysdep.h> + +ENTRY(.div) + + sra %o0, 31, %o2 + wr %o2, 0, %y + ret + sdiv %o0, %o1, %o0 + +END(.div) diff --git a/sysdeps/sparc/sparc32/sparcv8/submul_1.S b/sysdeps/sparc/sparc32/sparcv8/submul_1.S index e40119d011..c71f228511 100644 --- a/sysdeps/sparc/sparc32/sparcv8/submul_1.S +++ b/sysdeps/sparc/sparc32/sparcv8/submul_1.S @@ -27,12 +27,9 @@ ! size o2 ! s2_limb o3 -#include "sysdep.h" +#include <sysdep.h> -.text - .align 4 - .global C_SYMBOL_NAME(__mpn_submul_1) -C_SYMBOL_NAME(__mpn_submul_1): +ENTRY(__mpn_submul_1) sub %g0,%o2,%o2 ! negate ... sll %o2,2,%o2 ! ... and scale size sub %o1,%o2,%o1 ! o1 is offset s1_ptr @@ -40,7 +37,8 @@ C_SYMBOL_NAME(__mpn_submul_1): mov 0,%o0 ! clear cy_limb -Loop: ld [%o1+%o2],%o4 +LOC(loop): + ld [%o1+%o2],%o4 ld [%g1+%o2],%g2 umul %o4,%o3,%o5 rd %y,%g3 @@ -51,8 +49,10 @@ Loop: ld [%o1+%o2],%o4 st %g2,[%g1+%o2] addcc %o2,4,%o2 - bne Loop + bne LOC(loop) nop retl nop + +END(__mpn_submul_1) diff --git a/sysdeps/sparc/sparc32/sparcv8/udiv.S b/sysdeps/sparc/sparc32/sparcv8/udiv.S new file mode 100644 index 0000000000..4e3cddc63f --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv8/udiv.S @@ -0,0 +1,13 @@ +/* + * Sparc v8 has divide. + */ + +#include <sysdep.h> + +ENTRY(.udiv) + + wr %g0, 0, %y + retl + udiv %o0, %o1, %o0 + +END(.udiv) diff --git a/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S b/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S index 49c2398806..5b0d42923a 100644 --- a/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S +++ b/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S @@ -27,66 +27,75 @@ #include "sysdep.h" - .text - .align 4 - .global C_SYMBOL_NAME(__udiv_qrnnd) -C_SYMBOL_NAME(__udiv_qrnnd): +ENTRY(__udiv_qrnnd) tst %o3 - bneg Largedivisor + bneg LOC(largedivisor) mov 8,%g1 - b Lp1 + b LOC(p1) addxcc %o2,%o2,%o2 -Lplop: bcc Ln1 +LOC(plop): + bcc LOC(n1) addxcc %o2,%o2,%o2 -Lp1: addx %o1,%o1,%o1 +LOC(p1): + addx %o1,%o1,%o1 subcc %o1,%o3,%o4 - bcc Ln2 + bcc LOC(n2) addxcc %o2,%o2,%o2 -Lp2: addx %o1,%o1,%o1 +LOC(p2): + addx %o1,%o1,%o1 subcc %o1,%o3,%o4 - bcc Ln3 + bcc LOC(n3) addxcc %o2,%o2,%o2 -Lp3: addx %o1,%o1,%o1 +LOC(p3): + addx %o1,%o1,%o1 subcc %o1,%o3,%o4 - bcc Ln4 + bcc LOC(n4) addxcc %o2,%o2,%o2 -Lp4: addx %o1,%o1,%o1 +LOC(p4): + addx %o1,%o1,%o1 addcc %g1,-1,%g1 - bne Lplop + bne LOC(plop) subcc %o1,%o3,%o4 - bcc Ln5 + bcc LOC(n5) addxcc %o2,%o2,%o2 -Lp5: st %o1,[%o0] +LOC(p5): + st %o1,[%o0] retl xnor %g0,%o2,%o0 -Lnlop: bcc Lp1 +LOC(nlop): + bcc LOC(p1) addxcc %o2,%o2,%o2 -Ln1: addx %o4,%o4,%o4 +LOC(n1): + addx %o4,%o4,%o4 subcc %o4,%o3,%o1 - bcc Lp2 + bcc LOC(p2) addxcc %o2,%o2,%o2 -Ln2: addx %o4,%o4,%o4 +LOC(n2): + addx %o4,%o4,%o4 subcc %o4,%o3,%o1 - bcc Lp3 + bcc LOC(p3) addxcc %o2,%o2,%o2 -Ln3: addx %o4,%o4,%o4 +LOC(n3): + addx %o4,%o4,%o4 subcc %o4,%o3,%o1 - bcc Lp4 + bcc LOC(p4) addxcc %o2,%o2,%o2 -Ln4: addx %o4,%o4,%o4 +LOC(n4): + addx %o4,%o4,%o4 addcc %g1,-1,%g1 - bne Lnlop + bne LOC(nlop) subcc %o4,%o3,%o1 - bcc Lp5 + bcc LOC(p5) addxcc %o2,%o2,%o2 -Ln5: st %o4,[%o0] +LOC(n5): + st %o4,[%o0] retl xnor %g0,%o2,%o0 -Largedivisor: +LOC(largedivisor): and %o2,1,%o5 ! %o5 = n0 & 1 srl %o2,1,%o2 @@ -98,89 +107,109 @@ Largedivisor: srl %o3,1,%g3 ! %g3 = floor(d / 2) add %g3,%g2,%g3 ! %g3 = ceil(d / 2) - b LLp1 + b LOC(Lp1) addxcc %o2,%o2,%o2 -LLplop: bcc LLn1 +LOC(Lplop): + bcc LOC(Ln1) addxcc %o2,%o2,%o2 -LLp1: addx %o1,%o1,%o1 +LOC(Lp1): + addx %o1,%o1,%o1 subcc %o1,%g3,%o4 - bcc LLn2 + bcc LOC(Ln2) addxcc %o2,%o2,%o2 -LLp2: addx %o1,%o1,%o1 +LOC(Lp2): + addx %o1,%o1,%o1 subcc %o1,%g3,%o4 - bcc LLn3 + bcc LOC(Ln3) addxcc %o2,%o2,%o2 -LLp3: addx %o1,%o1,%o1 +LOC(Lp3): + addx %o1,%o1,%o1 subcc %o1,%g3,%o4 - bcc LLn4 + bcc LOC(Ln4) addxcc %o2,%o2,%o2 -LLp4: addx %o1,%o1,%o1 +LOC(Lp4): + addx %o1,%o1,%o1 addcc %g1,-1,%g1 - bne LLplop + bne LOC(Lplop) subcc %o1,%g3,%o4 - bcc LLn5 + bcc LOC(Ln5) addxcc %o2,%o2,%o2 -LLp5: add %o1,%o1,%o1 ! << 1 +LOC(Lp5): + add %o1,%o1,%o1 ! << 1 tst %g2 - bne Oddp + bne LOC(Oddp) add %o5,%o1,%o1 st %o1,[%o0] retl xnor %g0,%o2,%o0 -LLnlop: bcc LLp1 +LOC(Lnlop): + bcc LOC(Lp1) addxcc %o2,%o2,%o2 -LLn1: addx %o4,%o4,%o4 +LOC(Ln1): + addx %o4,%o4,%o4 subcc %o4,%g3,%o1 - bcc LLp2 + bcc LOC(Lp2) addxcc %o2,%o2,%o2 -LLn2: addx %o4,%o4,%o4 +LOC(Ln2): + addx %o4,%o4,%o4 subcc %o4,%g3,%o1 - bcc LLp3 + bcc LOC(Lp3) addxcc %o2,%o2,%o2 -LLn3: addx %o4,%o4,%o4 +LOC(Ln3): + addx %o4,%o4,%o4 subcc %o4,%g3,%o1 - bcc LLp4 + bcc LOC(Lp4) addxcc %o2,%o2,%o2 -LLn4: addx %o4,%o4,%o4 +LOC(Ln4): + addx %o4,%o4,%o4 addcc %g1,-1,%g1 - bne LLnlop + bne LOC(Lnlop) subcc %o4,%g3,%o1 - bcc LLp5 + bcc LOC(Lp5) addxcc %o2,%o2,%o2 -LLn5: add %o4,%o4,%o4 ! << 1 +LOC(Ln5): + add %o4,%o4,%o4 ! << 1 tst %g2 - bne Oddn + bne LOC(Oddn) add %o5,%o4,%o4 st %o4,[%o0] retl xnor %g0,%o2,%o0 -Oddp: xnor %g0,%o2,%o2 +LOC(Oddp): + xnor %g0,%o2,%o2 ! q' in %o2. r' in %o1 addcc %o1,%o2,%o1 - bcc LLp6 + bcc LOC(Lp6) addx %o2,0,%o2 sub %o1,%o3,%o1 -LLp6: subcc %o1,%o3,%g0 - bcs LLp7 +LOC(Lp6): + subcc %o1,%o3,%g0 + bcs LOC(Lp7) subx %o2,-1,%o2 sub %o1,%o3,%o1 -LLp7: st %o1,[%o0] +LOC(Lp7): + st %o1,[%o0] retl mov %o2,%o0 -Oddn: xnor %g0,%o2,%o2 +LOC(Oddn): + xnor %g0,%o2,%o2 ! q' in %o2. r' in %o4 addcc %o4,%o2,%o4 - bcc LLn6 + bcc LOC(Ln6) addx %o2,0,%o2 sub %o4,%o3,%o4 -LLn6: subcc %o4,%o3,%g0 - bcs LLn7 +LOC(Ln6): + subcc %o4,%o3,%g0 + bcs LOC(Ln7) subx %o2,-1,%o2 sub %o4,%o3,%o4 -LLn7: st %o4,[%o0] +LOC(Ln7): + st %o4,[%o0] retl mov %o2,%o0 + +END(__udiv_qrnnd) diff --git a/sysdeps/sparc/sparc32/sparcv8/umul.S b/sysdeps/sparc/sparc32/sparcv8/umul.S new file mode 100644 index 0000000000..cec454a7dd --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv8/umul.S @@ -0,0 +1,13 @@ +/* + * Sparc v8 has multiply. + */ + +#include <sysdep.h> + +ENTRY(.umul) + + umul %o0, %o1, %o0 + retl + rd %y, %o1 + +END(.umul) diff --git a/sysdeps/sparc/sparc32/sparcv8/urem.S b/sysdeps/sparc/sparc32/sparcv8/urem.S new file mode 100644 index 0000000000..81e123f3da --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv8/urem.S @@ -0,0 +1,15 @@ +/* + * Sparc v8 has divide. + */ + +#include <sysdep.h> + +ENTRY(.urem) + + wr %g0, 0, %y + udiv %o0, %o1, %o2 + umul %o2, %o1, %o2 + retl + sub %o0, %o2, %o0 + +END(.urem) diff --git a/sysdeps/sparc/sparc32/sub_n.S b/sysdeps/sparc/sparc32/sub_n.S index b7a11958e2..74400600bd 100644 --- a/sysdeps/sparc/sparc32/sub_n.S +++ b/sysdeps/sparc/sparc32/sub_n.S @@ -1,20 +1,20 @@ ! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and ! store difference in a third limb vector. - +! ! Copyright (C) 1995, 1996 Free Software Foundation, Inc. - +! ! This file is part of the GNU MP Library. - +! ! The GNU MP Library is free software; you can redistribute it and/or modify ! it under the terms of the GNU Library General Public License as published by ! the Free Software Foundation; either version 2 of the License, or (at your ! option) any later version. - +! ! The GNU MP Library is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ! License for more details. - +! ! You should have received a copy of the GNU Library General Public License ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, @@ -22,290 +22,308 @@ ! INPUT PARAMETERS -#define res_ptr %o0 -#define s1_ptr %o1 -#define s2_ptr %o2 -#define size %o3 +#define RES_PTR %o0 +#define S1_PTR %o1 +#define S2_PTR %o2 +#define SIZE %o3 -#include "sysdep.h" +#include <sysdep.h> - .text - .align 4 - .global C_SYMBOL_NAME(__mpn_sub_n) -C_SYMBOL_NAME(__mpn_sub_n): - xor s2_ptr,res_ptr,%g1 +ENTRY(__mpn_sub_n) + xor S2_PTR,RES_PTR,%g1 andcc %g1,4,%g0 - bne L1 ! branch if alignment differs + bne LOC(1) ! branch if alignment differs nop ! ** V1a ** - andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0 - be L_v1 ! if no, branch + andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0 + be LOC(v1) ! if no, branch nop -/* Add least significant limb separately to align res_ptr and s2_ptr */ - ld [s1_ptr],%g4 - add s1_ptr,4,s1_ptr - ld [s2_ptr],%g2 - add s2_ptr,4,s2_ptr - add size,-1,size +/* Add least significant limb separately to align RES_PTR and S2_PTR */ + ld [S1_PTR],%g4 + add S1_PTR,4,S1_PTR + ld [S2_PTR],%g2 + add S2_PTR,4,S2_PTR + add SIZE,-1,SIZE subcc %g4,%g2,%o4 - st %o4,[res_ptr] - add res_ptr,4,res_ptr -L_v1: addx %g0,%g0,%o4 ! save cy in register - cmp size,2 ! if size < 2 ... - bl Lend2 ! ... branch to tail code + st %o4,[RES_PTR] + add RES_PTR,4,RES_PTR +LOC(v1): + addx %g0,%g0,%o4 ! save cy in register + cmp SIZE,2 ! if SIZE < 2 ... + bl LOC(end2) ! ... branch to tail code subcc %g0,%o4,%g0 ! restore cy - ld [s1_ptr+0],%g4 - addcc size,-10,size - ld [s1_ptr+4],%g1 - ldd [s2_ptr+0],%g2 - blt Lfin1 + ld [S1_PTR+0],%g4 + addcc SIZE,-10,SIZE + ld [S1_PTR+4],%g1 + ldd [S2_PTR+0],%g2 + blt LOC(fin1) subcc %g0,%o4,%g0 ! restore cy /* Add blocks of 8 limbs until less than 8 limbs remain */ -Loop1: subxcc %g4,%g2,%o4 - ld [s1_ptr+8],%g4 +LOC(loop1): + subxcc %g4,%g2,%o4 + ld [S1_PTR+8],%g4 subxcc %g1,%g3,%o5 - ld [s1_ptr+12],%g1 - ldd [s2_ptr+8],%g2 - std %o4,[res_ptr+0] + ld [S1_PTR+12],%g1 + ldd [S2_PTR+8],%g2 + std %o4,[RES_PTR+0] subxcc %g4,%g2,%o4 - ld [s1_ptr+16],%g4 + ld [S1_PTR+16],%g4 subxcc %g1,%g3,%o5 - ld [s1_ptr+20],%g1 - ldd [s2_ptr+16],%g2 - std %o4,[res_ptr+8] + ld [S1_PTR+20],%g1 + ldd [S2_PTR+16],%g2 + std %o4,[RES_PTR+8] subxcc %g4,%g2,%o4 - ld [s1_ptr+24],%g4 + ld [S1_PTR+24],%g4 subxcc %g1,%g3,%o5 - ld [s1_ptr+28],%g1 - ldd [s2_ptr+24],%g2 - std %o4,[res_ptr+16] + ld [S1_PTR+28],%g1 + ldd [S2_PTR+24],%g2 + std %o4,[RES_PTR+16] subxcc %g4,%g2,%o4 - ld [s1_ptr+32],%g4 + ld [S1_PTR+32],%g4 subxcc %g1,%g3,%o5 - ld [s1_ptr+36],%g1 - ldd [s2_ptr+32],%g2 - std %o4,[res_ptr+24] + ld [S1_PTR+36],%g1 + ldd [S2_PTR+32],%g2 + std %o4,[RES_PTR+24] addx %g0,%g0,%o4 ! save cy in register - addcc size,-8,size - add s1_ptr,32,s1_ptr - add s2_ptr,32,s2_ptr - add res_ptr,32,res_ptr - bge Loop1 + addcc SIZE,-8,SIZE + add S1_PTR,32,S1_PTR + add S2_PTR,32,S2_PTR + add RES_PTR,32,RES_PTR + bge LOC(loop1) subcc %g0,%o4,%g0 ! restore cy -Lfin1: addcc size,8-2,size - blt Lend1 +LOC(fin1): + addcc SIZE,8-2,SIZE + blt LOC(end1) subcc %g0,%o4,%g0 ! restore cy /* Add blocks of 2 limbs until less than 2 limbs remain */ -Loope1: subxcc %g4,%g2,%o4 - ld [s1_ptr+8],%g4 +LOC(loope1): + subxcc %g4,%g2,%o4 + ld [S1_PTR+8],%g4 subxcc %g1,%g3,%o5 - ld [s1_ptr+12],%g1 - ldd [s2_ptr+8],%g2 - std %o4,[res_ptr+0] + ld [S1_PTR+12],%g1 + ldd [S2_PTR+8],%g2 + std %o4,[RES_PTR+0] addx %g0,%g0,%o4 ! save cy in register - addcc size,-2,size - add s1_ptr,8,s1_ptr - add s2_ptr,8,s2_ptr - add res_ptr,8,res_ptr - bge Loope1 + addcc SIZE,-2,SIZE + add S1_PTR,8,S1_PTR + add S2_PTR,8,S2_PTR + add RES_PTR,8,RES_PTR + bge LOC(loope1) subcc %g0,%o4,%g0 ! restore cy -Lend1: subxcc %g4,%g2,%o4 +LOC(end1): + subxcc %g4,%g2,%o4 subxcc %g1,%g3,%o5 - std %o4,[res_ptr+0] + std %o4,[RES_PTR+0] addx %g0,%g0,%o4 ! save cy in register - andcc size,1,%g0 - be Lret1 + andcc SIZE,1,%g0 + be LOC(ret1) subcc %g0,%o4,%g0 ! restore cy /* Add last limb */ - ld [s1_ptr+8],%g4 - ld [s2_ptr+8],%g2 + ld [S1_PTR+8],%g4 + ld [S2_PTR+8],%g2 subxcc %g4,%g2,%o4 - st %o4,[res_ptr+8] + st %o4,[RES_PTR+8] -Lret1: retl +LOC(ret1): + retl addx %g0,%g0,%o0 ! return carry-out from most sign. limb -L1: xor s1_ptr,res_ptr,%g1 +LOC(1): xor S1_PTR,RES_PTR,%g1 andcc %g1,4,%g0 - bne L2 + bne LOC(2) nop ! ** V1b ** - andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0 - be L_v1b ! if no, branch + andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0 + be LOC(v1b) ! if no, branch nop -/* Add least significant limb separately to align res_ptr and s1_ptr */ - ld [s2_ptr],%g4 - add s2_ptr,4,s2_ptr - ld [s1_ptr],%g2 - add s1_ptr,4,s1_ptr - add size,-1,size +/* Add least significant limb separately to align RES_PTR and S1_PTR */ + ld [S2_PTR],%g4 + add S2_PTR,4,S2_PTR + ld [S1_PTR],%g2 + add S1_PTR,4,S1_PTR + add SIZE,-1,SIZE subcc %g2,%g4,%o4 - st %o4,[res_ptr] - add res_ptr,4,res_ptr -L_v1b: addx %g0,%g0,%o4 ! save cy in register - cmp size,2 ! if size < 2 ... - bl Lend2 ! ... branch to tail code + st %o4,[RES_PTR] + add RES_PTR,4,RES_PTR +LOC(v1b): + addx %g0,%g0,%o4 ! save cy in register + cmp SIZE,2 ! if SIZE < 2 ... + bl LOC(end2) ! ... branch to tail code subcc %g0,%o4,%g0 ! restore cy - ld [s2_ptr+0],%g4 - addcc size,-10,size - ld [s2_ptr+4],%g1 - ldd [s1_ptr+0],%g2 - blt Lfin1b + ld [S2_PTR+0],%g4 + addcc SIZE,-10,SIZE + ld [S2_PTR+4],%g1 + ldd [S1_PTR+0],%g2 + blt LOC(fin1b) subcc %g0,%o4,%g0 ! restore cy /* Add blocks of 8 limbs until less than 8 limbs remain */ -Loop1b: subxcc %g2,%g4,%o4 - ld [s2_ptr+8],%g4 +LOC(loop1b): + subxcc %g2,%g4,%o4 + ld [S2_PTR+8],%g4 subxcc %g3,%g1,%o5 - ld [s2_ptr+12],%g1 - ldd [s1_ptr+8],%g2 - std %o4,[res_ptr+0] + ld [S2_PTR+12],%g1 + ldd [S1_PTR+8],%g2 + std %o4,[RES_PTR+0] subxcc %g2,%g4,%o4 - ld [s2_ptr+16],%g4 + ld [S2_PTR+16],%g4 subxcc %g3,%g1,%o5 - ld [s2_ptr+20],%g1 - ldd [s1_ptr+16],%g2 - std %o4,[res_ptr+8] + ld [S2_PTR+20],%g1 + ldd [S1_PTR+16],%g2 + std %o4,[RES_PTR+8] subxcc %g2,%g4,%o4 - ld [s2_ptr+24],%g4 + ld [S2_PTR+24],%g4 subxcc %g3,%g1,%o5 - ld [s2_ptr+28],%g1 - ldd [s1_ptr+24],%g2 - std %o4,[res_ptr+16] + ld [S2_PTR+28],%g1 + ldd [S1_PTR+24],%g2 + std %o4,[RES_PTR+16] subxcc %g2,%g4,%o4 - ld [s2_ptr+32],%g4 + ld [S2_PTR+32],%g4 subxcc %g3,%g1,%o5 - ld [s2_ptr+36],%g1 - ldd [s1_ptr+32],%g2 - std %o4,[res_ptr+24] + ld [S2_PTR+36],%g1 + ldd [S1_PTR+32],%g2 + std %o4,[RES_PTR+24] addx %g0,%g0,%o4 ! save cy in register - addcc size,-8,size - add s1_ptr,32,s1_ptr - add s2_ptr,32,s2_ptr - add res_ptr,32,res_ptr - bge Loop1b + addcc SIZE,-8,SIZE + add S1_PTR,32,S1_PTR + add S2_PTR,32,S2_PTR + add RES_PTR,32,RES_PTR + bge LOC(loop1b) subcc %g0,%o4,%g0 ! restore cy -Lfin1b: addcc size,8-2,size - blt Lend1b +LOC(fin1b): + addcc SIZE,8-2,SIZE + blt LOC(end1b) subcc %g0,%o4,%g0 ! restore cy /* Add blocks of 2 limbs until less than 2 limbs remain */ -Loope1b:subxcc %g2,%g4,%o4 - ld [s2_ptr+8],%g4 +LOC(loope1b): + subxcc %g2,%g4,%o4 + ld [S2_PTR+8],%g4 subxcc %g3,%g1,%o5 - ld [s2_ptr+12],%g1 - ldd [s1_ptr+8],%g2 - std %o4,[res_ptr+0] + ld [S2_PTR+12],%g1 + ldd [S1_PTR+8],%g2 + std %o4,[RES_PTR+0] addx %g0,%g0,%o4 ! save cy in register - addcc size,-2,size - add s1_ptr,8,s1_ptr - add s2_ptr,8,s2_ptr - add res_ptr,8,res_ptr - bge Loope1b + addcc SIZE,-2,SIZE + add S1_PTR,8,S1_PTR + add S2_PTR,8,S2_PTR + add RES_PTR,8,RES_PTR + bge LOC(loope1b) subcc %g0,%o4,%g0 ! restore cy -Lend1b: subxcc %g2,%g4,%o4 +LOC(end1b): + subxcc %g2,%g4,%o4 subxcc %g3,%g1,%o5 - std %o4,[res_ptr+0] + std %o4,[RES_PTR+0] addx %g0,%g0,%o4 ! save cy in register - andcc size,1,%g0 - be Lret1b + andcc SIZE,1,%g0 + be LOC(ret1b) subcc %g0,%o4,%g0 ! restore cy /* Add last limb */ - ld [s2_ptr+8],%g4 - ld [s1_ptr+8],%g2 + ld [S2_PTR+8],%g4 + ld [S1_PTR+8],%g2 subxcc %g2,%g4,%o4 - st %o4,[res_ptr+8] + st %o4,[RES_PTR+8] -Lret1b: retl +LOC(ret1b): + retl addx %g0,%g0,%o0 ! return carry-out from most sign. limb ! ** V2 ** -/* If we come here, the alignment of s1_ptr and res_ptr as well as the - alignment of s2_ptr and res_ptr differ. Since there are only two ways +/* If we come here, the alignment of S1_PTR and RES_PTR as well as the + alignment of S2_PTR and RES_PTR differ. Since there are only two ways things can be aligned (that we care about) we now know that the alignment - of s1_ptr and s2_ptr are the same. */ + of S1_PTR and S2_PTR are the same. */ -L2: cmp size,1 - be Ljone +LOC(2): cmp SIZE,1 + be LOC(jone) nop - andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0 - be L_v2 ! if no, branch + andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0 + be LOC(v2) ! if no, branch nop -/* Add least significant limb separately to align s1_ptr and s2_ptr */ - ld [s1_ptr],%g4 - add s1_ptr,4,s1_ptr - ld [s2_ptr],%g2 - add s2_ptr,4,s2_ptr - add size,-1,size +/* Add least significant limb separately to align S1_PTR and S2_PTR */ + ld [S1_PTR],%g4 + add S1_PTR,4,S1_PTR + ld [S2_PTR],%g2 + add S2_PTR,4,S2_PTR + add SIZE,-1,SIZE subcc %g4,%g2,%o4 - st %o4,[res_ptr] - add res_ptr,4,res_ptr + st %o4,[RES_PTR] + add RES_PTR,4,RES_PTR -L_v2: addx %g0,%g0,%o4 ! save cy in register - addcc size,-8,size - blt Lfin2 +LOC(v2): + addx %g0,%g0,%o4 ! save cy in register + addcc SIZE,-8,SIZE + blt LOC(fin2) subcc %g0,%o4,%g0 ! restore cy /* Add blocks of 8 limbs until less than 8 limbs remain */ -Loop2: ldd [s1_ptr+0],%g2 - ldd [s2_ptr+0],%o4 +LOC(loop2): + ldd [S1_PTR+0],%g2 + ldd [S2_PTR+0],%o4 subxcc %g2,%o4,%g2 - st %g2,[res_ptr+0] + st %g2,[RES_PTR+0] subxcc %g3,%o5,%g3 - st %g3,[res_ptr+4] - ldd [s1_ptr+8],%g2 - ldd [s2_ptr+8],%o4 + st %g3,[RES_PTR+4] + ldd [S1_PTR+8],%g2 + ldd [S2_PTR+8],%o4 subxcc %g2,%o4,%g2 - st %g2,[res_ptr+8] + st %g2,[RES_PTR+8] subxcc %g3,%o5,%g3 - st %g3,[res_ptr+12] - ldd [s1_ptr+16],%g2 - ldd [s2_ptr+16],%o4 + st %g3,[RES_PTR+12] + ldd [S1_PTR+16],%g2 + ldd [S2_PTR+16],%o4 subxcc %g2,%o4,%g2 - st %g2,[res_ptr+16] + st %g2,[RES_PTR+16] subxcc %g3,%o5,%g3 - st %g3,[res_ptr+20] - ldd [s1_ptr+24],%g2 - ldd [s2_ptr+24],%o4 + st %g3,[RES_PTR+20] + ldd [S1_PTR+24],%g2 + ldd [S2_PTR+24],%o4 subxcc %g2,%o4,%g2 - st %g2,[res_ptr+24] + st %g2,[RES_PTR+24] subxcc %g3,%o5,%g3 - st %g3,[res_ptr+28] + st %g3,[RES_PTR+28] addx %g0,%g0,%o4 ! save cy in register - addcc size,-8,size - add s1_ptr,32,s1_ptr - add s2_ptr,32,s2_ptr - add res_ptr,32,res_ptr - bge Loop2 + addcc SIZE,-8,SIZE + add S1_PTR,32,S1_PTR + add S2_PTR,32,S2_PTR + add RES_PTR,32,RES_PTR + bge LOC(loop2) subcc %g0,%o4,%g0 ! restore cy -Lfin2: addcc size,8-2,size - blt Lend2 +LOC(fin2): + addcc SIZE,8-2,SIZE + blt LOC(end2) subcc %g0,%o4,%g0 ! restore cy -Loope2: ldd [s1_ptr+0],%g2 - ldd [s2_ptr+0],%o4 +LOC(loope2): + ldd [S1_PTR+0],%g2 + ldd [S2_PTR+0],%o4 subxcc %g2,%o4,%g2 - st %g2,[res_ptr+0] + st %g2,[RES_PTR+0] subxcc %g3,%o5,%g3 - st %g3,[res_ptr+4] + st %g3,[RES_PTR+4] addx %g0,%g0,%o4 ! save cy in register - addcc size,-2,size - add s1_ptr,8,s1_ptr - add s2_ptr,8,s2_ptr - add res_ptr,8,res_ptr - bge Loope2 + addcc SIZE,-2,SIZE + add S1_PTR,8,S1_PTR + add S2_PTR,8,S2_PTR + add RES_PTR,8,RES_PTR + bge LOC(loope2) subcc %g0,%o4,%g0 ! restore cy -Lend2: andcc size,1,%g0 - be Lret2 +LOC(end2): + andcc SIZE,1,%g0 + be LOC(ret2) subcc %g0,%o4,%g0 ! restore cy /* Add last limb */ -Ljone: ld [s1_ptr],%g4 - ld [s2_ptr],%g2 +LOC(jone): + ld [S1_PTR],%g4 + ld [S2_PTR],%g2 subxcc %g4,%g2,%o4 - st %o4,[res_ptr] + st %o4,[RES_PTR] -Lret2: retl +LOC(ret2): + retl addx %g0,%g0,%o0 ! return carry-out from most sign. limb + +END(__mpn_sub_n) diff --git a/sysdeps/sparc/sparc32/submul_1.S b/sysdeps/sparc/sparc32/submul_1.S index a8ebd501a7..805699b7b2 100644 --- a/sysdeps/sparc/sparc32/submul_1.S +++ b/sysdeps/sparc/sparc32/submul_1.S @@ -1,20 +1,20 @@ ! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract ! the result from a second limb vector. - -! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. - +! +! Copyright (C) 1992, 1993, 1994, 1997 Free Software Foundation, Inc. +! ! This file is part of the GNU MP Library. - +! ! The GNU MP Library is free software; you can redistribute it and/or modify ! it under the terms of the GNU Library General Public License as published by ! the Free Software Foundation; either version 2 of the License, or (at your ! option) any later version. - +! ! The GNU MP Library is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ! License for more details. - +! ! You should have received a copy of the GNU Library General Public License ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, @@ -22,17 +22,14 @@ ! INPUT PARAMETERS -! res_ptr o0 -! s1_ptr o1 -! size o2 -! s2_limb o3 +! RES_PTR o0 +! S1_PTR o1 +! SIZE o2 +! S2_LIMB o3 -#include "sysdep.h" +#include <sysdep.h> -.text - .align 4 - .global C_SYMBOL_NAME(__mpn_submul_1) -C_SYMBOL_NAME(__mpn_submul_1): +ENTRY(__mpn_submul_1) ! Make S1_PTR and RES_PTR point at the end of their blocks ! and put (- 4 x SIZE) in index/loop counter. sll %o2,2,%o2 @@ -41,19 +38,19 @@ C_SYMBOL_NAME(__mpn_submul_1): sub %g0,%o2,%o2 cmp %o3,0xfff - bgu Large + bgu LOC(large) nop ld [%o1+%o2],%o5 mov 0,%o0 - b L0 + b LOC(0) add %o4,-4,%o4 -Loop0: +LOC(loop0): subcc %o5,%g1,%g1 ld [%o1+%o2],%o5 addx %o0,%g0,%o0 st %g1,[%o4+%o2] -L0: wr %g0,%o3,%y +LOC(0): wr %g0,%o3,%y sra %o5,31,%g2 and %o3,%g2,%g2 andcc %g1,0,%g1 @@ -79,7 +76,7 @@ L0: wr %g0,%o3,%y addcc %g1,%o0,%g1 addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb addcc %o2,4,%o2 ! loop counter - bne Loop0 + bne LOC(loop0) ld [%o4+%o2],%o5 subcc %o5,%g1,%g1 @@ -88,17 +85,18 @@ L0: wr %g0,%o3,%y st %g1,[%o4+%o2] -Large: ld [%o1+%o2],%o5 +LOC(large): + ld [%o1+%o2],%o5 mov 0,%o0 sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0 - b L1 + b LOC(1) add %o4,-4,%o4 -Loop: +LOC(loop): subcc %o5,%g3,%g3 ld [%o1+%o2],%o5 addx %o0,%g0,%o0 st %g3,[%o4+%o2] -L1: wr %g0,%o5,%y +LOC(1): wr %g0,%o5,%y and %o5,%g4,%g2 andcc %g0,%g0,%g1 mulscc %g1,%o3,%g1 @@ -138,10 +136,12 @@ L1: wr %g0,%o5,%y addcc %g3,%o0,%g3 addx %g2,%g1,%o0 addcc %o2,4,%o2 - bne Loop + bne LOC(loop) ld [%o4+%o2],%o5 subcc %o5,%g3,%g3 addx %o0,%g0,%o0 retl st %g3,[%o4+%o2] + +END(__mpn_submul_1) diff --git a/sysdeps/sparc/sparc32/udiv.S b/sysdeps/sparc/sparc32/udiv.S index 4a7d1526b0..0dd48f32b5 100644 --- a/sysdeps/sparc/sparc32/udiv.S +++ b/sysdeps/sparc/sparc32/udiv.S @@ -37,16 +37,8 @@ -#include "sysdep.h" -#ifdef __linux__ -#include <asm/traps.h> -#else -#ifdef __svr4__ +#include <sysdep.h> #include <sys/trap.h> -#else -#include <machine/trap.h> -#endif -#endif ENTRY(.udiv) @@ -63,11 +55,11 @@ ENTRY(.udiv) 1: cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) + blu LOC(got_result) ! (and algorithm fails otherwise) clr %o2 sethi %hi(1 << (32 - 4 - 1)), %g1 cmp %o3, %g1 - blu Lnot_really_big + blu LOC(not_really_big) clr %o4 ! Here the dividend is >= 2**(31-N) or so. We must be careful here, @@ -78,15 +70,15 @@ ENTRY(.udiv) 1: cmp %o5, %g1 bgeu 3f - mov 1, %g7 + mov 1, %g2 sll %o5, 4, %o5 b 1b add %o4, 1, %o4 - ! Now compute %g7. + ! Now compute %g2. 2: addcc %o5, %o5, %o5 - bcc Lnot_too_big - add %g7, 1, %g7 + bcc LOC(not_too_big) + add %g2, 1, %g2 ! We get here if the %o1 overflowed while shifting. ! This means that %o3 has the high-order bit set. @@ -94,20 +86,20 @@ ENTRY(.udiv) sll %g1, 4, %g1 ! high order bit srl %o5, 1, %o5 ! rest of %o5 add %o5, %g1, %o5 - b Ldo_single_div - sub %g7, 1, %g7 + b LOC(do_single_div) + sub %g2, 1, %g2 - Lnot_too_big: + LOC(not_too_big): 3: cmp %o5, %o3 blu 2b nop - be Ldo_single_div + be LOC(do_single_div) nop /* NB: these are commented out in the V8-Sparc manual as well */ /* (I do not understand this) */ ! %o5 > %o3: went too far: back up 1 step ! srl %o5, 1, %o5 - ! dec %g7 + ! dec %g2 ! do single-bit divide steps ! ! We have to be careful here. We know that %o3 >= %o5, so we can do the @@ -116,15 +108,15 @@ ENTRY(.udiv) ! order bit set in the first step, just falling into the regular ! division loop will mess up the first time around. ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide + LOC(do_single_div): + subcc %g2, 1, %g2 + bl LOC(end_regular_divide) nop sub %o3, %o5, %o3 mov 1, %o2 - b Lend_single_divloop + b LOC(end_single_divloop) nop - Lsingle_divloop: + LOC(single_divloop): sll %o2, 1, %o2 bl 1f srl %o5, 1, %o5 @@ -136,217 +128,219 @@ ENTRY(.udiv) add %o3, %o5, %o3 sub %o2, 1, %o2 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop + LOC(end_single_divloop): + subcc %g2, 1, %g2 + bge LOC(single_divloop) tst %o3 - b,a Lend_regular_divide + b,a LOC(end_regular_divide) -Lnot_really_big: +LOC(not_really_big): 1: sll %o5, 4, %o5 cmp %o5, %o3 bleu 1b addcc %o4, 1, %o4 - be Lgot_result + be LOC(got_result) sub %o4, 1, %o4 tst %o3 ! set up for initial iteration -Ldivloop: +LOC(divloop): sll %o2, 4, %o2 ! depth 1, accumulated bits 0 - bl L.1.16 + bl LOC(1.16) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 2, accumulated bits 1 - bl L.2.17 + bl LOC(2.17) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 3, accumulated bits 3 - bl L.3.19 + bl LOC(3.19) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits 7 - bl L.4.23 + bl LOC(4.23) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (7*2+1), %o2 - -L.4.23: + +LOC(4.23): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (7*2-1), %o2 - - -L.3.19: + + +LOC(3.19): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits 5 - bl L.4.21 + bl LOC(4.21) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (5*2+1), %o2 - -L.4.21: + +LOC(4.21): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (5*2-1), %o2 - - - -L.2.17: + + + +LOC(2.17): ! remainder is negative addcc %o3,%o5,%o3 ! depth 3, accumulated bits 1 - bl L.3.17 + bl LOC(3.17) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits 3 - bl L.4.19 + bl LOC(4.19) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (3*2+1), %o2 - -L.4.19: + +LOC(4.19): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (3*2-1), %o2 - - -L.3.17: + + +LOC(3.17): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits 1 - bl L.4.17 + bl LOC(4.17) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (1*2+1), %o2 - -L.4.17: + +LOC(4.17): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (1*2-1), %o2 - - - - -L.1.16: + + + + +LOC(1.16): ! remainder is negative addcc %o3,%o5,%o3 ! depth 2, accumulated bits -1 - bl L.2.15 + bl LOC(2.15) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 3, accumulated bits -1 - bl L.3.15 + bl LOC(3.15) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits -1 - bl L.4.15 + bl LOC(4.15) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-1*2+1), %o2 - -L.4.15: + +LOC(4.15): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-1*2-1), %o2 - - -L.3.15: + + +LOC(3.15): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits -3 - bl L.4.13 + bl LOC(4.13) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-3*2+1), %o2 - -L.4.13: + +LOC(4.13): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-3*2-1), %o2 - - - -L.2.15: + + + +LOC(2.15): ! remainder is negative addcc %o3,%o5,%o3 ! depth 3, accumulated bits -3 - bl L.3.13 + bl LOC(3.13) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits -5 - bl L.4.11 + bl LOC(4.11) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-5*2+1), %o2 - -L.4.11: + +LOC(4.11): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-5*2-1), %o2 - - -L.3.13: + + +LOC(3.13): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits -7 - bl L.4.9 + bl LOC(4.9) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-7*2+1), %o2 - -L.4.9: + +LOC(4.9): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-7*2-1), %o2 - - - - + + + + 9: -Lend_regular_divide: +LOC(end_regular_divide): subcc %o4, 1, %o4 - bge Ldivloop + bge LOC(divloop) tst %o3 - bl,a Lgot_result + bl,a LOC(got_result) ! non-restoring fixup here (one instruction only!) sub %o2, 1, %o2 -Lgot_result: +LOC(got_result): retl mov %o2, %o0 + +END(.udiv) diff --git a/sysdeps/sparc/sparc32/udiv_qrnnd.S b/sysdeps/sparc/sparc32/udiv_qrnnd.S index 5b67f874c3..2fbb8a5f54 100644 --- a/sysdeps/sparc/sparc32/udiv_qrnnd.S +++ b/sysdeps/sparc/sparc32/udiv_qrnnd.S @@ -1,50 +1,52 @@ ! SPARC __udiv_qrnnd division support, used from longlong.h. - +! ! Copyright (C) 1993, 1994, 1997 Free Software Foundation, Inc. - +! ! This file is part of the GNU MP Library. - +! ! The GNU MP Library is free software; you can redistribute it and/or modify ! it under the terms of the GNU Library General Public License as published by ! the Free Software Foundation; either version 2 of the License, or (at your ! option) any later version. - +! ! The GNU MP Library is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ! License for more details. - +! ! You should have received a copy of the GNU Library General Public License ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to ! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - +! ! Added PIC support - May/96, Miguel de Icaza - +! ! INPUT PARAMETERS ! rem_ptr i0 ! n1 i1 ! n0 i2 ! d i3 -#include "sysdep.h" +#include <sysdep.h> #undef ret /* Kludge for glibc */ +#ifdef PIC .text +#else + .section .rodata,#alloc +#endif .align 8 .type two_to_32,@object + .size two_to_32,8 two_to_32: .double 0r4294967296 - .size two_to_32,8 .type two_to_31,@object + .size two_to_31,8 two_to_31: .double 0r2147483648 - .size two_to_31,8 - .align 4 - .global __udiv_qrnnd - .type __udiv_qrnnd,@function + .text ENTRY(__udiv_qrnnd) !#PROLOGUE# 0 save %sp,-104,%sp @@ -52,57 +54,58 @@ ENTRY(__udiv_qrnnd) st %i1,[%fp-8] ld [%fp-8],%f10 #ifdef PIC -.Lbase: call 1f +LOC(base): + call 1f fitod %f10,%f4 -1: ldd [%o7-(.Lbase-two_to_32)],%f8 +1: ldd [%o7-(LOC(base)-two_to_32)],%f8 #else sethi %hi(two_to_32),%o7 fitod %f10,%f4 ldd [%o7+%lo(two_to_32)],%f8 #endif cmp %i1,0 - bge L248 + bge LOC(248) mov %i0,%i5 faddd %f4,%f8,%f4 -.L248: +LOC(248): st %i2,[%fp-8] ld [%fp-8],%f10 fmuld %f4,%f8,%f6 cmp %i2,0 - bge L249 + bge LOC(249) fitod %f10,%f2 faddd %f2,%f8,%f2 -.L249: +LOC(249): st %i3,[%fp-8] faddd %f6,%f2,%f2 ld [%fp-8],%f10 cmp %i3,0 - bge L250 + bge LOC(250) fitod %f10,%f4 faddd %f4,%f8,%f4 -.L250: +LOC(250): fdivd %f2,%f4,%f2 #ifdef PIC - ldd [%o7-(.Lbase-two_to_31)],%f4 + ldd [%o7-(LOC(base)-two_to_31)],%f4 #else sethi %hi(two_to_31),%o7 ldd [%o7+%lo(two_to_31)],%f4 #endif fcmped %f2,%f4 nop - fbge,a L251 + fbge,a LOC(251) fsubd %f2,%f4,%f2 fdtoi %f2,%f2 st %f2,[%fp-8] - b L252 + b LOC(252) ld [%fp-8],%i4 -.L251: +LOC(251): fdtoi %f2,%f2 st %f2,[%fp-8] ld [%fp-8],%i4 sethi %hi(-2147483648),%g2 xor %i4,%g2,%i4 -.L252: +LOC(252): wr %g0,%i4,%y sra %i3,31,%g2 and %i4,%g2,%g2 @@ -144,7 +147,7 @@ ENTRY(__udiv_qrnnd) rd %y,%g3 subcc %i2,%g3,%o7 subxcc %i1,%i0,%g0 - be L253 + be LOC(253) cmp %o7,%i3 add %i4,-1,%i0 @@ -152,14 +155,14 @@ ENTRY(__udiv_qrnnd) st %o7,[%i5] ret restore -.L253: - blu L246 +LOC(253): + blu LOC(246) mov %i4,%i0 add %i4,1,%i0 sub %o7,%i3,%o7 -.L246: +LOC(246): st %o7,[%i5] ret restore - .size __udiv_qrnnd, .-__udiv_qrnnd +END(__udiv_qrnnd) diff --git a/sysdeps/sparc/sparc32/umul.S b/sysdeps/sparc/sparc32/umul.S index 7a26c295cb..096554a2bc 100644 --- a/sysdeps/sparc/sparc32/umul.S +++ b/sysdeps/sparc/sparc32/umul.S @@ -14,13 +14,14 @@ * bnz overflow (or tnz) */ -#include "DEFS.h" -FUNC(.umul) +#include <sysdep.h> + +ENTRY(.umul) or %o0, %o1, %o4 - mov %o0, %y ! multiplier -> Y - andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args - be Lmul_shortway ! if zero, can do it the short way - andcc %g0, %g0, %o4 ! zero the partial product and clear N and V + mov %o0, %y ! multiplier -> Y + andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args + be LOC(mul_shortway) ! if zero, can do it the short way + andcc %g0, %g0, %o4 ! zero the partial product; clear N & V /* * Long multiply. 32 steps, followed by a final shift step. @@ -59,7 +60,6 @@ FUNC(.umul) mulscc %o4, %o1, %o4 ! 32 mulscc %o4, %g0, %o4 ! final shift - /* * Normally, with the shift-and-add approach, if both numbers are * positive you get the correct result. With 32-bit two's-complement @@ -97,20 +97,20 @@ FUNC(.umul) #if 0 tst %o1 bl,a 1f ! if %o1 < 0 (high order bit = 1), - add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half) + add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half) 1: rd %y, %o0 ! get lower half of product retl - addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0 + addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0 #else /* Faster code from tege@sics.se. */ sra %o1, 31, %o2 ! make mask from sign bit and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1 rd %y, %o0 ! get lower half of product retl - addcc %o4, %o2, %o1 ! add compensation and put upper half in place + addcc %o4, %o2, %o1 ! add compensation and put upper half in place #endif -Lmul_shortway: +LOC(mul_shortway): /* * Short multiply. 12 steps, followed by a final shift step. * The resulting bits are off by 12 and (32-12) = 20 bit positions, @@ -150,4 +150,6 @@ Lmul_shortway: srl %o5, 20, %o5 ! shift low bits right 20 or %o5, %o0, %o0 retl - addcc %g0, %g0, %o1 ! %o1 = zero, and set Z + addcc %g0, %g0, %o1 ! %o1 = zero, and set Z + +END(.umul) diff --git a/sysdeps/sparc/sparc32/urem.S b/sysdeps/sparc/sparc32/urem.S index e72c33dd63..5644e7a892 100644 --- a/sysdeps/sparc/sparc32/urem.S +++ b/sysdeps/sparc/sparc32/urem.S @@ -37,16 +37,8 @@ -#include "sysdep.h" -#ifdef __linux__ -#include <asm/traps.h> -#else -#ifdef __svr4__ +#include <sysdep.h> #include <sys/trap.h> -#else -#include <machine/trap.h> -#endif -#endif ENTRY(.urem) @@ -63,11 +55,11 @@ ENTRY(.urem) 1: cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) + blu LOC(got_result) ! (and algorithm fails otherwise) clr %o2 sethi %hi(1 << (32 - 4 - 1)), %g1 cmp %o3, %g1 - blu Lnot_really_big + blu LOC(not_really_big) clr %o4 ! Here the dividend is >= 2**(31-N) or so. We must be careful here, @@ -78,15 +70,15 @@ ENTRY(.urem) 1: cmp %o5, %g1 bgeu 3f - mov 1, %g7 + mov 1, %g2 sll %o5, 4, %o5 b 1b add %o4, 1, %o4 - ! Now compute %g7. + ! Now compute %g2. 2: addcc %o5, %o5, %o5 - bcc Lnot_too_big - add %g7, 1, %g7 + bcc LOC(not_too_big) + add %g2, 1, %g2 ! We get here if the %o1 overflowed while shifting. ! This means that %o3 has the high-order bit set. @@ -94,20 +86,20 @@ ENTRY(.urem) sll %g1, 4, %g1 ! high order bit srl %o5, 1, %o5 ! rest of %o5 add %o5, %g1, %o5 - b Ldo_single_div - sub %g7, 1, %g7 + b LOC(do_single_div) + sub %g2, 1, %g2 - Lnot_too_big: + LOC(not_too_big): 3: cmp %o5, %o3 blu 2b nop - be Ldo_single_div + be LOC(do_single_div) nop /* NB: these are commented out in the V8-Sparc manual as well */ /* (I do not understand this) */ ! %o5 > %o3: went too far: back up 1 step ! srl %o5, 1, %o5 - ! dec %g7 + ! dec %g2 ! do single-bit divide steps ! ! We have to be careful here. We know that %o3 >= %o5, so we can do the @@ -116,15 +108,15 @@ ENTRY(.urem) ! order bit set in the first step, just falling into the regular ! division loop will mess up the first time around. ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide + LOC(do_single_div): + subcc %g2, 1, %g2 + bl LOC(end_regular_divide) nop sub %o3, %o5, %o3 mov 1, %o2 - b Lend_single_divloop + b LOC(end_single_divloop) nop - Lsingle_divloop: + LOC(single_divloop): sll %o2, 1, %o2 bl 1f srl %o5, 1, %o5 @@ -136,217 +128,219 @@ ENTRY(.urem) add %o3, %o5, %o3 sub %o2, 1, %o2 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop + LOC(end_single_divloop): + subcc %g2, 1, %g2 + bge LOC(single_divloop) tst %o3 - b,a Lend_regular_divide + b,a LOC(end_regular_divide) -Lnot_really_big: +LOC(not_really_big): 1: sll %o5, 4, %o5 cmp %o5, %o3 bleu 1b addcc %o4, 1, %o4 - be Lgot_result + be LOC(got_result) sub %o4, 1, %o4 tst %o3 ! set up for initial iteration -Ldivloop: +LOC(divloop): sll %o2, 4, %o2 ! depth 1, accumulated bits 0 - bl L.1.16 + bl LOC(1.16) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 2, accumulated bits 1 - bl L.2.17 + bl LOC(2.17) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 3, accumulated bits 3 - bl L.3.19 + bl LOC(3.19) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits 7 - bl L.4.23 + bl LOC(4.23) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (7*2+1), %o2 - -L.4.23: + +LOC(4.23): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (7*2-1), %o2 - - -L.3.19: + + +LOC(3.19): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits 5 - bl L.4.21 + bl LOC(4.21) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (5*2+1), %o2 - -L.4.21: + +LOC(4.21): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (5*2-1), %o2 - - - -L.2.17: + + + +LOC(2.17): ! remainder is negative addcc %o3,%o5,%o3 ! depth 3, accumulated bits 1 - bl L.3.17 + bl LOC(3.17) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits 3 - bl L.4.19 + bl LOC(4.19) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (3*2+1), %o2 - -L.4.19: + +LOC(4.19): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (3*2-1), %o2 - - -L.3.17: + + +LOC(3.17): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits 1 - bl L.4.17 + bl LOC(4.17) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (1*2+1), %o2 - -L.4.17: + +LOC(4.17): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (1*2-1), %o2 - - - - -L.1.16: + + + + +LOC(1.16): ! remainder is negative addcc %o3,%o5,%o3 ! depth 2, accumulated bits -1 - bl L.2.15 + bl LOC(2.15) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 3, accumulated bits -1 - bl L.3.15 + bl LOC(3.15) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits -1 - bl L.4.15 + bl LOC(4.15) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-1*2+1), %o2 - -L.4.15: + +LOC(4.15): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-1*2-1), %o2 - - -L.3.15: + + +LOC(3.15): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits -3 - bl L.4.13 + bl LOC(4.13) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-3*2+1), %o2 - -L.4.13: + +LOC(4.13): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-3*2-1), %o2 - - - -L.2.15: + + + +LOC(2.15): ! remainder is negative addcc %o3,%o5,%o3 ! depth 3, accumulated bits -3 - bl L.3.13 + bl LOC(3.13) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 ! depth 4, accumulated bits -5 - bl L.4.11 + bl LOC(4.11) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-5*2+1), %o2 - -L.4.11: + +LOC(4.11): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-5*2-1), %o2 - - -L.3.13: + + +LOC(3.13): ! remainder is negative addcc %o3,%o5,%o3 ! depth 4, accumulated bits -7 - bl L.4.9 + bl LOC(4.9) srl %o5,1,%o5 ! remainder is positive subcc %o3,%o5,%o3 b 9f add %o2, (-7*2+1), %o2 - -L.4.9: + +LOC(4.9): ! remainder is negative addcc %o3,%o5,%o3 b 9f add %o2, (-7*2-1), %o2 - - - - + + + + 9: -Lend_regular_divide: +LOC(end_regular_divide): subcc %o4, 1, %o4 - bge Ldivloop + bge LOC(divloop) tst %o3 - bl,a Lgot_result + bl,a LOC(got_result) ! non-restoring fixup here (one instruction only!) add %o3, %o1, %o3 -Lgot_result: +LOC(got_result): retl mov %o3, %o0 + +END(.urem) diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h index eb46ba92be..e302f4a1fe 100644 --- a/sysdeps/sparc/sparc64/dl-machine.h +++ b/sysdeps/sparc/sparc64/dl-machine.h @@ -220,6 +220,13 @@ elf_machine_lazy_rel (struct link_map *map, const Elf64_Rela *reloc) /* The SPARC overlaps DT_RELA and DT_PLTREL. */ #define ELF_MACHINE_PLTREL_OVERLAP 1 +/* The return value from dl-runtime's fixup, if it should be special. */ +#define ELF_FIXUP_RETURN_VALUE(map, result) \ + ((map)->l_info[DT_SPARC(PLTFMT)] \ + && (map)->l_info[DT_SPARC(PLTFMT)]->d_un.d_val == 2 \ + ? (result) + (map)->l_info[DT_PLTGOT]->d_un.d_ptr + (map)->l_addr \ + : (result)) + /* Set up the loaded object described by L so its unrelocated PLT entries will jump to the on-demand fixup code in dl-runtime.c. */ @@ -232,10 +239,10 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) if (l->l_info[DT_JMPREL] && lazy) { got = (Elf64_Addr *) (l->l_addr + l->l_info[DT_PLTGOT]->d_un.d_ptr); - got[1] = (Elf64_Addr) l; /* Identify this shared object. */ /* This function will get called to fix up the GOT entry indicated by the offset on the stack, and then jump to the resolved address. */ - got[2] = (Elf64_Addr) &_dl_runtime_resolve; + got[1] = (Elf64_Addr) &_dl_runtime_resolve; + got[2] = (Elf64_Addr) l; /* Identify this shared object. */ } return lazy; @@ -248,9 +255,9 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) .type _dl_runtime_resolve, @function _dl_runtime_resolve: save %sp, -160, %sp - mov %g5, %o0 + mov %g1, %o0 call fixup - mov %g6, %o1 + mov %g2, %o1 jmp %o0 restore .size _dl_runtime_resolve, .-_dl_runtime_resolve diff --git a/sysdeps/sparc/sparc64/fpu/fpu_control.h b/sysdeps/sparc/sparc64/fpu/fpu_control.h index fd8abb19f1..bedc034eaa 100644 --- a/sysdeps/sparc/sparc64/fpu/fpu_control.h +++ b/sysdeps/sparc/sparc64/fpu/fpu_control.h @@ -41,13 +41,11 @@ /* Now two recommended cw */ -/* Linux default: +/* Linux and IEEE default: - extended precision - rounding to nearest - no exceptions */ #define _FPU_DEFAULT 0x0 - -/* IEEE: same as above */ #define _FPU_IEEE 0x0 /* Type of the control word. */ |