diff options
Diffstat (limited to 'REORG.TODO/sysdeps/alpha')
210 files changed, 18135 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/alpha/Implies b/REORG.TODO/sysdeps/alpha/Implies new file mode 100644 index 0000000000..d03783b127 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/Implies @@ -0,0 +1,7 @@ +wordsize-64 +# Alpha uses IEEE 754 single, double and quad precision floating point. +ieee754/ldbl-128 +ieee754/dbl-64/wordsize-64 +ieee754/dbl-64 +ieee754/flt-32 +alpha/soft-fp diff --git a/REORG.TODO/sysdeps/alpha/Makefile b/REORG.TODO/sysdeps/alpha/Makefile new file mode 100644 index 0000000000..98da3b57e6 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/Makefile @@ -0,0 +1,62 @@ +# Copyright (C) 1993-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# Contributed by Brendan Kehoe (brendan@zen.org). + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library. If not, see +# <http://www.gnu.org/licenses/>. + +ifeq ($(subdir),db2) +CPPFLAGS += -DHAVE_SPINLOCKS=1 -DHAVE_ASSEM_ALPHA=1 +endif + +ifeq ($(subdir),debug) +# Consider making this GCC's default... +CFLAGS-backtrace.c = -fasynchronous-unwind-tables +endif + +ifeq ($(subdir),gmon) +sysdep_routines += _mcount +endif + +ifeq ($(subdir),gnulib) +sysdep_routines += divl divlu divq divqu reml remlu remq remqu +endif + +ifeq ($(subdir),string) +sysdep_routines += stxcpy stxncpy +endif + +ifeq ($(subdir),elf) +# The ld.so startup code cannot use literals until it self-relocates. 
+CFLAGS-rtld.c = -mbuild-constants +endif + +ifeq ($(subdir),math) +# The fma routines rely on inexact being raised for correct results. +CFLAGS-s_fma.c = -mieee-with-inexact +CFLAGS-s_fmaf.c = -mieee-with-inexact +# This test tries to check for inexact being raised by arithmetic. +CFLAGS-test-misc.c += -mieee-with-inexact +# Avoid "conflicting types for built-in function" warnings +CFLAGS-s_isnan.c += -fno-builtin-isnanf +endif + +# Build everything with full IEEE math support, and with dynamic rounding; +# there are a number of math routines that are defined to work with the +# "current" rounding mode, and it's easiest to set this with all of them. +sysdep-CFLAGS += -mieee -mfp-rounding-mode=d + +# libc.so requires about 16k for the small data area, which is well +# below the 64k maximum. +pic-ccflag = -fpic diff --git a/REORG.TODO/sysdeps/alpha/Subdirs b/REORG.TODO/sysdeps/alpha/Subdirs new file mode 100644 index 0000000000..87eadf3024 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/Subdirs @@ -0,0 +1 @@ +soft-fp diff --git a/REORG.TODO/sysdeps/alpha/Versions b/REORG.TODO/sysdeps/alpha/Versions new file mode 100644 index 0000000000..ae8fde7b23 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/Versions @@ -0,0 +1,17 @@ +libc { + GLIBC_2.0 { + # functions with special/multiple interfaces + __divqu; __remqu; __divqs; __remqs; __divlu; __remlu; __divls; + __remls; __divl; __reml; __divq; __remq; __divqu; __remqu; + } +} +libm { + GLIBC_2.0 { + # used in inline functions. + __atan2; + } + GLIBC_2.18 { + # forgotten when the symbols were added to glibc 2.15 for other targets + __sqrt_finite; __sqrtf_finite; __sqrtl_finite; + } +} diff --git a/REORG.TODO/sysdeps/alpha/__longjmp.S b/REORG.TODO/sysdeps/alpha/__longjmp.S new file mode 100644 index 0000000000..ae8de82669 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/__longjmp.S @@ -0,0 +1,63 @@ +/* Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __ASSEMBLY__ + +#include <sysdep.h> +#include <jmpbuf-offsets.h> + + +ENTRY(__longjmp) +#ifdef PROF + ldgp gp, 0(pv) + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at + .prologue 1 +#else + .prologue 0 +#endif + + mov a1, v0 /* v0 = second argument (a1) */ + ldq s0, JB_S0*8(a0) /* reload s0-s5 from the buffer a0 points to */ + ldq s1, JB_S1*8(a0) + ldq s2, JB_S2*8(a0) + ldq s3, JB_S3*8(a0) + ldq s4, JB_S4*8(a0) + ldq s5, JB_S5*8(a0) + ldq ra, JB_PC*8(a0) /* ra = contents of the JB_PC slot */ + ldq fp, JB_FP*8(a0) + ldq t0, JB_SP*8(a0) /* saved SP is parked in t0; sp itself is switched last */ + ldt $f2, JB_F2*8(a0) /* reload $f2-$f9 from the buffer */ + ldt $f3, JB_F3*8(a0) + ldt $f4, JB_F4*8(a0) + ldt $f5, JB_F5*8(a0) + ldt $f6, JB_F6*8(a0) + ldt $f7, JB_F7*8(a0) + ldt $f8, JB_F8*8(a0) + ldt $f9, JB_F9*8(a0) +#ifdef PTR_DEMANGLE + PTR_DEMANGLE(ra, t1) /* NOTE(review): macro bodies are not visible here; presumably t1 is scratch shared by all three */ + PTR_DEMANGLE2(t0, t1) + PTR_DEMANGLE2(fp, t1) +#endif + cmoveq v0, 1, v0 /* if v0 == 0, replace it with 1 */ + mov t0, sp /* install the restored stack pointer */ + ret + +END(__longjmp) diff --git a/REORG.TODO/sysdeps/alpha/_mcount.S b/REORG.TODO/sysdeps/alpha/_mcount.S new file mode 100644 index 0000000000..8a35f3903b --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/_mcount.S @@ -0,0 +1,105 @@ +/* Machine-specific calling sequence for `mcount' profiling function. alpha + Copyright (C) 1995-2017 Free Software Foundation, Inc. + Contributed by David Mosberger (davidm@cs.arizona.edu). + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Assembly stub to invoke _mcount(). Compiler generated code calls + this stub after executing a function's prologue and without saving any + registers. It is therefore necessary to preserve a0..a5 as they may + contain function arguments. To work correctly with frameless + functions, it is also necessary to preserve ra. Finally, division + routines are invoked with a special calling convention and the + compiler treats those calls as if they were instructions. In + particular, it doesn't save any of the temporary registers (caller + saved registers). It is therefore necessary to preserve all + caller-saved registers as well. + + Upon entering _mcount, register $at holds the return address and ra + holds the return address of the function's caller (selfpc and frompc, + respectively in gmon.c language...). 
*/ + +#include <sysdep.h> + + .set noat + .set noreorder + +LEAF(_mcount, 0xb0) + subq sp, 0xb0, sp # reserve the 0xb0-byte register save area + .prologue 0 + stq a0, 0x00(sp) # save a0 before it is reused as an argument + mov ra, a0 # a0 = caller-pc + stq a1, 0x08(sp) + mov $at, a1 # a1 = self-pc + stq $at, 0x10(sp) # keep self-pc; it is the return target below + + stq a2, 0x18(sp) + stq a3, 0x20(sp) + stq a4, 0x28(sp) + stq a5, 0x30(sp) + stq ra, 0x38(sp) + stq gp, 0x40(sp) # save the incoming gp + + br gp, 1f # br leaves the address of label 1 in gp +1: ldgp gp, 0(gp) # rebuild gp from that address + + stq t0, 0x48(sp) # save t0-t11 and v0 as well + stq t1, 0x50(sp) + stq t2, 0x58(sp) + stq t3, 0x60(sp) + stq t4, 0x68(sp) + stq t5, 0x70(sp) + stq t6, 0x78(sp) + + stq t7, 0x80(sp) + stq t8, 0x88(sp) + stq t9, 0x90(sp) + stq t10, 0x98(sp) + stq t11, 0xa0(sp) + stq v0, 0xa8(sp) + + jsr ra, __mcount # call __mcount with a0 = caller-pc, a1 = self-pc + + ldq a0, 0x00(sp) + ldq a1, 0x08(sp) + ldq $at, 0x10(sp) # restore self-pc + ldq a2, 0x18(sp) + ldq a3, 0x20(sp) + ldq a4, 0x28(sp) + ldq a5, 0x30(sp) + ldq ra, 0x38(sp) + ldq gp, 0x40(sp) + mov $at, pv # make pv point to return address + ldq t0, 0x48(sp) # this is important under OSF/1 to + ldq t1, 0x50(sp) # ensure that the code that we return + ldq t2, 0x58(sp) # can correctly compute its gp + ldq t3, 0x60(sp) + ldq t4, 0x68(sp) + ldq t5, 0x70(sp) + ldq t6, 0x78(sp) + ldq t7, 0x80(sp) + ldq t8, 0x88(sp) + ldq t9, 0x90(sp) + ldq t10, 0x98(sp) + ldq t11, 0xa0(sp) + ldq v0, 0xa8(sp) + + addq sp, 0xb0, sp # release the save area + ret zero,($at),1 # return through $at (self-pc), leaving ra intact + + END(_mcount) + +weak_alias (_mcount, mcount) diff --git a/REORG.TODO/sysdeps/alpha/add_n.S b/REORG.TODO/sysdeps/alpha/add_n.S new file mode 100644 index 0000000000..85f8e270cb --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/add_n.S @@ -0,0 +1,118 @@ + # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and + # store sum in a third limb vector. + + # Copyright (C) 1995-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. 
+ + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr $16 + # s1_ptr $17 + # s2_ptr $18 + # size $19 + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_add_n + .ent __mpn_add_n +__mpn_add_n: + .frame $30,0,$26,0 + + ldq $3,0($17) # $3 = first s1 limb + ldq $4,0($18) # $4 = first s2 limb + + subq $19,1,$19 # size-1: the final limb is summed at .Lend + and $19,4-1,$2 # number of limbs in first loop + bis $31,$31,$0 # $0 = carry = 0 + beq $2,.L0 # if size-1 is a multiple of 4, skip first loop + + subq $19,$2,$19 # limbs left for the 4-way unrolled loop + +.Loop0: subq $2,1,$2 + ldq $5,8($17) # preload next s1 limb + addq $4,$0,$4 # s2 limb + carry in + ldq $6,8($18) # preload next s2 limb + cmpult $4,$0,$1 # $1 = carry out of the carry add + addq $3,$4,$4 # + s1 limb + cmpult $4,$3,$0 # $0 = carry out of the main add + stq $4,0($16) # store sum limb + or $0,$1,$0 # carry out = either of the two carries + + addq $17,8,$17 + addq $18,8,$18 + bis $5,$5,$3 # preloaded limbs become the current pair + bis $6,$6,$4 + addq $16,8,$16 + bne $2,.Loop0 + +.L0: beq $19,.Lend # nothing left for the unrolled loop + + .align 3 +.Loop: subq $19,4,$19 # four limbs per iteration, same step repeated + + ldq $5,8($17) + addq $4,$0,$4 + ldq $6,8($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 + + ldq $3,16($17) + addq $6,$0,$6 + ldq $4,16($18) + cmpult $6,$0,$1 + addq $5,$6,$6 + cmpult $6,$5,$0 + stq $6,8($16) + or $0,$1,$0 + + ldq $5,24($17) + addq $4,$0,$4 + ldq $6,24($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,16($16) + or $0,$1,$0 + + ldq $3,32($17) + addq $6,$0,$6 + ldq $4,32($18) + cmpult $6,$0,$1 + addq $5,$6,$6 + cmpult $6,$5,$0 + stq $6,24($16) + or $0,$1,$0 + + addq $17,32,$17 + addq $18,32,$18 + addq $16,32,$16 
+ bne $19,.Loop + +.Lend: addq $4,$0,$4 # final limb: same add-with-carry step, no preload + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 # $0 = carry out of the whole addition + ret $31,($26),1 + + .end __mpn_add_n diff --git a/REORG.TODO/sysdeps/alpha/addmul_1.S b/REORG.TODO/sysdeps/alpha/addmul_1.S new file mode 100644 index 0000000000..11bd135e83 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/addmul_1.S @@ -0,0 +1,90 @@ + # Alpha 21064 __mpn_addmul_1 -- Multiply a limb vector with a limb and add + # the result to a second limb vector. + + # Copyright (C) 1992-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr r16 + # s1_ptr r17 + # size r18 + # s2_limb r19 + + # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5. 
+ + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_addmul_1 + .ent __mpn_addmul_1 2 +__mpn_addmul_1: + .frame $30,0,$26 + + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + subq $18,1,$18 # size-- + mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + umulh $2,$19,$0 # $0 = prod_high + beq $18,.Lend1 # jump if size was == 1 + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + subq $18,1,$18 # size-- + addq $5,$3,$3 # $3 = *res_ptr + prod_low + cmpult $3,$5,$4 # $4 = carry from that add + stq $3,0($16) # store first result limb + addq $16,8,$16 # res_ptr++ + beq $18,.Lend2 # jump if size was == 2 + + .align 3 +.Loop: mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + subq $18,1,$18 # size-- + umulh $2,$19,$4 # $4 = cy_limb + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + addq $3,$0,$3 # $3 = cy_limb + prod_low + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + addq $5,$3,$3 # add in *res_ptr + cmpult $3,$5,$5 # $5 = carry from that add + stq $3,0($16) # store result limb + addq $16,8,$16 # res_ptr++ + addq $5,$0,$0 # combine carries + bne $18,.Loop + +.Lend2: mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + umulh $2,$19,$4 # $4 = cy_limb + addq $3,$0,$3 # $3 = cy_limb + prod_low + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + addq $5,$3,$3 # add in *res_ptr + cmpult $3,$5,$5 # $5 = carry from that add + stq $3,0($16) # store last result limb + addq $5,$0,$0 # combine carries + addq $4,$0,$0 # cy_limb = prod_high + cy + ret $31,($26),1 +.Lend1: addq $5,$3,$3 # size == 1: *res_ptr + prod_low + cmpult $3,$5,$5 # $5 = carry from that add + stq $3,0($16) + addq $0,$5,$0 # $0 = prod_high + carry + ret $31,($26),1 + + .end __mpn_addmul_1 diff --git a/REORG.TODO/sysdeps/alpha/alphaev5/add_n.S b/REORG.TODO/sysdeps/alpha/alphaev5/add_n.S new file mode 100644 index 0000000000..d7db8f4672 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev5/add_n.S @@ -0,0 +1,146 @@ + # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and + # store sum in a third limb vector. + + # Copyright (C) 1995-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. 
+ + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr $16 + # s1_ptr $17 + # s2_ptr $18 + # size $19 + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_add_n + .ent __mpn_add_n +__mpn_add_n: + .frame $30,0,$26,0 + + or $31,$31,$25 # clear cy + subq $19,4,$19 # decr loop cnt + blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop + # Start software pipeline for 1st loop + ldq $0,0($18) # $0,$1 = s2 limbs 0,1 + ldq $1,8($18) + ldq $4,0($17) # $4,$5 = s1 limbs 0,1 + ldq $5,8($17) + addq $17,32,$17 # update s1_ptr + ldq $2,16($18) # $2,$3 = s2 limbs 2,3 + addq $0,$4,$20 # 1st main add + ldq $3,24($18) + subq $19,4,$19 # decr loop cnt + ldq $6,-16($17) # $6,$7 = s1 limbs 2,3 (s1_ptr was already advanced) + cmpult $20,$0,$25 # compute cy from last add + ldq $7,-8($17) + addq $1,$25,$28 # cy add + addq $18,32,$18 # update s2_ptr + addq $5,$28,$21 # 2nd main add + cmpult $28,$25,$8 # compute cy from last add + blt $19,.Lend1 # if less than 4 limbs remain, jump + # 1st loop handles groups of 4 limbs in a software pipeline + .align 4 +.Loop: cmpult $21,$28,$25 # compute cy from last add + ldq $0,0($18) + or $8,$25,$25 # combine cy from the two adds + ldq $1,8($18) + addq $2,$25,$28 # cy add + ldq $4,0($17) + addq $28,$6,$22 # 3rd main add + ldq $5,8($17) + cmpult $28,$25,$8 # compute cy from last add + cmpult $22,$28,$25 # compute cy from last add + stq $20,0($16) # store 1st sum limb of this group + or $8,$25,$25 # combine cy from the two adds + stq 
$21,8($16) + addq $3,$25,$28 # cy add + addq $28,$7,$23 # 4th main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $23,$28,$25 # compute cy from last add + addq $17,32,$17 # update s1_ptr + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + addq $0,$25,$28 # cy add + ldq $2,16($18) + addq $4,$28,$20 # 1st main add + ldq $3,24($18) + cmpult $28,$25,$8 # compute cy from last add + ldq $6,-16($17) + cmpult $20,$28,$25 # compute cy from last add + ldq $7,-8($17) + or $8,$25,$25 # combine cy from the two adds + subq $19,4,$19 # decr loop cnt + stq $22,-16($16) # 3rd and 4th sum limbs, relative to the advanced res_ptr + addq $1,$25,$28 # cy add + stq $23,-8($16) + addq $5,$28,$21 # 2nd main add + addq $18,32,$18 # update s2_ptr + cmpult $28,$25,$8 # compute cy from last add + bge $19,.Loop + # Finish software pipeline for 1st loop +.Lend1: cmpult $21,$28,$25 # compute cy from last add + or $8,$25,$25 # combine cy from the two adds + addq $2,$25,$28 # cy add + addq $28,$6,$22 # 3rd main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $22,$28,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + stq $21,8($16) + addq $3,$25,$28 # cy add + addq $28,$7,$23 # 4th main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $23,$28,$25 # compute cy from last add + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + stq $22,-16($16) + stq $23,-8($16) +.Lend2: addq $19,4,$19 # restore loop cnt + beq $19,.Lret # no limbs left + # Start software pipeline for 2nd loop + ldq $0,0($18) # $0 = s2 limb + ldq $4,0($17) # $4 = s1 limb + subq $19,1,$19 # the last limb is finished at .Lend0 + beq $19,.Lend0 + # 2nd loop handles remaining 1-3 limbs + .align 4 +.Loop0: addq $0,$25,$28 # cy add + ldq $0,8($18) + addq $4,$28,$20 # main add + ldq $4,8($17) + addq $18,8,$18 + cmpult $28,$25,$8 # compute cy from last add + addq $17,8,$17 + stq $20,0($16) + cmpult $20,$28,$25 # compute cy from last add + subq $19,1,$19 # decr loop cnt + or $8,$25,$25 # combine cy from the two adds + addq $16,8,$16 + bne 
$19,.Loop0 +.Lend0: addq $0,$25,$28 # cy add + addq $4,$28,$20 # main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $20,$28,$25 # compute cy from last add + stq $20,0($16) # store final sum limb + or $8,$25,$25 # combine cy from the two adds + +.Lret: or $25,$31,$0 # return cy + ret $31,($26),1 + .end __mpn_add_n diff --git a/REORG.TODO/sysdeps/alpha/alphaev5/lshift.S b/REORG.TODO/sysdeps/alpha/alphaev5/lshift.S new file mode 100644 index 0000000000..24ff8e2fc3 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev5/lshift.S @@ -0,0 +1,172 @@ + # Alpha EV5 __mpn_lshift -- + + # Copyright (C) 1994-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr r16 + # s1_ptr r17 + # size r18 + # cnt r19 + + # This code runs at 3.25 cycles/limb on the EV5. 
+ + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_lshift + .ent __mpn_lshift +__mpn_lshift: + .frame $30,0,$26,0 + + s8addq $18,$17,$17 # make r17 point at end of s1 + ldq $4,-8($17) # load first limb + subq $31,$19,$20 # $20 = -cnt, the complementary shift count (NOTE(review): presumably only its low 6 bits are used, i.e. 64-cnt; confirm) + s8addq $18,$16,$16 # make r16 point at end of RES + subq $18,1,$18 # size-1: one limb is already loaded + and $18,4-1,$28 # number of limbs in first loop + srl $4,$20,$0 # compute function result + + beq $28,.L0 + subq $18,$28,$18 # limbs left for the unrolled loop + + .align 3 +.Loop0: ldq $3,-16($17) # next lower limb + subq $16,8,$16 + sll $4,$19,$5 # upper part, from the current limb + subq $17,8,$17 + subq $28,1,$28 + srl $3,$20,$6 # bits shifted in from the next lower limb + or $3,$3,$4 # next limb becomes the current limb + or $5,$6,$8 # assemble the result limb + stq $8,0($16) + bne $28,.Loop0 + +.L0: sll $4,$19,$24 # $24 = shifted current limb, still missing its low bits + beq $18,.Lend + # warm up phase 1 + ldq $1,-16($17) + subq $18,4,$18 + ldq $2,-24($17) + ldq $3,-32($17) + ldq $4,-40($17) + beq $18,.Lend1 + # warm up phase 2 + srl $1,$20,$7 + sll $1,$19,$21 + srl $2,$20,$8 + ldq $1,-48($17) + sll $2,$19,$22 + ldq $2,-56($17) + srl $3,$20,$5 + or $7,$24,$7 + sll $3,$19,$23 + or $8,$21,$8 + srl $4,$20,$6 + ldq $3,-64($17) + sll $4,$19,$24 + ldq $4,-72($17) + subq $18,4,$18 + beq $18,.Lend2 + .align 4 + # main loop +.Loop: stq $7,-8($16) + or $5,$22,$5 + stq $8,-16($16) + or $6,$23,$6 + + srl $1,$20,$7 + subq $18,4,$18 + sll $1,$19,$21 + unop # ldq $31,-96($17) + + srl $2,$20,$8 + ldq $1,-80($17) + sll $2,$19,$22 + ldq $2,-88($17) + + stq $5,-24($16) + or $7,$24,$7 + stq $6,-32($16) + or $8,$21,$8 + + srl $3,$20,$5 + unop # ldq $31,-96($17) + sll $3,$19,$23 + subq $16,32,$16 + + srl $4,$20,$6 + ldq $3,-96($17) + sll $4,$19,$24 + ldq $4,-104($17) + + subq $17,32,$17 + bne $18,.Loop + # cool down phase 2/1 +.Lend2: stq $7,-8($16) + or $5,$22,$5 + stq $8,-16($16) + or $6,$23,$6 + srl $1,$20,$7 + sll $1,$19,$21 + srl $2,$20,$8 + sll $2,$19,$22 + stq $5,-24($16) + or $7,$24,$7 + stq $6,-32($16) + or $8,$21,$8 + srl $3,$20,$5 + sll $3,$19,$23 + srl $4,$20,$6 + sll $4,$19,$24 + # cool down phase 2/2 + stq $7,-40($16) + or $5,$22,$5 + stq $8,-48($16) + or $6,$23,$6 + stq $5,-56($16) + stq $6,-64($16) + # cool down 
phase 2/3 + stq $24,-72($16) # lowest result limb: nothing is shifted in below it + ret $31,($26),1 + + # cool down phase 1/1 +.Lend1: srl $1,$20,$7 + sll $1,$19,$21 + srl $2,$20,$8 + sll $2,$19,$22 + srl $3,$20,$5 + or $7,$24,$7 + sll $3,$19,$23 + or $8,$21,$8 + srl $4,$20,$6 + sll $4,$19,$24 + # cool down phase 1/2 + stq $7,-8($16) + or $5,$22,$5 + stq $8,-16($16) + or $6,$23,$6 + stq $5,-24($16) + stq $6,-32($16) + stq $24,-40($16) + ret $31,($26),1 + +.Lend: stq $24,-8($16) # no limbs left: store the pending shifted limb + ret $31,($26),1 + .end __mpn_lshift diff --git a/REORG.TODO/sysdeps/alpha/alphaev5/rshift.S b/REORG.TODO/sysdeps/alpha/alphaev5/rshift.S new file mode 100644 index 0000000000..0a44c77d0a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev5/rshift.S @@ -0,0 +1,170 @@ + # Alpha EV5 __mpn_rshift -- + + # Copyright (C) 1994-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr r16 + # s1_ptr r17 + # size r18 + # cnt r19 + + # This code runs at 3.25 cycles/limb on the EV5. 
+ + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_rshift + .ent __mpn_rshift +__mpn_rshift: + .frame $30,0,$26,0 + + ldq $4,0($17) # load first limb + subq $31,$19,$20 # $20 = -cnt, the complementary shift count (NOTE(review): presumably only its low 6 bits are used, i.e. 64-cnt; confirm) + subq $18,1,$18 # size-1: one limb is already loaded + and $18,4-1,$28 # number of limbs in first loop + sll $4,$20,$0 # compute function result + + beq $28,.L0 + subq $18,$28,$18 # limbs left for the unrolled loop + + .align 3 +.Loop0: ldq $3,8($17) # next higher limb + addq $16,8,$16 + srl $4,$19,$5 # lower part, from the current limb + addq $17,8,$17 + subq $28,1,$28 + sll $3,$20,$6 # bits shifted in from the next higher limb + or $3,$3,$4 # next limb becomes the current limb + or $5,$6,$8 # assemble the result limb + stq $8,-8($16) + bne $28,.Loop0 + +.L0: srl $4,$19,$24 # $24 = shifted current limb, still missing its high bits + beq $18,.Lend + # warm up phase 1 + ldq $1,8($17) + subq $18,4,$18 + ldq $2,16($17) + ldq $3,24($17) + ldq $4,32($17) + beq $18,.Lend1 + # warm up phase 2 + sll $1,$20,$7 + srl $1,$19,$21 + sll $2,$20,$8 + ldq $1,40($17) + srl $2,$19,$22 + ldq $2,48($17) + sll $3,$20,$5 + or $7,$24,$7 + srl $3,$19,$23 + or $8,$21,$8 + sll $4,$20,$6 + ldq $3,56($17) + srl $4,$19,$24 + ldq $4,64($17) + subq $18,4,$18 + beq $18,.Lend2 + .align 4 + # main loop +.Loop: stq $7,0($16) + or $5,$22,$5 + stq $8,8($16) + or $6,$23,$6 + + sll $1,$20,$7 + subq $18,4,$18 + srl $1,$19,$21 + unop # ldq $31,-96($17) + + sll $2,$20,$8 + ldq $1,72($17) + srl $2,$19,$22 + ldq $2,80($17) + + stq $5,16($16) + or $7,$24,$7 + stq $6,24($16) + or $8,$21,$8 + + sll $3,$20,$5 + unop # ldq $31,-96($17) + srl $3,$19,$23 + addq $16,32,$16 + + sll $4,$20,$6 + ldq $3,88($17) + srl $4,$19,$24 + ldq $4,96($17) + + addq $17,32,$17 + bne $18,.Loop + # cool down phase 2/1 +.Lend2: stq $7,0($16) + or $5,$22,$5 + stq $8,8($16) + or $6,$23,$6 + sll $1,$20,$7 + srl $1,$19,$21 + sll $2,$20,$8 + srl $2,$19,$22 + stq $5,16($16) + or $7,$24,$7 + stq $6,24($16) + or $8,$21,$8 + sll $3,$20,$5 + srl $3,$19,$23 + sll $4,$20,$6 + srl $4,$19,$24 + # cool down phase 2/2 + stq $7,32($16) + or $5,$22,$5 + stq $8,40($16) + or $6,$23,$6 + stq $5,48($16) + stq $6,56($16) + # cool down phase 2/3 + stq $24,64($16) # highest result limb: nothing is shifted in above it + ret $31,($26),1 + + # cool down phase 1/1 +.Lend1: sll $1,$20,$7 + srl $1,$19,$21 + sll $2,$20,$8 + srl 
$2,$19,$22 + sll $3,$20,$5 + or $7,$24,$7 + srl $3,$19,$23 + or $8,$21,$8 + sll $4,$20,$6 + srl $4,$19,$24 + # cool down phase 1/2 + stq $7,0($16) + or $5,$22,$5 + stq $8,8($16) + or $6,$23,$6 + stq $5,16($16) + stq $6,24($16) + stq $24,32($16) + ret $31,($26),1 + +.Lend: stq $24,0($16) # no limbs left: store the pending shifted limb + ret $31,($26),1 + .end __mpn_rshift diff --git a/REORG.TODO/sysdeps/alpha/alphaev5/sub_n.S b/REORG.TODO/sysdeps/alpha/alphaev5/sub_n.S new file mode 100644 index 0000000000..032b0c616b --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev5/sub_n.S @@ -0,0 +1,147 @@ + # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and + # store difference in a third limb vector. + + # Copyright (C) 1995-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. 
+ + + # INPUT PARAMETERS + # res_ptr $16 + # s1_ptr $17 + # s2_ptr $18 + # size $19 + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_sub_n + .ent __mpn_sub_n +__mpn_sub_n: + .frame $30,0,$26,0 + + or $31,$31,$25 # clear cy + subq $19,4,$19 # decr loop cnt + blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop + # Start software pipeline for 1st loop + ldq $0,0($18) # $0,$1 = s2 limbs 0,1 + ldq $1,8($18) + ldq $4,0($17) # $4,$5 = s1 limbs 0,1 + ldq $5,8($17) + addq $17,32,$17 # update s1_ptr + ldq $2,16($18) # $2,$3 = s2 limbs 2,3 + subq $4,$0,$20 # 1st main sub + ldq $3,24($18) + subq $19,4,$19 # decr loop cnt + ldq $6,-16($17) # $6,$7 = s1 limbs 2,3 (s1_ptr was already advanced) + cmpult $4,$20,$25 # compute cy from last sub + ldq $7,-8($17) + addq $1,$25,$28 # cy add + addq $18,32,$18 # update s2_ptr + subq $5,$28,$21 # 2nd main sub + cmpult $28,$25,$8 # compute cy from last add + blt $19,.Lend1 # if less than 4 limbs remain, jump + # 1st loop handles groups of 4 limbs in a software pipeline + .align 4 +.Loop: cmpult $5,$21,$25 # compute cy (borrow) from last sub + ldq $0,0($18) + or $8,$25,$25 # combine cy from the two adds + ldq $1,8($18) + addq $2,$25,$28 # cy add + ldq $4,0($17) + subq $6,$28,$22 # 3rd main sub + ldq $5,8($17) + cmpult $28,$25,$8 # compute cy from last add + cmpult $6,$22,$25 # compute cy (borrow) from last sub + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + stq $21,8($16) + addq $3,$25,$28 # cy add + subq $7,$28,$23 # 4th main sub + cmpult $28,$25,$8 # compute cy from last add + cmpult $7,$23,$25 # compute cy (borrow) from last sub + addq $17,32,$17 # update s1_ptr + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + addq $0,$25,$28 # cy add + ldq $2,16($18) + subq $4,$28,$20 # 1st main sub + ldq $3,24($18) + cmpult $28,$25,$8 # compute cy from last add + ldq $6,-16($17) + cmpult $4,$20,$25 # compute cy (borrow) from last sub + ldq $7,-8($17) + or $8,$25,$25 # combine cy from the two adds + subq $19,4,$19 # decr loop cnt + stq $22,-16($16) + addq $1,$25,$28 # cy add + stq $23,-8($16) + subq $5,$28,$21 # 2nd main sub + addq $18,32,$18 
# update s2_ptr + cmpult $28,$25,$8 # compute cy from last add + bge $19,.Loop + # Finish software pipeline for 1st loop +.Lend1: cmpult $5,$21,$25 # compute cy (borrow) from last sub + or $8,$25,$25 # combine cy from the two adds + addq $2,$25,$28 # cy add + subq $6,$28,$22 # 3rd main sub + cmpult $28,$25,$8 # compute cy from last add + cmpult $6,$22,$25 # compute cy (borrow) from last sub + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + stq $21,8($16) + addq $3,$25,$28 # cy add + subq $7,$28,$23 # 4th main sub + cmpult $28,$25,$8 # compute cy from last add + cmpult $7,$23,$25 # compute cy (borrow) from last sub + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + stq $22,-16($16) + stq $23,-8($16) +.Lend2: addq $19,4,$19 # restore loop cnt + beq $19,.Lret # no limbs left + # Start software pipeline for 2nd loop + ldq $0,0($18) # $0 = s2 limb + ldq $4,0($17) # $4 = s1 limb + subq $19,1,$19 # the last limb is finished at .Lend0 + beq $19,.Lend0 + # 2nd loop handles remaining 1-3 limbs + .align 4 +.Loop0: addq $0,$25,$28 # cy add + ldq $0,8($18) + subq $4,$28,$20 # main sub + ldq $1,8($17) # next s1 limb goes to $1: $4 is still needed for the borrow compare + addq $18,8,$18 + cmpult $28,$25,$8 # compute cy from last add + addq $17,8,$17 + stq $20,0($16) + cmpult $4,$20,$25 # compute cy (borrow) from last sub + subq $19,1,$19 # decr loop cnt + or $8,$25,$25 # combine cy from the two adds + addq $16,8,$16 + or $1,$31,$4 # $4 = next s1 limb + bne $19,.Loop0 +.Lend0: addq $0,$25,$28 # cy add + subq $4,$28,$20 # main sub + cmpult $28,$25,$8 # compute cy from last add + cmpult $4,$20,$25 # compute cy (borrow) from last sub + stq $20,0($16) # store final difference limb + or $8,$25,$25 # combine cy from the two adds + +.Lret: or $25,$31,$0 # return cy + ret $31,($26),1 + .end __mpn_sub_n diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/Implies b/REORG.TODO/sysdeps/alpha/alphaev6/Implies new file mode 100644 index 0000000000..0e7fc170ba --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev6/Implies @@ -0,0 +1 @@ +alpha/alphaev5 diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/addmul_1.S b/REORG.TODO/sysdeps/alpha/alphaev6/addmul_1.S new file mode 100644 index 
0000000000..1072ea763f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev6/addmul_1.S @@ -0,0 +1,477 @@ + # Alpha ev6 mpn_addmul_1 -- Multiply a limb vector with a limb and add + # the result to a second limb vector. + # + # Copyright (C) 2000-2017 Free Software Foundation, Inc. + # + # This file is part of the GNU MP Library. + # + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation; either version 2.1 of the License, or (at + # your option) any later version. + # + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + # INPUT PARAMETERS + # res_ptr $16 + # s1_ptr $17 + # size $18 + # s2_limb $19 + # + # This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and + # exactly 3.625 cycles/limb on EV6... + # + # This code was written in close cooperation with ev6 pipeline expert + # Steve Root (root@toober.hlo.dec.com). Any errors are tege's fault, though. + # + # Register usages for unrolled loop: + # 0-3 mul's + # 4-7 acc's + # 8-15 mul results + # 20,21 carry's + # 22,23 save for stores + # + # Sustains 8 mul-adds in 29 cycles in the unrolled inner loop. + # + # The stores can issue a cycle late so we have paired no-op's to 'catch' + # them, so that further disturbance to the schedule is damped. + # + # We couldn't pair the loads, because the entangled schedule of the + # carry's has to happen on one side {0} of the machine. Note, the total + # use of U0, and the total use of L0 (after attending to the stores). + # which is part of the reason why.... 
+ # + # This is a great schedule for the d_cache, a poor schedule for the + # b_cache. The lockup on U0 means that any stall can't be recovered + # from. Consider a ldq in L1. Say that load gets stalled because it + # collides with a fill from the b_cache. On the next cycle, this load + # gets priority. It first looks at L0, and goes there. The instruction + # we intended for L0 gets to look at L1, which is NOT where we want + # it. It either stalls 1, because it can't go in L0, or goes there, and + # causes a further instruction to stall. + # + # So for b_cache, we're likely going to want to put one or more cycles + # back into the code! And, of course, put in prefetches. For the + # accumulator, lds, intent to modify. For the multiplier, you might + # want ldq, evict next, if you're not wanting to use it again soon. Use + # 256 ahead of present pointer value. At a place where we have an mt + # followed by a bookkeeping, put the bookkeeping in upper, and the + # prefetch into lower. + # + # Note, the usage of physical registers per cycle is smoothed off, as + # much as possible. + # + # Note, the ldq's and stq's are at the end of the quadpacks. Note, we'd + # like not to have a ldq or stq precede a conditional branch in a + # quadpack. The conditional branch moves the retire pointer one cycle + # later. + # + # Optimization notes: + # Callee-saves regs: $9 $10 $11 $12 $13 $14 $15 $26 ?$27? + # Reserved regs: $29 $30 $31 + # Free caller-saves regs in unrolled code: $24 $25 $28 + # We should swap some of the callee-saves regs for some of the free + # caller-saves regs, saving some overhead cycles. + # Most importantly, we should write fast code for the 0-7 case. + # The code we use there is for the 21164, and runs at 7 cycles/limb + # on the 21264. Should not be hard, if we write specialized code for + # 1-7 limbs (the one for 0 limbs should be straightforward). We then just + # need a jump table indexed by the low 3 bits of the count argument.
+
+ # ---------------------------------------------------------------------
+ # Summary of the code below:
+ #   In:   $16 = res_ptr, $17 = s1_ptr, $18 = size, $19 = s2_limb
+ #   Out:  $0  = final carry limb
+ #   Each limb loaded from s1_ptr is multiplied by s2_limb (mulq/umulh
+ #   give the low and high product words); the low word plus the running
+ #   carry limb is added into the limb at res_ptr and stored back.
+ #   size < 8 is handled by the simple loop at $Loop0.  Otherwise $Large
+ #   first retires (size & 7) limbs with an identical loop ($Loop1) and
+ #   then handles the remaining limbs eight at a time in the
+ #   software-pipelined code that starts at $Lunroll.
+ #   The first limb is loaded and stored before size is ever compared
+ #   with zero, so callers must pass size >= 1.
+ #   NOTE(review): .frame declares a zero-size frame, but $Large lowers
+ #   $30 by 240 bytes (only offsets 8..56 are used, to save $9-$15);
+ #   confirm whether unwind information matters for this routine.
+ # ---------------------------------------------------------------------
+	.set noreorder
+	.set noat
+	.text
+
+	.globl __mpn_addmul_1
+	.ent __mpn_addmul_1
+__mpn_addmul_1:
+	.frame $30,0,$26,0
+	.prologue 0
+
+	# $1 = (size < 8), unsigned; 8 or more limbs go to $Large.
+	cmpult	$18, 8, $1
+	beq	$1, $Large
+
+	# Simple path, 1..7 limbs: the first limb is peeled off here.
+	ldq	$2, 0($17)	# $2 = s1_limb
+	addq	$17, 8, $17	# s1_ptr++
+	subq	$18, 1, $18	# size--
+	mulq	$2, $19, $3	# $3 = prod_low
+	ldq	$5, 0($16)	# $5 = *res_ptr
+	umulh	$2, $19, $0	# $0 = prod_high
+	beq	$18, $Lend0b	# jump if size was == 1
+	ldq	$2, 0($17)	# $2 = s1_limb
+	addq	$17, 8, $17	# s1_ptr++
+	subq	$18, 1, $18	# size--
+	addq	$5, $3, $3
+	cmpult	$3, $5, $4
+	stq	$3, 0($16)
+	addq	$16, 8, $16	# res_ptr++
+	beq	$18, $Lend0a	# jump if size was == 2
+
+	.align 3
+$Loop0:	mulq	$2, $19, $3	# $3 = prod_low
+	ldq	$5, 0($16)	# $5 = *res_ptr
+	addq	$4, $0, $0	# cy_limb = cy_limb + 'cy'
+	subq	$18, 1, $18	# size--
+	umulh	$2, $19, $4	# $4 = cy_limb
+	ldq	$2, 0($17)	# $2 = s1_limb
+	addq	$17, 8, $17	# s1_ptr++
+	addq	$3, $0, $3	# $3 = cy_limb + prod_low
+	cmpult	$3, $0, $0	# $0 = carry from (cy_limb + prod_low)
+	addq	$5, $3, $3
+	cmpult	$3, $5, $5
+	stq	$3, 0($16)
+	addq	$16, 8, $16	# res_ptr++
+	addq	$5, $0, $0	# combine carries
+	bne	$18, $Loop0
+	# Last limb of the simple path: same steps as the loop body, without
+	# loading a further s1 limb, then return the carry limb in $0.
+$Lend0a:
+	mulq	$2, $19, $3	# $3 = prod_low
+	ldq	$5, 0($16)	# $5 = *res_ptr
+	addq	$4, $0, $0	# cy_limb = cy_limb + 'cy'
+	umulh	$2, $19, $4	# $4 = cy_limb
+	addq	$3, $0, $3	# $3 = cy_limb + prod_low
+	cmpult	$3, $0, $0	# $0 = carry from (cy_limb + prod_low)
+	addq	$5, $3, $3
+	cmpult	$3, $5, $5
+	stq	$3, 0($16)
+	addq	$5, $0, $0	# combine carries
+	addq	$4, $0, $0	# cy_limb = prod_high + cy
+	ret	$31, ($26), 1
+	# size == 1: add the single product into *res_ptr and return
+	# prod_high plus the carry out of that addition.
+$Lend0b:
+	addq	$5, $3, $3
+	cmpult	$3, $5, $5
+	stq	$3, 0($16)
+	addq	$0, $5, $0
+	ret	$31, ($26), 1
+
+	# size >= 8.  Save $9-$15 (listed as callee-saves in the notes at the
+	# top of the file); they are restored just before the final ret.
+$Large:
+	lda	$30, -240($30)
+	stq	$9, 8($30)
+	stq	$10, 16($30)
+	stq	$11, 24($30)
+	stq	$12, 32($30)
+	stq	$13, 40($30)
+	stq	$14, 48($30)
+	stq	$15, 56($30)
+
+	and	$18, 7, $20	# count for the first loop, 0-7
+	srl	$18, 3, $18	# count for unrolled loop
+	bis	$31, $31, $0	# $0 = 0: no carry yet if the first loop is skipped
+	beq	$20, $Lunroll
+	ldq	$2, 0($17)	# $2 = s1_limb
+	addq	$17, 8, $17	# s1_ptr++
+	subq	$20, 1, $20	# size--
+	mulq	$2, $19, $3	# $3 = prod_low
+	ldq	$5, 0($16)	# $5 = *res_ptr
+	umulh	$2, $19, $0	# $0 = prod_high
+	beq	$20, $Lend1b	# jump if size was == 1
+	ldq	$2, 0($17)	# $2 = s1_limb
+	addq	$17, 8, $17	# s1_ptr++
+	subq	$20, 1, $20	# size--
+	addq	$5, $3, $3
+	cmpult	$3, $5, $4
+	stq	$3, 0($16)
+	addq	$16, 8, $16	# res_ptr++
+	beq	$20, $Lend1a	# jump if size was == 2
+
+	.align 3
+$Loop1:	mulq	$2, $19, $3	# $3 = prod_low
+	ldq	$5, 0($16)	# $5 = *res_ptr
+	addq	$4, $0, $0	# cy_limb = cy_limb + 'cy'
+	subq	$20, 1, $20	# size--
+	umulh	$2, $19, $4	# $4 = cy_limb
+	ldq	$2, 0($17)	# $2 = s1_limb
+	addq	$17, 8, $17	# s1_ptr++
+	addq	$3, $0, $3	# $3 = cy_limb + prod_low
+	cmpult	$3, $0, $0	# $0 = carry from (cy_limb + prod_low)
+	addq	$5, $3, $3
+	cmpult	$3, $5, $5
+	stq	$3, 0($16)
+	addq	$16, 8, $16	# res_ptr++
+	addq	$5, $0, $0	# combine carries
+	bne	$20, $Loop1
+
+$Lend1a:
+	mulq	$2, $19, $3	# $3 = prod_low
+	ldq	$5, 0($16)	# $5 = *res_ptr
+	addq	$4, $0, $0	# cy_limb = cy_limb + 'cy'
+	umulh	$2, $19, $4	# $4 = cy_limb
+	addq	$3, $0, $3	# $3 = cy_limb + prod_low
+	cmpult	$3, $0, $0	# $0 = carry from (cy_limb + prod_low)
+	addq	$5, $3, $3
+	cmpult	$3, $5, $5
+	stq	$3, 0($16)
+	addq	$16, 8, $16	# res_ptr++
+	addq	$5, $0, $0	# combine carries
+	addq	$4, $0, $0	# cy_limb = prod_high + cy
+	br	$31, $Lunroll
+$Lend1b:
+	addq	$5, $3, $3
+	cmpult	$3, $5, $5
+	stq	$3, 0($16)
+	addq	$16, 8, $16	# res_ptr++
+	addq	$0, $5, $0
+
+	# Both pointers are biased by -16 here; the startup loads below use
+	# offsets 16, 24, 32 and 40 accordingly.  $12 receives the carry limb
+	# accumulated so far ($0).
+$Lunroll:
+	lda	$17, -16($17)	# L1 bookkeeping
+	lda	$16, -16($16)	# L1 bookkeeping
+	bis	$0, $31, $12
+
+	# ____ UNROLLED LOOP SOFTWARE PIPELINE STARTUP ____
+
+	ldq	$2, 16($17)	# L1
+	ldq	$3, 24($17)	# L1
+	lda	$18, -1($18)	# L1 bookkeeping
+	ldq	$6, 16($16)	# L1
+	ldq	$7, 24($16)	# L1
+	ldq	$0, 32($17)	# L1
+	mulq	$19, $2, $13	# U1
+	ldq	$1, 40($17)	# L1
+	umulh	$19, $2, $14	# U1
+	mulq	$19, $3, $15	# U1
+	lda	$17, 64($17)	# L1 bookkeeping
+	ldq	$4, 32($16)	# L1
+	ldq	$5, 40($16)	# L1
+	umulh	$19, $3, $8	# U1
+	ldq	$2, -16($17)	# L1
+	mulq	$19, $0, $9	# U1
+	ldq	$3, -8($17)	# L1
+	umulh	$19, $0, $10	# U1
+	addq	$6, $13, $6	# L0 lo + acc
+	mulq	$19, $1, $11	# U1
+	cmpult	$6, $13, $20	# L0 lo add => carry
+	lda	$16, 64($16)	# L1 bookkeeping
+	addq	$6, $12, $22	# U0 hi add => answer
+	cmpult	$22, $12, $21	# L0 hi add => carry
+	addq	$14, $20, $14	# U0 hi mul + carry
+	ldq	$6, -16($16)	# L1
+	addq	$7, $15, $23	# L0 lo + acc
+	addq	$14, $21, $14	# U0 hi mul + carry
+	ldq	$7, -8($16)	# L1
+	umulh	$19, $1, $12	# U1
+	cmpult	$23, $15, $20	# L0 lo add => carry
+	addq	$23, $14, $23	# U0 hi add => answer
+	ldq	$0, 0($17)	# L1
+	mulq	$19, $2, $13	# U1
+	cmpult	$23, $14, $21	# L0 hi add => carry
+	addq	$8, $20, $8	# U0 hi mul + carry
+	ldq	$1, 8($17)	# L1
+	umulh	$19, $2, $14	# U1
+	addq	$4, $9, $4	# L0 lo + acc
+	stq	$22, -48($16)	# L0
+	stq	$23, -40($16)	# L1
+	mulq	$19, $3, $15	# U1
+	addq	$8, $21, $8	# U0 hi mul + carry
+	cmpult	$4, $9, $20	# L0 lo add => carry
+	addq	$4, $8, $22	# U0 hi add => answer
+	ble	$18, $Lend	# U1 bookkeeping
+
+	# ____ MAIN UNROLLED LOOP ____
+	# Instructions are grouped four at a time (the "quadpacks" of the
+	# header notes); "bis $31, $31, $31" is a no-op used as a slot filler.
+	.align 4
+$Loop:
+	bis	$31, $31, $31	# U1 mt
+	cmpult	$22, $8, $21	# L0 hi add => carry
+	addq	$10, $20, $10	# U0 hi mul + carry
+	ldq	$4, 0($16)	# L1
+
+	bis	$31, $31, $31	# U1 mt
+	addq	$5, $11, $23	# L0 lo + acc
+	addq	$10, $21, $10	# L0 hi mul + carry
+	ldq	$5, 8($16)	# L1
+
+	umulh	$19, $3, $8	# U1
+	cmpult	$23, $11, $20	# L0 lo add => carry
+	addq	$23, $10, $23	# U0 hi add => answer
+	ldq	$2, 16($17)	# L1
+
+	mulq	$19, $0, $9	# U1
+	cmpult	$23, $10, $21	# L0 hi add => carry
+	addq	$12, $20, $12	# U0 hi mul + carry
+	ldq	$3, 24($17)	# L1
+
+	umulh	$19, $0, $10	# U1
+	addq	$6, $13, $6	# L0 lo + acc
+	stq	$22, -32($16)	# L0
+	stq	$23, -24($16)	# L1
+
+	bis	$31, $31, $31	# L0 st slosh
+	mulq	$19, $1, $11	# U1
+	bis	$31, $31, $31	# L1 st slosh
+	addq	$12, $21, $12	# U0 hi mul + carry
+
+	cmpult	$6, $13, $20	# L0 lo add => carry
+	bis	$31, $31, $31	# U1 mt
+	lda	$18, -1($18)	# L1 bookkeeping
+	addq	$6, $12, $22	# U0 hi add => answer
+
+	bis	$31, $31, $31	# U1 mt
+	cmpult	$22, $12, $21	# L0 hi add => carry
+	addq	$14, $20, $14	# U0 hi mul + carry
+	ldq	$6, 16($16)	# L1
+
+	bis	$31, $31, $31	# U1 mt
+	addq	$7, $15, $23	# L0 lo + acc
+	addq	$14, $21, $14	# U0 hi mul + carry
+	ldq	$7, 24($16)	# L1
+
+	umulh	$19, $1, $12	# U1
+	cmpult	$23, $15, $20	# L0 lo add => carry
+	addq	$23, $14, $23	# U0 hi add => answer
+	ldq	$0, 32($17)	# L1
+
+	mulq	$19, $2, $13	# U1
+	cmpult	$23, $14, $21	# L0 hi add => carry
+	addq	$8, $20, $8	# U0 hi mul + carry
+	ldq	$1, 40($17)	# L1
+
+	umulh	$19, $2, $14	# U1
+	addq	$4, $9, $4	# U0 lo + acc
+	stq	$22, -16($16)	# L0
+	stq	$23, -8($16)	# L1
+
+	bis	$31, $31, $31	# L0 st slosh
+	mulq	$19, $3, $15	# U1
+	bis	$31, $31, $31	# L1 st slosh
+	addq	$8, $21, $8	# L0 hi mul + carry
+
+	cmpult	$4, $9, $20	# L0 lo add => carry
+	bis	$31, $31, $31	# U1 mt
+	lda	$17, 64($17)	# L1 bookkeeping
+	addq	$4, $8, $22	# U0 hi add => answer
+
+	bis	$31, $31, $31	# U1 mt
+	cmpult	$22, $8, $21	# L0 hi add => carry
+	addq	$10, $20, $10	# U0 hi mul + carry
+	ldq	$4, 32($16)	# L1
+
+	bis	$31, $31, $31	# U1 mt
+	addq	$5, $11, $23	# L0 lo + acc
+	addq	$10, $21, $10	# L0 hi mul + carry
+	ldq	$5, 40($16)	# L1
+
+	umulh	$19, $3, $8	# U1
+	cmpult	$23, $11, $20	# L0 lo add => carry
+	addq	$23, $10, $23	# U0 hi add => answer
+	ldq	$2, -16($17)	# L1
+
+	mulq	$19, $0, $9	# U1
+	cmpult	$23, $10, $21	# L0 hi add => carry
+	addq	$12, $20, $12	# U0 hi mul + carry
+	ldq	$3, -8($17)	# L1
+
+	umulh	$19, $0, $10	# U1
+	addq	$6, $13, $6	# L0 lo + acc
+	stq	$22, 0($16)	# L0
+	stq	$23, 8($16)	# L1
+
+	bis	$31, $31, $31	# L0 st slosh
+	mulq	$19, $1, $11	# U1
+	bis	$31, $31, $31	# L1 st slosh
+	addq	$12, $21, $12	# U0 hi mul + carry
+
+	cmpult	$6, $13, $20	# L0 lo add => carry
+	bis	$31, $31, $31	# U1 mt
+	lda	$16, 64($16)	# L1 bookkeeping
+	addq	$6, $12, $22	# U0 hi add => answer
+
+	bis	$31, $31, $31	# U1 mt
+	cmpult	$22, $12, $21	# L0 hi add => carry
+	addq	$14, $20, $14	# U0 hi mul + carry
+	ldq	$6, -16($16)	# L1
+
+	bis	$31, $31, $31	# U1 mt
+	addq	$7, $15, $23	# L0 lo + acc
+	addq	$14, $21, $14	# U0 hi mul + carry
+	ldq	$7, -8($16)	# L1
+
+	umulh	$19, $1, $12	# U1
+	cmpult	$23, $15, $20	# L0 lo add => carry
+	addq	$23, $14, $23	# U0 hi add => answer
+	ldq	$0, 0($17)	# L1
+
+	mulq	$19, $2, $13	# U1
+	cmpult	$23, $14, $21	# L0 hi add => carry
+	addq	$8, $20, $8	# U0 hi mul + carry
+	ldq	$1, 8($17)	# L1
+
+	umulh	$19, $2, $14	# U1
+	addq	$4, $9, $4	# L0 lo + acc
+	stq	$22, -48($16)	# L0
+	stq	$23, -40($16)	# L1
+
+	bis	$31, $31, $31	# L0 st slosh
+	mulq	$19, $3, $15	# U1
+	bis	$31, $31, $31	# L1 st slosh
+	addq	$8, $21, $8	# U0 hi mul + carry
+
+	cmpult	$4, $9, $20	# L0 lo add => carry
+	addq	$4, $8, $22	# U0 hi add => answer
+	bis	$31, $31, $31	# L1 mt
+	bgt	$18, $Loop	# U1 bookkeeping
+
+# ____ UNROLLED LOOP SOFTWARE PIPELINE FINISH ____
+# Drains the limbs still in flight (six stores below), leaves the final
+# carry limb in $0, restores $9-$15 and releases the 240-byte frame.
+$Lend:
+	cmpult	$22, $8, $21	# L0 hi add => carry
+	addq	$10, $20, $10	# U0 hi mul + carry
+	ldq	$4, 0($16)	# L1
+	addq	$5, $11, $23	# L0 lo + acc
+	addq	$10, $21, $10	# L0 hi mul + carry
+	ldq	$5, 8($16)	# L1
+	umulh	$19, $3, $8	# U1
+	cmpult	$23, $11, $20	# L0 lo add => carry
+	addq	$23, $10, $23	# U0 hi add => answer
+	mulq	$19, $0, $9	# U1
+	cmpult	$23, $10, $21	# L0 hi add => carry
+	addq	$12, $20, $12	# U0 hi mul + carry
+	umulh	$19, $0, $10	# U1
+	addq	$6, $13, $6	# L0 lo + acc
+	stq	$22, -32($16)	# L0
+	stq	$23, -24($16)	# L1
+	mulq	$19, $1, $11	# U1
+	addq	$12, $21, $12	# U0 hi mul + carry
+	cmpult	$6, $13, $20	# L0 lo add => carry
+	addq	$6, $12, $22	# U0 hi add => answer
+	cmpult	$22, $12, $21	# L0 hi add => carry
+	addq	$14, $20, $14	# U0 hi mul + carry
+	addq	$7, $15, $23	# L0 lo + acc
+	addq	$14, $21, $14	# U0 hi mul + carry
+	umulh	$19, $1, $12	# U1
+	cmpult	$23, $15, $20	# L0 lo add => carry
+	addq	$23, $14, $23	# U0 hi add => answer
+	cmpult	$23, $14, $21	# L0 hi add => carry
+	addq	$8, $20, $8	# U0 hi mul + carry
+	addq	$4, $9, $4	# U0 lo + acc
+	stq	$22, -16($16)	# L0
+	stq	$23, -8($16)	# L1
+	bis	$31, $31, $31	# L0 st slosh
+	addq	$8, $21, $8	# L0 hi mul + carry
+	cmpult	$4, $9, $20	# L0 lo add => carry
+	addq	$4, $8, $22	# U0 hi add => answer
+	cmpult	$22, $8, $21	# L0 hi add => carry
+	addq	$10, $20, $10	# U0 hi mul + carry
+	addq	$5, $11, $23	# L0 lo + acc
+	addq	$10, $21, $10	# L0 hi mul + carry
+	cmpult	$23, $11, $20	# L0 lo add => carry
+	addq	$23, $10, $23	# U0 hi add => answer
+	cmpult	$23, $10, $21	# L0 hi add => carry
+	addq	$12, $20, $12	# U0 hi mul + carry
+	stq	$22, 0($16)	# L0
+	stq	$23, 8($16)	# L1
+	addq	$12, $21, $0	# U0 hi mul + carry
+
+	ldq	$9, 8($30)
+	ldq	$10, 16($30)
+	ldq	$11, 24($30)
+	ldq	$12, 32($30)
+	ldq	$13, 40($30)
+	ldq	$14, 48($30)
+	ldq	$15, 56($30)
+	lda	$30, 240($30)
+	ret	$31, ($26), 1
+
+	.end __mpn_addmul_1
diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/fpu/e_sqrt.S b/REORG.TODO/sysdeps/alpha/alphaev6/fpu/e_sqrt.S
new file mode 100644
index 0000000000..18d03ee9c9
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/alphaev6/fpu/e_sqrt.S
@@ -0,0 +1,53 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library. If not, see
+   <http://www.gnu.org/licenses/>.
*/
+
+#include <sysdep.h>
+#include <shlib-compat.h>
+
+	.arch ev6
+	.set noreorder
+	.set noat
+
+/* __ieee754_sqrt: square root computed by a single sqrtt instruction.
+   The operand is read from $f16 and the result is written to $f0.
+   When PROF is defined the routine first loads gp and calls _mcount.
+   _IEEE_FP_INEXACT selects the /suid qualifier instead of /sud.
+   NOTE(review): sqrtt is presumably the T-format (IEEE double) form and
+   the extra "i" presumably enables inexact reporting -- confirm both
+   against the Alpha architecture handbook.  */
+ENTRY(__ieee754_sqrt)
+#ifdef PROF
+	ldgp	gp, 0(pv)
+	lda	AT, _mcount
+	jsr	AT, (AT), _mcount
+	.prologue 1
+#else
+	.prologue 0
+#endif
+
+	/* The aligned group below is exactly four instructions (16 bytes):
+	   the sqrt, the return, and two nops.  NOTE(review): the nops look
+	   like padding to fill that group -- confirm.  */
+	.align 4
+#ifdef _IEEE_FP_INEXACT
+	sqrtt/suid $f16, $f0
+#else
+	sqrtt/sud $f16, $f0
+#endif
+	ret
+	nop
+	nop
+
+END(__ieee754_sqrt)
+
+/* Export the same code as __sqrt_finite.  When compatibility with the
+   GLIBC_2_15..GLIBC_2_18 range is required, the alias __sqrt_finite1 is
+   bound to __sqrt_finite at version GLIBC_2_15 and __ieee754_sqrt itself
+   is bound at GLIBC_2_18; otherwise a plain strong alias is used.  */
+#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18)
+strong_alias(__ieee754_sqrt, __sqrt_finite1)
+compat_symbol(libm, __sqrt_finite1, __sqrt_finite, GLIBC_2_15)
+versioned_symbol(libm, __ieee754_sqrt, __sqrt_finite, GLIBC_2_18)
+#else
+strong_alias(__ieee754_sqrt, __sqrt_finite)
+#endif
diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/fpu/e_sqrtf.S b/REORG.TODO/sysdeps/alpha/alphaev6/fpu/e_sqrtf.S
new file mode 100644
index 0000000000..c4ef9c32c6
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/alphaev6/fpu/e_sqrtf.S
@@ -0,0 +1,53 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library. If not, see
+   <http://www.gnu.org/licenses/>.
*/
+
+#include <sysdep.h>
+#include <shlib-compat.h>
+
+	.arch ev6
+	.set noreorder
+	.set noat
+
+/* __ieee754_sqrtf: square root computed by a single sqrts instruction.
+   The operand is read from $f16 and the result is written to $f0.
+   When PROF is defined the routine first loads gp and calls _mcount.
+   _IEEE_FP_INEXACT selects the /suid qualifier instead of /sud.
+   NOTE(review): sqrts is presumably the S-format (IEEE single) form and
+   the extra "i" presumably enables inexact reporting -- confirm both
+   against the Alpha architecture handbook.  */
+ENTRY(__ieee754_sqrtf)
+#ifdef PROF
+	ldgp	gp, 0(pv)
+	lda	AT, _mcount
+	jsr	AT, (AT), _mcount
+	.prologue 1
+#else
+	.prologue 0
+#endif
+
+	/* The aligned group below is exactly four instructions (16 bytes):
+	   the sqrt, the return, and two nops.  NOTE(review): the nops look
+	   like padding to fill that group -- confirm.  */
+	.align 4
+#ifdef _IEEE_FP_INEXACT
+	sqrts/suid $f16, $f0
+#else
+	sqrts/sud $f16, $f0
+#endif
+	ret
+	nop
+	nop
+
+END(__ieee754_sqrtf)
+
+/* Export the same code as __sqrtf_finite.  When compatibility with the
+   GLIBC_2_15..GLIBC_2_18 range is required, the alias __sqrtf_finite1 is
+   bound to __sqrtf_finite at version GLIBC_2_15 and __ieee754_sqrtf
+   itself is bound at GLIBC_2_18; otherwise a plain strong alias is
+   used.  */
+#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18)
+strong_alias(__ieee754_sqrtf, __sqrtf_finite1)
+compat_symbol(libm, __sqrtf_finite1, __sqrtf_finite, GLIBC_2_15)
+versioned_symbol(libm, __ieee754_sqrtf, __sqrtf_finite, GLIBC_2_18)
+#else
+strong_alias(__ieee754_sqrtf, __sqrtf_finite)
+#endif
diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/memcpy.S b/REORG.TODO/sysdeps/alpha/alphaev6/memcpy.S
new file mode 100644
index 0000000000..170a23b5da
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/alphaev6/memcpy.S
@@ -0,0 +1,255 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library. If not, see
+   <http://www.gnu.org/licenses/>.
*/
+
+/*
+ * Much of the information about 21264 scheduling/coding comes from:
+ *	Compiler Writer's Guide for the Alpha 21264
+ *	abbreviated as 'CWG' in other comments here
+ *	ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
+ * Scheduling notation:
+ *	E	- either cluster
+ *	U	- upper subcluster; U0 - subcluster U0; U1 - subcluster U1
+ *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
+ *
+ * Temp usage notes:
+ *	$0	- destination address
+ *	$1,$2,	- scratch
+ */
+
+#include <sysdep.h>
+
+	.arch ev6
+	.set noreorder
+	.set noat
+
+/*
+ * memcpy: copy $18 bytes from ($17) to ($16); the original destination
+ * address is returned in $0.
+ *
+ * Paths through the code:
+ *   - src and dest equal mod 8: copy single bytes up to an 8-byte
+ *     boundary ($head_align); for more than 127 bytes copy single quads
+ *     until dest is 0mod64 and then run $unroll_body, which moves 64
+ *     bytes per trip and issues wh64 write hints; finish with single
+ *     quads ($move_a_quad) and single bytes ($tail_bytes).
+ *   - otherwise ($misaligned): copy single bytes until dest is 0mod8,
+ *     then build each aligned 8-byte store from two ldq_u loads merged
+ *     with extql/extqh ($mis_quad); finish with single bytes.
+ *
+ * NOTE(review): the count is tested with ble/blt/bge/bgt, which look
+ * like signed tests, so a count with the top bit set would copy
+ * nothing -- confirm this is intended.
+ */
+ENTRY(memcpy)
+	.prologue 0
+
+	mov	$16, $0			# E : copy dest to return
+	ble	$18, $nomoredata	# U : done with the copy?
+	xor	$16, $17, $1		# E : are source and dest alignments the same?
+	and	$1, 7, $1		# E : are they the same mod 8?
+
+	bne	$1, $misaligned		# U : Nope - gotta do this the slow way
+	/* source and dest are same mod 8 address */
+	and	$16, 7, $1		# E : Are both 0mod8?
+	beq	$1, $both_0mod8		# U : Yes
+	nop				# E :
+
+	/*
+	 * source and dest are same misalignment. move a byte at a time
+	 * until a 0mod8 alignment for both is reached.
+	 * At least one byte more to move
+	 */
+
+$head_align:
+	ldbu	$1, 0($17)		# L : grab a byte
+	subq	$18, 1, $18		# E : count--
+	addq	$17, 1, $17		# E : src++
+	stb	$1, 0($16)		# L :
+	addq	$16, 1, $16		# E : dest++
+	and	$16, 7, $1		# E : Are we at 0mod8 yet?
+	ble	$18, $nomoredata	# U : done with the copy?
+	bne	$1, $head_align		# U :
+
+$both_0mod8:
+	cmple	$18, 127, $1		# E : Can we unroll the loop?
+	bne	$1, $no_unroll		# U :
+	and	$16, 63, $1		# E : get mod64 alignment
+	beq	$1, $do_unroll		# U : no single quads to fiddle
+
+$single_head_quad:
+	ldq	$1, 0($17)		# L : get 8 bytes
+	subq	$18, 8, $18		# E : count -= 8
+	addq	$17, 8, $17		# E : src += 8
+	nop				# E :
+
+	stq	$1, 0($16)		# L : store
+	addq	$16, 8, $16		# E : dest += 8
+	and	$16, 63, $1		# E : get mod64 alignment
+	bne	$1, $single_head_quad	# U : still not fully aligned
+
+$do_unroll:
+	addq	$16, 64, $7		# E : Initial (+1 trip) wh64 address
+	cmple	$18, 127, $1		# E : Can we go through the unrolled loop?
+	bne	$1, $tail_quads		# U : Nope
+	nop				# E :
+
+	/*
+	 * One trip copies 64 bytes as two 32-byte halves.  $7 is the address
+	 * given to the next wh64; $1 holds the start of the next trip's
+	 * destination and replaces $7 when fewer than two more trips remain.
+	 */
+$unroll_body:
+	wh64	($7)			# L1 : memory subsystem hint: 64 bytes at
+					# ($7) are about to be over-written
+	ldq	$6, 0($17)		# L0 : bytes 0..7
+	nop				# E :
+	nop				# E :
+
+	ldq	$4, 8($17)		# L : bytes 8..15
+	ldq	$5, 16($17)		# L : bytes 16..23
+	addq	$7, 64, $7		# E : Update next wh64 address
+	nop				# E :
+
+	ldq	$3, 24($17)		# L : bytes 24..31
+	addq	$16, 64, $1		# E : fallback value for wh64
+	nop				# E :
+	nop				# E :
+
+	addq	$17, 32, $17		# E : src += 32 bytes
+	stq	$6, 0($16)		# L : bytes 0..7
+	nop				# E :
+	nop				# E :
+
+	stq	$4, 8($16)		# L : bytes 8..15
+	stq	$5, 16($16)		# L : bytes 16..23
+	subq	$18, 192, $2		# E : At least two more trips to go?
+	nop				# E :
+
+	stq	$3, 24($16)		# L : bytes 24..31
+	addq	$16, 32, $16		# E : dest += 32 bytes
+	nop				# E :
+	nop				# E :
+
+	ldq	$6, 0($17)		# L : bytes 0..7
+	ldq	$4, 8($17)		# L : bytes 8..15
+	cmovlt	$2, $1, $7		# E : Latency 2, extra map slot - Use
+					# fallback wh64 address if < 2 more trips
+	nop				# E :
+
+	ldq	$5, 16($17)		# L : bytes 16..23
+	ldq	$3, 24($17)		# L : bytes 24..31
+	addq	$16, 32, $16		# E : dest += 32
+	subq	$18, 64, $18		# E : count -= 64
+
+	addq	$17, 32, $17		# E : src += 32
+	stq	$6, -32($16)		# L : bytes 0..7
+	stq	$4, -24($16)		# L : bytes 8..15
+	cmple	$18, 63, $1		# E : At least one more trip?
+
+	stq	$5, -16($16)		# L : bytes 16..23
+	stq	$3, -8($16)		# L : bytes 24..31
+	nop				# E :
+	beq	$1, $unroll_body
+
+$tail_quads:
+$no_unroll:
+	.align 4
+	subq	$18, 8, $18		# E : At least a quad left?
+	blt	$18, $less_than_8	# U : Nope
+	nop				# E :
+	nop				# E :
+
+$move_a_quad:
+	ldq	$1, 0($17)		# L : fetch 8
+	subq	$18, 8, $18		# E : count -= 8
+	addq	$17, 8, $17		# E : src += 8
+	nop				# E :
+
+	stq	$1, 0($16)		# L : store 8
+	addq	$16, 8, $16		# E : dest += 8
+	bge	$18, $move_a_quad	# U :
+	nop				# E :
+
+$less_than_8:
+	.align 4
+	addq	$18, 8, $18		# E : add back for trailing bytes
+	ble	$18, $nomoredata	# U : All-done
+	nop				# E :
+	nop				# E :
+
+	/* Trailing bytes */
+$tail_bytes:
+	subq	$18, 1, $18		# E : count--
+	ldbu	$1, 0($17)		# L : fetch a byte
+	addq	$17, 1, $17		# E : src++
+	nop				# E :
+
+	stb	$1, 0($16)		# L : store a byte
+	addq	$16, 1, $16		# E : dest++
+	bgt	$18, $tail_bytes	# U : more to be done?
+	nop				# E :
+
+	/* branching to exit takes 3 extra cycles, so replicate exit here */
+	ret	$31, ($26), 1		# L0 :
+	nop				# E :
+	nop				# E :
+	nop				# E :
+
+	/*
+	 * src and dest differ mod 8.  From here on the running destination
+	 * pointer is kept in $4; $16 is reused as a scratch register inside
+	 * $mis_quad.
+	 */
+$misaligned:
+	mov	$0, $4			# E : dest temp
+	and	$0, 7, $1		# E : dest alignment mod8
+	beq	$1, $dest_0mod8		# U : dest is already 0mod8
+	nop
+
+$aligndest:
+	ble	$18, $nomoredata	# U :
+	ldbu	$1, 0($17)		# L : fetch a byte
+	subq	$18, 1, $18		# E : count--
+	addq	$17, 1, $17		# E : src++
+
+	stb	$1, 0($4)		# L : store it
+	addq	$4, 1, $4		# E : dest++
+	and	$4, 7, $1		# E : dest 0mod8 yet?
+	bne	$1, $aligndest		# U : go until we are aligned.
+
+	/* Source has unknown alignment, but dest is known to be 0mod8 */
+$dest_0mod8:
+	subq	$18, 8, $18		# E : At least a quad left?
+	blt	$18, $misalign_tail	# U : Nope
+	ldq_u	$3, 0($17)		# L : seed (rotating load) of 8 bytes
+	nop				# E :
+
+$mis_quad:
+	ldq_u	$16, 8($17)		# L : Fetch next 8
+	extql	$3, $17, $3		# U : masking
+	extqh	$16, $17, $1		# U : masking
+	bis	$3, $1, $1		# E : merged bytes to store
+
+	subq	$18, 8, $18		# E : count -= 8
+	addq	$17, 8, $17		# E : src += 8
+	stq	$1, 0($4)		# L : store 8 (aligned)
+	mov	$16, $3			# E : "rotate" source data
+
+	addq	$4, 8, $4		# E : dest += 8
+	bge	$18, $mis_quad		# U : More quads to move
+	nop
+	nop
+
+$misalign_tail:
+	addq	$18, 8, $18		# E : account for tail stuff
+	ble	$18, $nomoredata	# U :
+	nop
+	nop
+
+$misalign_byte:
+	ldbu	$1, 0($17)		# L : fetch 1
+	subq	$18, 1, $18		# E : count--
+	addq	$17, 1, $17		# E : src++
+	nop				# E :
+
+	stb	$1, 0($4)		# L : store
+	addq	$4, 1, $4		# E : dest++
+	bgt	$18, $misalign_byte	# U : more to go?
+	nop
+
+
+$nomoredata:
+	ret	$31, ($26), 1		# L0 :
+	nop				# E :
+	nop				# E :
+	nop				# E :
+
+END(memcpy)
+libc_hidden_builtin_def (memcpy)
diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/memset.S b/REORG.TODO/sysdeps/alpha/alphaev6/memset.S
new file mode 100644
index 0000000000..185821c7eb
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/alphaev6/memset.S
@@ -0,0 +1,223 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+   Contributed by Richard Henderson (rth@tamu.edu)
+   EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library. If not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+	.arch ev6
+	.set noat
+	.set noreorder
+
+/*
+ * memset: store the low byte of $17 into $18 bytes starting at ($16);
+ * the original destination address is returned in $0.
+ *
+ * The fill byte is first replicated into all eight bytes of $17.  Then:
+ *   - if the whole range lies inside one aligned quadword, it is merged
+ *     into that quadword at $within_quad;
+ *   - otherwise a misaligned head is merged into its quadword, whole
+ *     quadwords are stored ($loop, or $do_wh64 with wh64 write hints
+ *     when at least 16 quadwords remain), and 1..7 trailing bytes are
+ *     merged into the final quadword at $no_quad.
+ */
+ENTRY(memset)
+#ifdef PROF
+	ldgp	gp, 0(pv)
+	lda	AT, _mcount
+	jsr	AT, (AT), _mcount
+	.prologue 1
+#else
+	.prologue 0
+#endif
+
+	/*
+	 * Serious stalling happens. The only way to mitigate this is to
+	 * undertake a major re-write to interleave the constant materialization
+	 * with other parts of the fall-through code. This is important, even
+	 * though it makes maintenance tougher.
+	 * Do this later.
+	 */
+	and	$17, 255, $1	# E : 00000000000000ch
+	insbl	$17, 1, $2	# U : 000000000000ch00
+	mov	$16, $0		# E : return value
+	ble	$18, $end	# U : zero length requested?
+
+	addq	$18, $16, $6	# E : end address = dest + count (one past the last byte)
+	or	$1, $2, $17	# E : 000000000000chch
+	insbl	$1, 2, $3	# U : 0000000000ch0000
+	insbl	$1, 3, $4	# U : 00000000ch000000
+
+	or	$3, $4, $3	# E : 00000000chch0000
+	inswl	$17, 4, $5	# U : 0000chch00000000
+	xor	$16, $6, $1	# E : will complete write be within one quadword?
+	inswl	$17, 6, $2	# U : chch000000000000
+
+	or	$17, $3, $17	# E : 00000000chchchch
+	or	$2, $5, $2	# E : chchchch00000000
+	bic	$1, 7, $1	# E : fit within a single quadword?
+	and	$16, 7, $3	# E : Target addr misalignment
+
+	or	$17, $2, $17	# E : chchchchchchchch
+	beq	$1, $within_quad	# U :
+	nop			# E :
+	beq	$3, $aligned	# U : target is 0mod8
+
+	/*
+	 * Target address is misaligned, and won't fit within a quadword.
+	 */
+	ldq_u	$4, 0($16)	# L : Fetch first partial
+	mov	$16, $5		# E : Save the address
+	insql	$17, $16, $2	# U : Insert new bytes
+	subq	$3, 8, $3	# E : Invert (for addressing uses)
+
+	addq	$18, $3, $18	# E : $18 is new count ($3 is negative)
+	mskql	$4, $16, $4	# U : clear relevant parts of the quad
+	subq	$16, $3, $16	# E : $16 is new aligned destination
+	or	$2, $4, $1	# E : Final bytes
+
+	nop
+	stq_u	$1,0($5)	# L : Store result
+	nop
+	nop
+
+	.align 4
+$aligned:
+	/*
+	 * We are now guaranteed to be quad aligned, with at least
+	 * one partial quad to write.
+	 */
+
+	sra	$18, 3, $3	# U : Number of remaining quads to write
+	and	$18, 7, $18	# E : Number of trailing bytes to write
+	mov	$16, $5		# E : Save dest address
+	beq	$3, $no_quad	# U : tail stuff only
+
+	/*
+	 * It's worth the effort to unroll this and use wh64 if possible.
+	 * At this point, entry values are:
+	 * $16	Current destination address
+	 * $5	A copy of $16
+	 * $6	The max quadword address to write to
+	 * $18	Number trailer bytes
+	 * $3	Number quads to write
+	 */
+
+	and	$16, 0x3f, $2	# E : Forward work (only useful for unrolled loop)
+	subq	$3, 16, $4	# E : Only try to unroll if > 128 bytes
+	subq	$2, 0x40, $1	# E : bias counter (aligning stuff 0mod64)
+	blt	$4, $loop	# U :
+
+	/*
+	 * We know we've got at least 16 quads, minimum of one trip
+	 * through unrolled loop. Do a quad at a time to get us 0mod64
+	 * aligned.
+	 *
+	 * NOTE(review): $1 is ($16 & 0x3f) - 0x40 with $16 already 0mod8,
+	 * i.e. one of -64, -56, ..., -8 and never zero, so the beq below
+	 * cannot be taken; a destination that is already 0mod64 runs
+	 * $alignmod64 eight times.  $alignmod64 is also what leaves the
+	 * first wh64 address in $4.
+	 */
+
+	nop			# E :
+	nop			# E :
+	nop			# E :
+	beq	$1, $bigalign	# U :
+
+$alignmod64:
+	stq	$17, 0($5)	# L :
+	subq	$3, 1, $3	# E : For consistency later
+	addq	$1, 8, $1	# E : Increment towards zero for alignment
+	addq	$5, 8, $4	# E : Initial wh64 address (filler instruction)
+
+	nop
+	nop
+	addq	$5, 8, $5	# E : Inc address
+	blt	$1, $alignmod64	# U :
+
+$bigalign:
+	/*
+	 * $3 - number quads left to go
+	 * $5 - target address (aligned 0mod64)
+	 * $17 - mask of stuff to store
+	 * Scratch registers available: $7, $2, $4, $1
+	 * We know that we'll be taking a minimum of one trip through.
+	 * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle
+	 * Assumes the wh64 needs to be for 2 trips through the loop in the future.
+	 * The wh64 is issued for the starting destination address for trip +2
+	 * through the loop, and if there are less than two trips left, the target
+	 * address will be for the current trip.
+	 */
+
+$do_wh64:
+	wh64	($4)		# L1 : memory subsystem write hint
+	subq	$3, 24, $2	# E : For determining future wh64 addresses
+	stq	$17, 0($5)	# L :
+	nop			# E :
+
+	addq	$5, 128, $4	# E : speculative target of next wh64
+	stq	$17, 8($5)	# L :
+	stq	$17, 16($5)	# L :
+	addq	$5, 64, $7	# E : Fallback address for wh64 (== next trip addr)
+
+	stq	$17, 24($5)	# L :
+	stq	$17, 32($5)	# L :
+	cmovlt	$2, $7, $4	# E : Latency 2, extra mapping cycle
+	nop
+
+	stq	$17, 40($5)	# L :
+	stq	$17, 48($5)	# L :
+	subq	$3, 16, $2	# E : Repeat the loop at least once more?
+	nop
+
+	stq	$17, 56($5)	# L :
+	addq	$5, 64, $5	# E :
+	subq	$3, 8, $3	# E :
+	bge	$2, $do_wh64	# U :
+
+	nop
+	nop
+	nop
+	beq	$3, $no_quad	# U : Might have finished already
+
+	.align 4
+	/*
+	 * Simple loop for trailing quadwords, or for small amounts
+	 * of data (where we can't use an unrolled loop and wh64)
+	 */
+$loop:
+	stq	$17, 0($5)	# L :
+	subq	$3, 1, $3	# E : Decrement number quads left
+	addq	$5, 8, $5	# E : Inc address
+	bne	$3, $loop	# U : more?
+
+$no_quad:
+	/*
+	 * Write 0..7 trailing bytes.
+	 * The final quadword is read, merged with the fill pattern using
+	 * the end address in $6 as the byte selector, and written back.
+	 */
+	nop			# E :
+	beq	$18, $end	# U : All done?
+	ldq	$7, 0($5)	# L :
+	mskqh	$7, $6, $2	# U : Mask final quad
+
+	insqh	$17, $6, $4	# U : New bits
+	or	$2, $4, $1	# E : Put it all together
+	stq	$1, 0($5)	# L : And back to memory
+	ret	$31,($26),1	# L0 :
+
+	/*
+	 * Whole range lies inside one quadword: merge the fill pattern
+	 * between the start offset ($16) and the end offset ($6).
+	 */
+$within_quad:
+	ldq_u	$1, 0($16)	# L :
+	insql	$17, $16, $2	# U : New bits
+	mskql	$1, $16, $4	# U : Clear old
+	or	$2, $4, $2	# E : New result
+
+	mskql	$2, $6, $4	# U :
+	mskqh	$1, $6, $2	# U :
+	or	$2, $4, $1	# E :
+	stq_u	$1, 0($16)	# L :
+
+$end:
+	nop
+	nop
+	nop
+	ret	$31,($26),1	# L0 :
+
+	END(memset)
+libc_hidden_builtin_def (memset)
diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/stxcpy.S b/REORG.TODO/sysdeps/alpha/alphaev6/stxcpy.S
new file mode 100644
index 0000000000..84f19581d1
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/alphaev6/stxcpy.S
@@ -0,0 +1,314 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+   Contributed by Richard Henderson (rth@tamu.edu)
+   EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library. If not, see
+   <http://www.gnu.org/licenses/>. */
+
+/* Copy a null-terminated string from SRC to DST.
+
+   This is an internal routine used by strcpy, stpcpy, and strcat.
+ As such, it uses special linkage conventions to make implementation + of these public functions more efficient. + + On input: + t9 = return address + a0 = DST + a1 = SRC + + On output: + t8 = bitmask (with one bit set) indicating the last byte written + a0 = unaligned address of the last *word* written + + Furthermore, v0, a3-a5, t11, and t12 are untouched. +*/ + + +#include <sysdep.h> + + .arch ev6 + .set noat + .set noreorder + + .text + .type __stxcpy, @function + .globl __stxcpy + .usepv __stxcpy, no + + cfi_startproc + cfi_return_column (t9) + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + .align 4 +stxcpy_aligned: + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # E : build a mask against false zero + mskqh t2, a1, t2 # U : detection in the src word (stall) + mskqh t1, a1, t3 # U : + ornot t1, t2, t2 # E : (stall) + + mskql t0, a1, t0 # U : assemble the first output word + cmpbge zero, t2, t10 # E : bits set iff null found + or t0, t3, t1 # E : (stall) + bne t10, $a_eos # U : (stall) + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == a source word not containing a null. */ + /* Nops here to separate store quads from load quads */ + +$a_loop: + stq_u t1, 0(a0) # L : + addq a0, 8, a0 # E : + nop + nop + + ldq_u t1, 0(a1) # L : Latency=3 + addq a1, 8, a1 # E : + cmpbge zero, t1, t10 # E : (3 cycle stall) + beq t10, $a_loop # U : (stall for t10) + + /* Take care of the final (partial) word store. + On entry to this basic block we have: + t1 == the source word containing the null + t10 == the cmpbge mask that found it. */ +$a_eos: + negq t10, t6 # E : find low bit set + and t10, t6, t8 # E : (stall) + /* For the sake of the cache, don't read a destination word + if we're not going to need it. 
*/ + and t8, 0x80, t6 # E : (stall) + bne t6, 1f # U : (stall) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t0, 0(a0) # L : Latency=3 + subq t8, 1, t6 # E : + zapnot t1, t6, t1 # U : clear src bytes >= null (stall) + or t8, t6, t10 # E : (stall) + + zap t0, t10, t0 # E : clear dst bytes <= null + or t0, t1, t1 # E : (stall) + nop + nop + +1: stq_u t1, 0(a0) # L : + ret (t9) # L0 : Latency=3 + nop + nop + + .align 4 +__stxcpy: + /* Are source and destination co-aligned? */ + xor a0, a1, t0 # E : + unop # E : + and t0, 7, t0 # E : (stall) + bne t0, $unaligned # U : (stall) + + /* We are co-aligned; take care of a partial first word. */ + ldq_u t1, 0(a1) # L : load first src word + and a0, 7, t0 # E : take care not to load a word ... + addq a1, 8, a1 # E : + beq t0, stxcpy_aligned # U : ... if we wont need it (stall) + + ldq_u t0, 0(a0) # L : + br stxcpy_aligned # L0 : Latency=3 + nop + nop + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 4 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. 
+ + On entry to this basic block: + t0 == the first dest word, for masking back in, if needed else 0 + t1 == the low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # L : + addq a1, 8, a1 # E : + extql t1, a1, t1 # U : (stall on a1) + extqh t2, a1, t4 # U : (stall on a1) + + mskql t0, a0, t0 # U : + or t1, t4, t1 # E : + mskqh t1, a0, t1 # U : (stall on t1) + or t0, t1, t1 # E : (stall on t1) + + or t1, t6, t6 # E : + cmpbge zero, t6, t10 # E : (stall) + lda t6, -1 # E : for masking just below + bne t10, $u_final # U : (stall) + + mskql t6, a1, t6 # U : mask out the bits we have + or t6, t2, t2 # E : already extracted before (stall) + cmpbge zero, t2, t10 # E : testing eos (stall) + bne t10, $u_late_head_exit # U : (stall) + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + + stq_u t1, 0(a0) # L : store first output word + addq a0, 8, a0 # E : + extql t2, a1, t0 # U : position ho-bits of lo word + ldq_u t2, 8(a1) # U : read next high-order source word + + addq a1, 8, a1 # E : + cmpbge zero, t2, t10 # E : (stall for t2) + nop # E : + bne t10, $u_eos # U : (stall) + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. 
*/ + + .align 3 +$u_loop: + extqh t2, a1, t1 # U : extract high bits for current word + addq a1, 8, a1 # E : (stall) + extql t2, a1, t3 # U : extract low bits for next time (stall) + addq a0, 8, a0 # E : + + or t0, t1, t1 # E : current dst word now complete + ldq_u t2, 0(a1) # L : Latency=3 load high word for next time + stq_u t1, -8(a0) # L : save the current word (stall) + mov t3, t0 # E : + + cmpbge zero, t2, t10 # E : test new word for eos + beq t10, $u_loop # U : (stall) + nop + nop + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + extqh t2, a1, t1 # U : + or t0, t1, t1 # E : first (partial) source word complete (stall) + cmpbge zero, t1, t10 # E : is the null in this first bit? (stall) + bne t10, $u_final # U : (stall) + +$u_late_head_exit: + stq_u t1, 0(a0) # L : the null was in the high-order bits + addq a0, 8, a0 # E : + extql t2, a1, t1 # U : + cmpbge zero, t1, t10 # E : (stall) + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t1 == assembled source word + t10 == cmpbge mask that found the null. */ +$u_final: + negq t10, t6 # E : isolate low bit set + and t6, t10, t8 # E : (stall) + and t8, 0x80, t6 # E : avoid dest word load if we can (stall) + bne t6, 1f # U : (stall) + + ldq_u t0, 0(a0) # E : + subq t8, 1, t6 # E : + or t6, t8, t10 # E : (stall) + zapnot t1, t6, t1 # U : kill source bytes >= null (stall) + + zap t0, t10, t0 # U : kill dest bytes <= null (2 cycle data stall) + or t0, t1, t1 # E : (stall) + nop + nop + +1: stq_u t1, 0(a0) # L : + ret (t9) # L0 : Latency=3 + nop + nop + + /* Unaligned copy entry point. 
*/ + .align 4 +$unaligned: + + ldq_u t1, 0(a1) # L : load first source word + and a0, 7, t4 # E : find dest misalignment + and a1, 7, t5 # E : find src misalignment + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + mov zero, t0 # E : + + mov zero, t6 # E : + beq t4, 1f # U : + ldq_u t0, 0(a0) # L : + lda t6, -1 # E : + + mskql t6, a0, t6 # U : + nop + nop + nop +1: + subq a1, t4, a1 # E : sub dest misalignment from src addr + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + cmplt t4, t5, t8 # E : + beq t8, $u_head # U : + lda t2, -1 # E : mask out leading garbage in source + + mskqh t2, t5, t2 # U : + ornot t1, t2, t3 # E : (stall) + cmpbge zero, t3, t10 # E : is there a zero? (stall) + beq t10, $u_head # U : (stall) + + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + + ldq_u t0, 0(a0) # L : + negq t10, t6 # E : build bitmask of bytes <= zero + and t6, t10, t8 # E : (stall) + and a1, 7, t5 # E : + + subq t8, 1, t6 # E : + or t6, t8, t10 # E : (stall) + srl t8, t5, t8 # U : adjust final null return value + zapnot t2, t10, t2 # U : prepare source word; mirror changes (stall) + + and t1, t2, t1 # E : to source validity mask + extql t2, a1, t2 # U : + extql t1, a1, t1 # U : (stall) + andnot t0, t2, t0 # .. e1 : zero place for source to reside (stall) + + or t0, t1, t1 # e1 : and put it there + stq_u t1, 0(a0) # .. 
e0 : (stall) + ret (t9) # e1 : + + cfi_endproc diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/stxncpy.S b/REORG.TODO/sysdeps/alpha/alphaev6/stxncpy.S new file mode 100644 index 0000000000..ad094cc1df --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev6/stxncpy.S @@ -0,0 +1,392 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy no more than COUNT bytes of the null-terminated string from + SRC to DST. + + This is an internal routine used by strncpy, stpncpy, and strncat. + As such, it uses special linkage conventions to make implementation + of these public functions more efficient. + + On input: + t9 = return address + a0 = DST + a1 = SRC + a2 = COUNT + + Furthermore, COUNT may not be zero. + + On output: + t0 = last word written + t8 = bitmask (with one bit set) indicating the last byte written + t10 = bitmask (with one bit set) indicating the byte position of + the end of the range specified by COUNT + a0 = unaligned address of the last *word* written + a2 = the number of full words left in COUNT + + Furthermore, v0, a3-a5, t11, and t12 are untouched. 
+*/ + +#include <sysdep.h> + + .arch ev6 + .set noat + .set noreorder + + .text + .type __stxncpy, @function + .globl __stxncpy + .usepv __stxncpy, no + + cfi_startproc + cfi_return_column (t9) + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + .align 4 +stxncpy_aligned: + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # E : build a mask against false zero + mskqh t2, a1, t2 # U : detection in the src word (stall) + mskqh t1, a1, t3 # U : + ornot t1, t2, t2 # E : (stall) + + mskql t0, a1, t0 # U : assemble the first output word + cmpbge zero, t2, t7 # E : bits set iff null found + or t0, t3, t0 # E : (stall) + beq a2, $a_eoc # U : + + bne t7, $a_eos # U : + nop + nop + nop + + /* On entry to this basic block: + t0 == a source word not containing a null. */ + + /* + * nops here to: + * separate store quads from load quads + * limit of 1 bcond/quad to permit training + */ +$a_loop: + stq_u t0, 0(a0) # L : + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + nop + + ldq_u t0, 0(a1) # L : + addq a1, 8, a1 # E : + cmpbge zero, t0, t7 # E : + beq a2, $a_eoc # U : + + beq t7, $a_loop # U : + nop + nop + nop + + /* Take care of the final (partial) word store. At this point + the end-of-count bit is set in t7 iff it applies. + + On entry to this basic block we have: + t0 == the source word containing the null + t7 == the cmpbge mask that found it. */ +$a_eos: + negq t7, t8 # E : find low bit set + and t7, t8, t8 # E : (stall) + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t8, 0x80, t6 # E : (stall) + bne t6, 1f # U : (stall) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t1, 0(a0) # L : + subq t8, 1, t6 # E : + or t8, t6, t7 # E : (stall) + zapnot t0, t7, t0 # U : clear src bytes > null (stall) + + zap t1, t7, t1 # .. 
e1 : clear dst bytes <= null + or t0, t1, t0 # e1 : (stall) + nop + nop + +1: stq_u t0, 0(a0) # L : store the final word + ret (t9) # L0 : Latency=3 + nop + nop + + /* Add the end-of-count bit to the eos detection bitmask. */ +$a_eoc: + or t10, t7, t7 # E : + br $a_eos # L0 : Latency=3 + nop + nop + + .align 4 +__stxncpy: + /* Are source and destination co-aligned? */ + lda t2, -1 # E : + xor a0, a1, t1 # E : + and a0, 7, t0 # E : find dest misalignment + nop # E : + + srl t2, 1, t2 # U : + and t1, 7, t1 # E : + cmovlt a2, t2, a2 # E : bound count to LONG_MAX (stall) + nop # E : + + addq a2, t0, a2 # E : bias count by dest misalignment + subq a2, 1, a2 # E : (stall) + and a2, 7, t2 # E : (stall) + lda t10, 1 # E : + + srl a2, 3, a2 # U : a2 = loop counter = (count - 1)/8 + sll t10, t2, t10 # U : t10 = bitmask of last count byte + nop # E : + bne t1, $unaligned # U : (stall) + + /* We are co-aligned; take care of a partial first word. */ + ldq_u t1, 0(a1) # L : load first src word + addq a1, 8, a1 # E : + beq t0, stxncpy_aligned # U : avoid loading dest word if not needed + ldq_u t0, 0(a0) # L : + + br stxncpy_aligned # U : + nop + nop + nop + + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 4 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. 
+ + On entry to this basic block: + t0 == the first dest word, unmasked + t1 == the shifted low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # L : Latency=3 load second src word + addq a1, 8, a1 # E : + mskql t0, a0, t0 # U : mask trailing garbage in dst + extqh t2, a1, t4 # U : (3 cycle stall on t2) + + or t1, t4, t1 # E : first aligned src word complete (stall) + mskqh t1, a0, t1 # U : mask leading garbage in src (stall) + or t0, t1, t0 # E : first output word complete (stall) + or t0, t6, t6 # E : mask original data for zero test (stall) + + cmpbge zero, t6, t7 # E : + beq a2, $u_eocfin # U : + lda t6, -1 # E : + nop + + bne t7, $u_final # U : + mskql t6, a1, t6 # U : mask out bits already seen + stq_u t0, 0(a0) # L : store first output word + or t6, t2, t2 # E : + + cmpbge zero, t2, t7 # E : find nulls in second partial + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + bne t7, $u_late_head_exit # U : + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + extql t2, a1, t1 # U : position hi-bits of lo word + beq a2, $u_eoc # U : + ldq_u t2, 8(a1) # L : read next high-order source word + addq a1, 8, a1 # E : + + extqh t2, a1, t0 # U : position lo-bits of hi word (stall) + cmpbge zero, t2, t7 # E : + nop + bne t7, $u_eos # U : + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. 
+ + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. 
*/ + + .align 4 +$u_loop: + or t0, t1, t0 # E : current dst word now complete + subq a2, 1, a2 # E : decrement word count + extql t2, a1, t1 # U : extract high bits for next time + addq a0, 8, a0 # E : + + stq_u t0, -8(a0) # L : save the current word + beq a2, $u_eoc # U : + ldq_u t2, 8(a1) # L : Latency=3 load high word for next time + addq a1, 8, a1 # E : + + extqh t2, a1, t0 # U : extract low bits (2 cycle stall) + cmpbge zero, t2, t7 # E : test new word for eos + nop + beq t7, $u_loop # U : + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + or t0, t1, t0 # E : first (partial) source word complete + nop + cmpbge zero, t0, t7 # E : is the null in this first bit? (stall) + bne t7, $u_final # U : (stall) + + stq_u t0, 0(a0) # L : the null was in the high-order bits + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + nop + +$u_late_head_exit: + extql t2, a1, t0 # U : + cmpbge zero, t0, t7 # E : + or t7, t10, t6 # E : (stall) + cmoveq a2, t6, t7 # E : Latency=2, extra map slot (stall) + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t0 == assembled source word + t7 == cmpbge mask that found the null. 
*/ +$u_final: + negq t7, t6 # E : isolate low bit set + and t6, t7, t8 # E : (stall) + and t8, 0x80, t6 # E : avoid dest word load if we can (stall) + bne t6, 1f # U : (stall) + + ldq_u t1, 0(a0) # L : + subq t8, 1, t6 # E : + or t6, t8, t7 # E : (stall) + zapnot t0, t7, t0 # U : kill source bytes > null + + zap t1, t7, t1 # U : kill dest bytes <= null + or t0, t1, t0 # E : (stall) + nop + nop + +1: stq_u t0, 0(a0) # L : store the final word + ret (t9) # L0 : Latency=3 + + /* Got to end-of-count before end of string. + On entry to this basic block: + t1 == the shifted high-order bits from the previous source word */ +$u_eoc: + and a1, 7, t6 # E : t6 = low three bits of the src address + sll t10, t6, t6 # U : (stall) + and t6, 0xff, t6 # E : (stall) + bne t6, 1f # U : (stall) + + ldq_u t2, 8(a1) # L : load final src word + nop + extqh t2, a1, t0 # U : extract low bits for last word (stall) + or t1, t0, t1 # E : (stall) + +1: cmpbge zero, t1, t7 # E : + mov t1, t0 + +$u_eocfin: # end-of-count, final word + or t10, t7, t7 # E : + br $u_final # L0 : Latency=3 + + /* Unaligned copy entry point. */ + .align 4 +$unaligned: + + ldq_u t1, 0(a1) # L : load first source word + and a0, 7, t4 # E : find dest misalignment + and a1, 7, t5 # E : find src misalignment + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + mov zero, t0 # E : + + mov zero, t6 # E : + beq t4, 1f # U : + ldq_u t0, 0(a0) # L : + lda t6, -1 # E : + + mskql t6, a0, t6 # U : + nop + nop +1: subq a1, t4, a1 # E : sub dest misalignment from src addr + + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + + cmplt t4, t5, t8 # E : + extql t1, a1, t1 # U : shift src into place + lda t2, -1 # E : for creating masks later + beq t8, $u_head # U : (stall) + + mskqh t2, t5, t2 # U : begin src byte validity mask + cmpbge zero, t1, t7 # E : is there a zero? 
+ extql t2, a1, t2 # U : + or t7, t10, t5 # E : test for end-of-count too + + cmpbge zero, t2, t3 # E : + cmoveq a2, t5, t7 # E : Latency=2, extra map slot + nop # E : keep with cmoveq + andnot t7, t3, t7 # E : (stall) + + beq t7, $u_head # U : + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + ldq_u t0, 0(a0) # L : + negq t7, t6 # E : build bitmask of bytes <= zero + mskqh t1, t4, t1 # U : + + and t6, t7, t8 # E : + subq t8, 1, t6 # E : (stall) + or t6, t8, t7 # E : (stall) + zapnot t2, t7, t2 # U : prepare source word; mirror changes (stall) + + zapnot t1, t7, t1 # U : to source validity mask + andnot t0, t2, t0 # E : zero place for source to reside + or t0, t1, t0 # E : and put it there (stall both t0, t1) + stq_u t0, 0(a0) # L : (stall) + + ret (t9) # L0 : Latency=3 + + cfi_endproc diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/Implies b/REORG.TODO/sysdeps/alpha/alphaev67/Implies new file mode 100644 index 0000000000..49d19c4ad8 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/Implies @@ -0,0 +1 @@ +alpha/alphaev6 diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/ffs.S b/REORG.TODO/sysdeps/alpha/alphaev67/ffs.S new file mode 100644 index 0000000000..6715ae9234 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/ffs.S @@ -0,0 +1,51 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Finds the first bit set in an integer. */ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .set noat + + +ENTRY(__ffs) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + zap $16, 0xF0, $16 # clear the upper four bytes of the argument + cttz $16, $0 # count trailing zero bits + addq $0, 1, $0 # convert to a 1-based bit position + cmoveq $16, 0, $0 # argument was zero: return 0 + + nop + nop + nop + ret + +END(__ffs) + +weak_alias (__ffs, ffs) +libc_hidden_def (__ffs) +libc_hidden_builtin_def (ffs) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/ffsll.S b/REORG.TODO/sysdeps/alpha/alphaev67/ffsll.S new file mode 100644 index 0000000000..b469bba063 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/ffsll.S @@ -0,0 +1,44 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. 
If not, see + <http://www.gnu.org/licenses/>. */ + +/* Finds the first bit set in a long. 
*/ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .set noat + +ENTRY(ffsl) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + cttz $16, $0 # count trailing zero bits of the argument + addq $0, 1, $0 # convert to a 1-based bit position + cmoveq $16, 0, $0 # argument was zero: return 0 + ret + +END(ffsl) + +weak_extern (ffsl) +weak_alias (ffsl, ffsll) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/fpu/Implies b/REORG.TODO/sysdeps/alpha/alphaev67/fpu/Implies new file mode 100644 index 0000000000..9e3f12d0ac --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/fpu/Implies @@ -0,0 +1 @@ +alpha/alphaev6/fpu diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/rawmemchr.S b/REORG.TODO/sysdeps/alpha/alphaev67/rawmemchr.S new file mode 100644 index 0000000000..cddfe2a9f4 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/rawmemchr.S @@ -0,0 +1,92 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Return pointer to first occurrence of CH in STR. 
*/ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .set noat + +ENTRY(__rawmemchr) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + ldq_u t0, 0(a0) # L : load first quadword Latency=3 + and a1, 0xff, t3 # E : 00000000000000ch + insbl a1, 1, t5 # U : 000000000000ch00 + insbl a1, 7, a2 # U : ch00000000000000 + + insbl t3, 6, a3 # U : 00ch000000000000 + or t5, t3, a1 # E : 000000000000chch + andnot a0, 7, v0 # E : align our loop pointer + lda t4, -1 # E : build garbage mask + + mskqh t4, a0, t4 # U : only want relevant part of first quad + or a2, a3, a2 # E : chch000000000000 + inswl a1, 2, t5 # E : 00000000chch0000 + inswl a1, 4, a3 # E : 0000chch00000000 + + or a1, a2, a1 # E : chch00000000chch + or a3, t5, t5 # E : 0000chchchch0000 + cmpbge zero, t4, t4 # E : bits set iff byte is garbage + nop # E : + + /* This quad is _very_ serialized. Lots of stalling happens */ + or t5, a1, a1 # E : chchchchchchchch + xor t0, a1, t1 # E : make bytes == c zero + cmpbge zero, t1, t0 # E : bits set iff byte == c + andnot t0, t4, t0 # E : clear garbage bits + + cttz t0, a2 # U0 : speculative (in case we get a match) + nop # E : + nop # E : + bne t0, $found # U : + + /* + * Yuk. This loop is going to stall like crazy waiting for the + * data to be loaded. Not much can be done about it unless it's + * unrolled multiple times, which is generally unsafe. 
+ */ +$loop: + ldq t0, 8(v0) # L : Latency=3 + addq v0, 8, v0 # E : + xor t0, a1, t1 # E : + cmpbge zero, t1, t0 # E : bits set iff byte == c + + cttz t0, a2 # U0 : speculative (in case we get a match) + nop # E : + nop # E : + beq t0, $loop # U : + +$found: + negq t0, t1 # E : clear all but least set bit + and t0, t1, t0 # E : (t0 is not read again before the return) + addq v0, a2, v0 # E : Add in the bit number from above + ret # L0 : + + END(__rawmemchr) + +libc_hidden_def (__rawmemchr) +weak_alias (__rawmemchr, rawmemchr) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/stpcpy.S b/REORG.TODO/sysdeps/alpha/alphaev67/stpcpy.S new file mode 100644 index 0000000000..1f277779e0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/stpcpy.S @@ -0,0 +1,53 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@redhat.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy SRC to DEST returning the address of the terminating 0 in DEST. 
*/ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .set noat + .text + +ENTRY(__stpcpy) + ldgp gp, 0(pv) +#ifdef PROF + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + .prologue 1 + + .align 4 + mov a0, v0 # v0 = DEST (replaced by the final result below) + nop + jsr t9, __stxcpy # do the copy; the return address is passed in t9 + + # t8 = bitmask (with one bit set) indicating the last byte written + # a0 = unaligned address of the last *word* written + + cttz t8, t8 # t8 = index of the last byte written within its word + andnot a0, 7, a0 # align a0 down to that word + addq a0, t8, v0 # v0 = address of the last byte written + ret + + END(__stpcpy) + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/stpncpy.S b/REORG.TODO/sysdeps/alpha/alphaev67/stpncpy.S new file mode 100644 index 0000000000..1efc86cdb2 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/stpncpy.S @@ -0,0 +1,115 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@redhat.com) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy no more than N bytes from SRC to DEST, returning the address of + the terminating '\0' in DEST. 
*/ + +#include <sysdep.h> + + .arch ev6 + .set noat + .set noreorder + .text + +ENTRY(__stpncpy) + ldgp gp, 0(pv) +#ifdef PROF + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + .prologue 1 + + mov a0, v0 # v0 = DEST, returned as-is when the count is zero + beq a2, $zerocount # count of zero: nothing to copy + + .align 4 + nop + nop + jsr t9, __stxncpy # do the work of the copy + + cttz t8, t4 # t4 = index of the last byte written within its word + zapnot t0, t8, t5 # t5 = the last byte written, isolated + andnot a0, 7, a0 + bne a2, $multiword # do we have full words left? + + subq t8, 1, t2 + subq t10, 1, t3 + cmpult zero, t5, t5 # t5 = 1 if that byte is non-zero, else 0 + addq a0, t4, v0 + + or t2, t8, t2 + or t3, t10, t3 + addq v0, t5, v0 # step past the last byte when it was not a null + andnot t3, t2, t3 + + zap t0, t3, t0 + nop + stq t0, 0(a0) + ret + +$multiword: + subq t8, 1, t7 # clear the final bits in the prev word + cmpult zero, t5, t5 + or t7, t8, t7 + zapnot t0, t7, t0 + + subq a2, 1, a2 + stq t0, 0(a0) + addq a0, 8, a1 + beq a2, 1f # loop over full words remaining + + nop + nop + nop + blbc a2, 0f + + stq zero, 0(a1) + subq a2, 1, a2 + addq a1, 8, a1 + beq a2, 1f + +0: stq zero, 0(a1) + subq a2, 2, a2 + nop + nop + + stq zero, 8(a1) + addq a1, 16, a1 + nop + bne a2, 0b + +1: ldq t0, 0(a1) # clear the leading bits in the final word + subq t10, 1, t7 + addq a0, t4, v0 + nop + + or t7, t10, t7 + addq v0, t5, v0 + zap t0, t7, t0 + stq t0, 0(a1) + +$zerocount: + nop + nop + nop + ret + + END(__stpncpy) + +libc_hidden_def (__stpncpy) +weak_alias (__stpncpy, stpncpy) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/strcat.S b/REORG.TODO/sysdeps/alpha/alphaev67/strcat.S new file mode 100644 index 0000000000..1d70061479 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/strcat.S @@ -0,0 +1,61 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@tamu.edu>, 1996. 
+ EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Append a null-terminated string from SRC to DST. */ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .text + +ENTRY(strcat) + ldgp gp, 0(pv) +#ifdef PROF + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at +#endif + .prologue 1 + + mov $16, $0 # E : set up return value + /* Find the end of the string. */ + ldq_u $1, 0($16) # L : load first quadword (a0 may be misaligned) + lda $2, -1 # E : + insqh $2, $16, $2 # U : + + andnot $16, 7, $16 # E : align the scan pointer down to a quadword + or $2, $1, $1 # E : + cmpbge $31, $1, $2 # E : bits set iff byte == 0 + bne $2, $found # U : + +$loop: ldq $1, 8($16) # L : + addq $16, 8, $16 # E : + cmpbge $31, $1, $2 # E : + beq $2, $loop # U : + +$found: cttz $2, $3 # U0 : byte index of the first zero in this quadword + addq $16, $3, $16 # E : a0 = address of the terminating null of DST + /* Now do the append. 
*/ + mov $26, $23 # E : $23 = our return address, handed to __stxcpy + jmp $31, __stxcpy # L0 : tail call, control does not come back here + + END(strcat) +libc_hidden_builtin_def (strcat) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/strchr.S b/REORG.TODO/sysdeps/alpha/alphaev67/strchr.S new file mode 100644 index 0000000000..ac1886eb55 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/strchr.S @@ -0,0 +1,100 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@tamu.edu>, 1996. + EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Return the address of a given character within a null-terminated + string, or null if it is not found. */ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .set noat + +ENTRY(strchr) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + ldq_u t0, 0(a0) # L : load first quadword Latency=3 + and a1, 0xff, t3 # E : 00000000000000ch + insbl a1, 1, t5 # U : 000000000000ch00 + insbl a1, 7, a2 # U : ch00000000000000 + + insbl t3, 6, a3 # U : 00ch000000000000 + or t5, t3, a1 # E : 000000000000chch + andnot a0, 7, v0 # E : align our loop pointer + lda t4, -1 # E : build garbage mask + + mskqh t4, a0, t4 # U : only want relevant part of first quad + or a2, a3, a2 # E : chch000000000000 + inswl a1, 2, t5 # E : 00000000chch0000 + inswl a1, 4, a3 # E : 0000chch00000000 + + or a1, a2, a1 # E : chch00000000chch + or a3, t5, t5 # E : 0000chchchch0000 + cmpbge zero, t0, t2 # E : bits set iff byte == zero + cmpbge zero, t4, t4 # E : bits set iff byte is garbage + + /* This quad is _very_ serialized. 
Lots of stalling happens */ + or t5, a1, a1 # E : chchchchchchchch + xor t0, a1, t1 # E : make bytes == c zero + cmpbge zero, t1, t3 # E : bits set iff byte == c + or t2, t3, t0 # E : bits set iff char match or zero match + + andnot t0, t4, t0 # E : clear garbage bits + cttz t0, a2 # U0 : speculative (in case we get a match) + nop # E : + bne t0, $found # U : + + /* + * Yuk. This loop is going to stall like crazy waiting for the + * data to be loaded. Not much can be done about it unless it's + * unrolled multiple times, which is generally unsafe. + */ +$loop: + ldq t0, 8(v0) # L : Latency=3 + addq v0, 8, v0 # E : + xor t0, a1, t1 # E : + cmpbge zero, t0, t2 # E : bits set iff byte == 0 + + cmpbge zero, t1, t3 # E : bits set iff byte == c + or t2, t3, t0 # E : + cttz t3, a2 # U0 : speculative (in case we get a match) + beq t0, $loop # U : + +$found: + negq t0, t1 # E : clear all but least set bit + and t0, t1, t0 # E : + and t0, t3, t1 # E : bit set iff byte was the char + addq v0, a2, v0 # E : Add in the bit number from above + + cmoveq t1, $31, v0 # E : Two mapping slots, latency = 2; v0 = 0 if the zero byte came first + nop + nop + ret # L0 : + + END(strchr) + +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/strlen.S b/REORG.TODO/sysdeps/alpha/alphaev67/strlen.S new file mode 100644 index 0000000000..ead08998a1 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/strlen.S @@ -0,0 +1,60 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by David Mosberger (davidm@cs.arizona.edu). + EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Finds length of a 0-terminated string.
+
+   $16 holds the string pointer and is left unmodified; $0 is used as
+   the aligned scan pointer and finally holds the length, computed as
+   (address of the terminator) - $16.  */
+
+#include <sysdep.h>
+
+	.arch ev6
+	.set noreorder
+	.set noat
+
+ENTRY(strlen)
+#ifdef PROF
+	ldgp	gp, 0(pv)
+	lda	AT, _mcount
+	jsr	AT, (AT), _mcount
+	.prologue 1
+#else
+	.prologue 0
+#endif
+
+	ldq_u	$1, 0($16)	# L : load first quadword ($16 may be misaligned)
+	lda	$2, -1($31)	# E : $2 = all ones
+	insqh	$2, $16, $2	# U : keep ones only in the bytes before the start
+	andnot	$16, 7, $0	# E : $0 = quadword-aligned scan pointer
+
+	or	$2, $1, $1	# E : make the bytes before the start non-zero
+	cmpbge	$31, $1, $2	# E : $2 <- bitmask: bit i == 1 <==> i-th byte == 0
+	nop			# E : padding
+	bne	$2, $found	# U : terminator is in the first quadword
+
+$loop:	ldq	$1, 8($0)	# L : load the next aligned quadword
+	addq	$0, 8, $0	# E : addr += 8
+	cmpbge	$31, $1, $2	# E : bits set iff byte == 0
+	beq	$2, $loop	# U : no zero byte yet, keep scanning
+
+$found:
+	cttz	$2, $3		# U0 : index of the first zero byte
+	addq	$0, $3, $0	# E : $0 = address of the terminator
+	subq	$0, $16, $0	# E : length = terminator - start
+	ret	$31, ($26)	# L0 :
+
+	END(strlen)
+libc_hidden_builtin_def (strlen)
diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/strncat.S b/REORG.TODO/sysdeps/alpha/alphaev67/strncat.S
new file mode 100644
index 0000000000..58aac54d64
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/alphaev67/strncat.S
@@ -0,0 +1,87 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@tamu.edu>, 1996.
+   EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Append no more than COUNT characters from the null-terminated string SRC
+   to the null-terminated string DST.  Always null-terminate the new DST.
+
+   a0 holds DST on entry and is copied to v0 as the return value; a2 is
+   tested for zero before any memory is touched.  The copy is done by
+   __stxncpy, which is not defined in this file.
+   NOTE(review): the termination code after the call reads t0, t8 and
+   t10 as left behind by __stxncpy -- confirm their meaning against
+   stxncpy.S before changing anything here.  */
+
+#include <sysdep.h>
+
+	.arch ev6
+	.set noreorder
+	.text
+
+ENTRY(strncat)
+	ldgp	gp, 0(pv)
+#ifdef PROF
+	.set noat
+	lda	AT, _mcount
+	jsr	AT, (AT), _mcount
+	.set at
+#endif
+	.prologue 1
+
+	mov	a0, v0		# set up return value
+	beq	a2, $zerocount	# U : a2 == 0, return DST unchanged
+	/* Find the end of the string.  */
+	ldq_u   t0, 0(a0)	# L : load first quadword (a0 may be misaligned)
+	lda     t1, -1		# E : t1 = all ones
+
+	insqh   t1, v0, t1	# U : keep ones only in the bytes before the start
+	andnot  a0, 7, a0	# E : round the pointer down to a quadword boundary
+	nop			# E :
+	or      t1, t0, t0	# E : make the bytes before the start non-zero
+
+	nop			# E :
+	nop			# E :
+	cmpbge  zero, t0, t1	# E : bits set iff byte == 0
+	bne     t1, $found	# U : terminator is in the first quadword
+
+$loop:	ldq     t0, 8(a0)	# L : load the next aligned quadword
+	addq    a0, 8, a0	# E : advance the pointer to it
+	cmpbge  zero, t0, t1	# E : bits set iff byte == 0
+	beq     t1, $loop	# U : no zero byte yet, keep scanning
+
+$found:	cttz	t1, t2		# U0 : index of the first zero byte
+	addq	a0, t2, a0	# E : a0 = address of DST's terminator
+	jsr	t9, __stxncpy	# L0 : Now do the append.
+
+	/* Worry about the null termination.  */
+
+	cttz	t10, t2		# U0: byte offset of end-of-count.
+	bic	a0, 7, a0	# E : word align the last write address.
+	zapnot	t0, t8, t1	# U  : was last byte a null?
+	nop			# E :
+
+	bne	t1, 0f		# U : non-zero: a terminator must still be stored
+	nop			# E :
+	nop			# E :
+	ret			# L0 :
+
+0:	addq	t2, a0, a0	# E : address of end-of-count
+	stb	zero, 1(a0)	# L : store the terminating null
+	nop			# E :
+	ret			# L0 :
+
+$zerocount:
+	nop			# E :
+	nop			# E :
+	nop			# E :
+	ret			# L0 :
+
+	END(strncat)
diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/strrchr.S b/REORG.TODO/sysdeps/alpha/alphaev67/strrchr.S
new file mode 100644
index 0000000000..f2fb4cf677
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/alphaev67/strrchr.S
@@ -0,0 +1,116 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+   EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Return the address of the last occurrence of a given character
+   within a null-terminated string, or null if it is not found.
+
+   a0 is the string pointer and a1 the character (only its low byte is
+   used); the result is returned in v0.  While scanning, t6 holds the
+   aligned address of the last quadword that contained a match and t8
+   the byte-compare mask for that quadword; both start out as zero, so
+   a string without a match yields a null result.  */
+
+#include <sysdep.h>
+
+	.arch ev6
+	.set noreorder
+	.set noat
+
+ENTRY(strrchr)
+#ifdef PROF
+	ldgp	gp, 0(pv)
+	lda	AT, _mcount
+	jsr	AT, (AT), _mcount
+	.prologue 1
+#else
+	.prologue 0
+#endif
+
+	and	a1, 0xff, t2	# E : 00000000000000ch
+	insbl	a1, 1, t4	# U : 000000000000ch00
+	insbl	a1, 2, t5	# U : 0000000000ch0000
+	ldq_u   t0, 0(a0)	# L : load first quadword Latency=3
+
+	mov	zero, t6	# E : t6 is last match aligned addr
+	or	t2, t4, a1	# E : 000000000000chch
+	sll	t5, 8, t3	# U : 00000000ch000000
+	mov	zero, t8	# E : t8 is last match byte compare mask
+
+	andnot  a0, 7, v0	# E : align source addr
+	or	t5, t3, t3	# E : 00000000chch0000
+	sll	a1, 32, t2	# U : 0000chch00000000
+	sll	a1, 48, t4	# U : chch000000000000
+
+	or	t4, a1, a1	# E : chch00000000chch
+	or	t2, t3, t2	# E : 0000chchchch0000
+	or	a1, t2, a1	# E : chchchchchchchch
+	lda	t5, -1		# E : build garbage mask
+
+	cmpbge  zero, t0, t1	# E : bits set iff byte == zero
+	mskqh	t5, a0, t4	# E : Complete garbage mask
+	xor	t0, a1, t2	# E : make bytes == c zero
+	cmpbge	zero, t4, t4	# E : bits set iff byte is garbage
+
+	cmpbge  zero, t2, t3	# E : bits set iff byte == c
+	andnot	t1, t4, t1	# E : clear garbage from null test
+	andnot	t3, t4, t3	# E : clear garbage from char test
+	bne	t1, $eos	# U : did we already hit the terminator?
+
+	/* Character search main loop */
+$loop:
+	ldq	t0, 8(v0)	# L : load next quadword
+	cmovne	t3, v0, t6	# E : save previous comparisons match
+	nop			#   : Latency=2, extra map slot (keep nop with cmov)
+	nop
+
+	cmovne	t3, t3, t8	# E : Latency=2, extra map slot
+	nop			#   : keep with cmovne
+	addq	v0, 8, v0	# E : advance to the quadword just loaded
+	xor	t0, a1, t2	# E : make bytes == c zero
+
+	cmpbge	zero, t0, t1	# E : bits set iff byte == zero
+	cmpbge	zero, t2, t3	# E : bits set iff byte == c
+	beq	t1, $loop	# U : if we haven't seen a null, loop
+	nop
+
+	/* Mask out character matches after terminator */
+$eos:
+	negq	t1, t4		# E : isolate first null byte match
+	and	t1, t4, t4	# E :
+	subq	t4, 1, t5	# E : build a mask of the bytes up to...
+	or	t4, t5, t4	# E : ... and including the null
+
+	and	t3, t4, t3	# E : mask out char matches after null
+	cmovne	t3, t3, t8	# E : save it, if match found Latency=2, extra map slot
+	nop			#   : Keep with cmovne
+	nop
+
+	cmovne	t3, v0, t6	# E : remember this quadword if it has a match
+	nop			#   : Keep with cmovne
+	/* Locate the address of the last matched character */
+	ctlz	t8, t2		# U0 : Latency=3 (0x40 for t8=0)
+	nop
+
+	cmoveq	t8, 0x3f, t2	# E : Compensate for case when no match is seen
+	nop			# E : hide the cmov latency (2) behind ctlz latency
+	lda	t5, 0x3f($31)	# E : t5 = 63
+	subq	t5, t2, t5	# E : Normalize leading zero count
+
+	addq	t6, t5, v0	# E : and add to quadword address
+	ret			# L0 : Latency=3
+	nop
+	nop
+
+END(strrchr)
+
+weak_alias (strrchr, rindex)
+libc_hidden_builtin_def (strrchr)
diff --git a/REORG.TODO/sysdeps/alpha/atomic-machine.h b/REORG.TODO/sysdeps/alpha/atomic-machine.h
new file mode 100644
index 0000000000..2cb2290837
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/atomic-machine.h
@@ -0,0 +1,370 @@
+/* Copyright (C) 2003-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdint.h>
+
+/* Signed/unsigned atomic types for each width, in exact-width and
+   "fast" variants, all aliases of the <stdint.h> types.  */
+typedef int8_t atomic8_t;
+typedef uint8_t uatomic8_t;
+typedef int_fast8_t atomic_fast8_t;
+typedef uint_fast8_t uatomic_fast8_t;
+
+typedef int16_t atomic16_t;
+typedef uint16_t uatomic16_t;
+typedef int_fast16_t atomic_fast16_t;
+typedef uint_fast16_t uatomic_fast16_t;
+
+typedef int32_t atomic32_t;
+typedef uint32_t uatomic32_t;
+typedef int_fast32_t atomic_fast32_t;
+typedef uint_fast32_t uatomic_fast32_t;
+
+typedef int64_t atomic64_t;
+typedef uint64_t uatomic64_t;
+typedef int_fast64_t atomic_fast64_t;
+typedef uint_fast64_t uatomic_fast64_t;
+
+typedef intptr_t atomicptr_t;
+typedef uintptr_t uatomicptr_t;
+typedef intmax_t atomic_max_t;
+typedef uintmax_t uatomic_max_t;
+
+#define __HAVE_64B_ATOMICS 1
+#define USE_ATOMIC_COMPILER_BUILTINS 0
+
+/* XXX Is this actually correct?  */
+#define ATOMIC_EXCHANGE_USES_CAS 1
+
+
+/* __MB is the barrier text pasted into the asm templates of this file:
+   an "mb" instruction, or nothing at all when UP is defined.  */
+#ifdef UP
+# define __MB  /* nothing */
+#else
+# define __MB  " mb\n"
+#endif
+
+
+/* Compare and exchange.  For all of the "xxx" routines, we expect a
+   "__prev" and a "__cmp" variable to be provided by the enclosing scope,
+   in which values are returned.
+
+   mb1 and mb2 are string fragments (either "" or __MB) pasted before
+   and after the load-locked/store-conditional sequence.  On exit
+   __prev holds the value that was loaded and __cmp is zero iff the
+   comparison failed and nothing was stored.  */
+
+/* 8-bit variant: works on the aligned quadword that contains the byte.
+   extbl pulls the old byte out for the comparison; mskbl clears it and
+   the insbl-shifted new byte is or'ed in before the stq_c.  */
+#define __arch_compare_and_exchange_xxx_8_int(mem, new, old, mb1, mb2)	\
+({									\
+  unsigned long __tmp, __snew, __addr64;				\
+  __asm__ __volatile__ (						\
+		mb1							\
+	" andnot %[__addr8],7,%[__addr64]\n"				\
+	" insbl %[__new],%[__addr8],%[__snew]\n"			\
+	"1: ldq_l %[__tmp],0(%[__addr64])\n"				\
+	" extbl %[__tmp],%[__addr8],%[__prev]\n"			\
+	" cmpeq %[__prev],%[__old],%[__cmp]\n"				\
+	" beq %[__cmp],2f\n"						\
+	" mskbl %[__tmp],%[__addr8],%[__tmp]\n"			\
+	" or %[__snew],%[__tmp],%[__tmp]\n"				\
+	" stq_c %[__tmp],0(%[__addr64])\n"				\
+	" beq %[__tmp],1b\n"						\
+		mb2							\
+	"2:"								\
+	: [__prev] "=&r" (__prev),					\
+	  [__snew] "=&r" (__snew),					\
+	  [__tmp] "=&r" (__tmp),					\
+	  [__cmp] "=&r" (__cmp),					\
+	  [__addr64] "=&r" (__addr64)					\
+	: [__addr8] "r" (mem),						\
+	  [__old] "Ir" ((uint64_t)(uint8_t)(uint64_t)(old)),		\
+	  [__new] "r" (new)						\
+	: "memory");							\
+})
+
+/* 16-bit variant: same scheme as the 8-bit one, using the word forms
+   inswl/extwl/mskwl.  */
+#define __arch_compare_and_exchange_xxx_16_int(mem, new, old, mb1, mb2) \
+({									\
+  unsigned long __tmp, __snew, __addr64;				\
+  __asm__ __volatile__ (						\
+		mb1							\
+	" andnot %[__addr16],7,%[__addr64]\n"				\
+	" inswl %[__new],%[__addr16],%[__snew]\n"			\
+	"1: ldq_l %[__tmp],0(%[__addr64])\n"				\
+	" extwl %[__tmp],%[__addr16],%[__prev]\n"			\
+	" cmpeq %[__prev],%[__old],%[__cmp]\n"				\
+	" beq %[__cmp],2f\n"						\
+	" mskwl %[__tmp],%[__addr16],%[__tmp]\n"			\
+	" or %[__snew],%[__tmp],%[__tmp]\n"				\
+	" stq_c %[__tmp],0(%[__addr64])\n"				\
+	" beq %[__tmp],1b\n"						\
+		mb2							\
+	"2:"								\
+	: [__prev] "=&r" (__prev),					\
+	  [__snew] "=&r" (__snew),					\
+	  [__tmp] "=&r" (__tmp),					\
+	  [__cmp] "=&r" (__cmp),					\
+	  [__addr64] "=&r" (__addr64)					\
+	: [__addr16] "r" (mem),						\
+	  [__old] "Ir" ((uint64_t)(uint16_t)(uint64_t)(old)),		\
+	  [__new] "r" (new)						\
+	: "memory");							\
+})
+
+/* 32-bit variant: ldl_l/stl_c directly on *mem.  __cmp doubles as the
+   register that carries the new value into stl_c and receives its
+   success flag.  */
+#define __arch_compare_and_exchange_xxx_32_int(mem, new, old, mb1, mb2) \
+({									\
+  __asm__ __volatile__ (						\
+		mb1							\
+	"1: ldl_l %[__prev],%[__mem]\n"				\
+	" cmpeq %[__prev],%[__old],%[__cmp]\n"				\
+	" beq %[__cmp],2f\n"						\
+	" mov %[__new],%[__cmp]\n"					\
+	" stl_c %[__cmp],%[__mem]\n"					\
+	" beq %[__cmp],1b\n"						\
+		mb2							\
+	"2:"								\
+	: [__prev] "=&r" (__prev),					\
+	  [__cmp] "=&r" (__cmp)						\
+	: [__mem] "m" (*(mem)),						\
+	  [__old] "Ir" ((uint64_t)(atomic32_t)(uint64_t)(old)),		\
+	  [__new] "Ir" (new)						\
+	: "memory");							\
+})
+
+/* 64-bit variant: as the 32-bit one, with ldq_l/stq_c.  */
+#define __arch_compare_and_exchange_xxx_64_int(mem, new, old, mb1, mb2) \
+({									\
+  __asm__ __volatile__ (						\
+		mb1							\
+	"1: ldq_l %[__prev],%[__mem]\n"				\
+	" cmpeq %[__prev],%[__old],%[__cmp]\n"				\
+	" beq %[__cmp],2f\n"						\
+	" mov %[__new],%[__cmp]\n"					\
+	" stq_c %[__cmp],%[__mem]\n"					\
+	" beq %[__cmp],1b\n"						\
+		mb2							\
+	"2:"								\
+	: [__prev] "=&r" (__prev),					\
+	  [__cmp] "=&r" (__cmp)						\
+	: [__mem] "m" (*(mem)),						\
+	  [__old] "Ir" ((uint64_t)(old)),				\
+	  [__new] "Ir" (new)						\
+	: "memory");							\
+})
+
+/* For all "bool" routines, we return FALSE if exchange successful.  */
+
+#define __arch_compare_and_exchange_bool_8_int(mem, new, old, mb1, mb2) \
+({ unsigned long __prev; int __cmp;					\
+   __arch_compare_and_exchange_xxx_8_int(mem, new, old, mb1, mb2);	\
+   !__cmp; })
+
+#define __arch_compare_and_exchange_bool_16_int(mem, new, old, mb1, mb2) \
+({ unsigned long __prev; int __cmp;					\
+   __arch_compare_and_exchange_xxx_16_int(mem, new, old, mb1, mb2);	\
+   !__cmp; })
+
+#define __arch_compare_and_exchange_bool_32_int(mem, new, old, mb1, mb2) \
+({ unsigned long __prev; int __cmp;					\
+   __arch_compare_and_exchange_xxx_32_int(mem, new, old, mb1, mb2);	\
+   !__cmp; })
+
+#define __arch_compare_and_exchange_bool_64_int(mem, new, old, mb1, mb2) \
+({ unsigned long __prev; int __cmp;					\
+   __arch_compare_and_exchange_xxx_64_int(mem, new, old, mb1, mb2);	\
+   !__cmp; })
+
+/* For all "val" routines, return the old value whether exchange
+   successful or not.
+   Each wrapper declares the __prev and __cmp variables that the "xxx"
+   routine fills in, and yields __prev cast to the type of *mem.  */
+
+#define __arch_compare_and_exchange_val_8_int(mem, new, old, mb1, mb2) \
+({ unsigned long __prev; int __cmp;					\
+   __arch_compare_and_exchange_xxx_8_int(mem, new, old, mb1, mb2);	\
+   (typeof (*mem))__prev; })
+
+#define __arch_compare_and_exchange_val_16_int(mem, new, old, mb1, mb2) \
+({ unsigned long __prev; int __cmp;					\
+   __arch_compare_and_exchange_xxx_16_int(mem, new, old, mb1, mb2);	\
+   (typeof (*mem))__prev; })
+
+#define __arch_compare_and_exchange_val_32_int(mem, new, old, mb1, mb2) \
+({ unsigned long __prev; int __cmp;					\
+   __arch_compare_and_exchange_xxx_32_int(mem, new, old, mb1, mb2);	\
+   (typeof (*mem))__prev; })
+
+#define __arch_compare_and_exchange_val_64_int(mem, new, old, mb1, mb2) \
+({ unsigned long __prev; int __cmp;					\
+   __arch_compare_and_exchange_xxx_64_int(mem, new, old, mb1, mb2);	\
+   (typeof (*mem))__prev; })
+
+/* Compare and exchange with "acquire" semantics, ie barrier after.  */
+
+#define atomic_compare_and_exchange_bool_acq(mem, new, old)	\
+  __atomic_bool_bysize (__arch_compare_and_exchange_bool, int,	\
+		        mem, new, old, "", __MB)
+
+#define atomic_compare_and_exchange_val_acq(mem, new, old)	\
+  __atomic_val_bysize (__arch_compare_and_exchange_val, int,	\
+		       mem, new, old, "", __MB)
+
+/* Compare and exchange with "release" semantics, ie barrier before.  */
+
+#define atomic_compare_and_exchange_val_rel(mem, new, old)	\
+  __atomic_val_bysize (__arch_compare_and_exchange_val, int,	\
+		       mem, new, old, __MB, "")
+
+
+/* Atomically store value and return the previous value.
+   mb1 and mb2 are string fragments pasted before and after the
+   load-locked/store-conditional loop; the statement expression yields
+   the value that was loaded (__ret).  */
+
+/* 8- and 16-bit variants operate on the aligned quadword containing
+   the datum, extracting the old value and merging in the new one.  */
+#define __arch_exchange_8_int(mem, value, mb1, mb2)			\
+({									\
+  unsigned long __tmp, __addr64, __sval; __typeof(*mem) __ret;		\
+  __asm__ __volatile__ (						\
+		mb1							\
+	" andnot %[__addr8],7,%[__addr64]\n"				\
+	" insbl %[__value],%[__addr8],%[__sval]\n"			\
+	"1: ldq_l %[__tmp],0(%[__addr64])\n"				\
+	" extbl %[__tmp],%[__addr8],%[__ret]\n"			\
+	" mskbl %[__tmp],%[__addr8],%[__tmp]\n"			\
+	" or %[__sval],%[__tmp],%[__tmp]\n"				\
+	" stq_c %[__tmp],0(%[__addr64])\n"				\
+	" beq %[__tmp],1b\n"						\
+		mb2							\
+	: [__ret] "=&r" (__ret),					\
+	  [__sval] "=&r" (__sval),					\
+	  [__tmp] "=&r" (__tmp),					\
+	  [__addr64] "=&r" (__addr64)					\
+	: [__addr8] "r" (mem),						\
+	  [__value] "r" (value)						\
+	: "memory");							\
+  __ret; })
+
+#define __arch_exchange_16_int(mem, value, mb1, mb2)			\
+({									\
+  unsigned long __tmp, __addr64, __sval; __typeof(*mem) __ret;		\
+  __asm__ __volatile__ (						\
+		mb1							\
+	" andnot %[__addr16],7,%[__addr64]\n"				\
+	" inswl %[__value],%[__addr16],%[__sval]\n"			\
+	"1: ldq_l %[__tmp],0(%[__addr64])\n"				\
+	" extwl %[__tmp],%[__addr16],%[__ret]\n"			\
+	" mskwl %[__tmp],%[__addr16],%[__tmp]\n"			\
+	" or %[__sval],%[__tmp],%[__tmp]\n"				\
+	" stq_c %[__tmp],0(%[__addr64])\n"				\
+	" beq %[__tmp],1b\n"						\
+		mb2							\
+	: [__ret] "=&r" (__ret),					\
+	  [__sval] "=&r" (__sval),					\
+	  [__tmp] "=&r" (__tmp),					\
+	  [__addr64] "=&r" (__addr64)					\
+	: [__addr16] "r" (mem),						\
+	  [__value] "r" (value)						\
+	: "memory");							\
+  __ret; })
+
+/* 32- and 64-bit variants use ldl_l/stl_c and ldq_l/stq_c directly on
+   *mem; __tmp carries the new value in and the success flag out.  */
+#define __arch_exchange_32_int(mem, value, mb1, mb2)			\
+({									\
+  signed int __tmp; __typeof(*mem) __ret;				\
+  __asm__ __volatile__ (						\
+		mb1							\
+	"1: ldl_l %[__ret],%[__mem]\n"					\
+	" mov %[__val],%[__tmp]\n"					\
+	" stl_c %[__tmp],%[__mem]\n"					\
+	" beq %[__tmp],1b\n"						\
+		mb2							\
+	: [__ret] "=&r" (__ret),					\
+	  [__tmp] "=&r" (__tmp)						\
+	: [__mem] "m" (*(mem)),						\
+	  [__val] "Ir" (value)						\
+	: "memory");							\
+  __ret; })
+
+#define __arch_exchange_64_int(mem, value, mb1, mb2)			\
+({									\
+  unsigned long __tmp; __typeof(*mem) __ret;				\
+  __asm__ __volatile__ (						\
+		mb1							\
+	"1: ldq_l %[__ret],%[__mem]\n"					\
+	" mov %[__val],%[__tmp]\n"					\
+	" stq_c %[__tmp],%[__mem]\n"					\
+	" beq %[__tmp],1b\n"						\
+		mb2							\
+	: [__ret] "=&r" (__ret),					\
+	  [__tmp] "=&r" (__tmp)						\
+	: [__mem] "m" (*(mem)),						\
+	  [__val] "Ir" (value)						\
+	: "memory");							\
+  __ret; })
+
+#define atomic_exchange_acq(mem, value) \
+  __atomic_val_bysize (__arch_exchange, int, mem, value, "", __MB)
+
+#define atomic_exchange_rel(mem, value) \
+  __atomic_val_bysize (__arch_exchange, int, mem, value, __MB, "")
+
+
+/* Atomically add value and return the previous (unincremented) value.
+   Only the 32- and 64-bit sizes are implemented; the 8- and 16-bit
+   variants expand to __builtin_trap ().  */
+
+#define __arch_exchange_and_add_8_int(mem, value, mb1, mb2) \
+  ({ __builtin_trap (); 0; })
+
+#define __arch_exchange_and_add_16_int(mem, value, mb1, mb2) \
+  ({ __builtin_trap (); 0; })
+
+#define __arch_exchange_and_add_32_int(mem, value, mb1, mb2)		\
+({									\
+  signed int __tmp; __typeof(*mem) __ret;				\
+  __asm__ __volatile__ (						\
+		mb1							\
+	"1: ldl_l %[__ret],%[__mem]\n"					\
+	" addl %[__ret],%[__val],%[__tmp]\n"				\
+	" stl_c %[__tmp],%[__mem]\n"					\
+	" beq %[__tmp],1b\n"						\
+		mb2							\
+	: [__ret] "=&r" (__ret),					\
+	  [__tmp] "=&r" (__tmp)						\
+	: [__mem] "m" (*(mem)),						\
+	  [__val] "Ir" ((signed int)(value))				\
+	: "memory");							\
+  __ret; })
+
+#define __arch_exchange_and_add_64_int(mem, value, mb1, mb2)		\
+({									\
+  unsigned long __tmp; __typeof(*mem) __ret;				\
+  __asm__ __volatile__ (						\
+		mb1							\
+	"1: ldq_l %[__ret],%[__mem]\n"					\
+	" addq %[__ret],%[__val],%[__tmp]\n"				\
+	" stq_c %[__tmp],%[__mem]\n"					\
+	" beq %[__tmp],1b\n"						\
+		mb2							\
+	: [__ret] "=&r" (__ret),					\
+	  [__tmp] "=&r" (__tmp)						\
+	: [__mem] "m" (*(mem)),						\
+	  [__val] "Ir" ((unsigned long)(value))				\
+	: "memory");							\
+  __ret; })
+
+/* ??? Barrier semantics for atomic_exchange_and_add appear to be
+   undefined.  Use full barrier for now, as that's safe.  */
+#define atomic_exchange_and_add(mem, value) \
+  __atomic_val_bysize (__arch_exchange_and_add, int, mem, value, __MB, __MB)
+
+
+/* ??? Blah, I'm lazy.  Implement these later.  Can do better than the
+   compare-and-exchange loop provided by generic code.
+ +#define atomic_decrement_if_positive(mem) +#define atomic_bit_test_set(mem, bit) + +*/ + +#ifndef UP +# define atomic_full_barrier() __asm ("mb" : : : "memory"); +# define atomic_read_barrier() __asm ("mb" : : : "memory"); +# define atomic_write_barrier() __asm ("wmb" : : : "memory"); +#endif diff --git a/REORG.TODO/sysdeps/alpha/backtrace.c b/REORG.TODO/sysdeps/alpha/backtrace.c new file mode 100644 index 0000000000..27ce597b39 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/backtrace.c @@ -0,0 +1 @@ +#include <sysdeps/x86_64/backtrace.c> diff --git a/REORG.TODO/sysdeps/alpha/bb_init_func.S b/REORG.TODO/sysdeps/alpha/bb_init_func.S new file mode 100644 index 0000000000..a3064d8abc --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/bb_init_func.S @@ -0,0 +1,86 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by David Mosberger (davidm@cs.arizona.edu). + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* __bb_init_func is invoked at the beginning of each function, before + any registers have been saved. It is therefore safe to use any + caller-saved (call-used) registers (except for argument registers + a1-a5). */ + +#include <sysdep.h> + +/* + * These offsets should match with "struct bb" declared in gcc/libgcc2.c. 
+ */
+#define ZERO_WORD	0x00
+#define NEXT		0x20
+
+	.set	noat
+	.set	noreorder
+
+/* Fast path: a0 points to the "blocks" structure.  Return at once when
+   its zero_word is already non-zero, otherwise branch to init.  */
+ENTRY(__bb_init_func)
+	.prologue 0
+
+	ldq	t0, ZERO_WORD(a0)	/* t0 <- blocks->zero_word */
+	beq	t0, init		/* not initialized yet -> */
+	ret
+
+END(__bb_init_func)
+
+/* Slow path: mark the structure initialized and push it onto the list
+   headed by __bb_head.  When the list was empty and the word loaded
+   from _gmonparam is non-zero, also register __bb_exit_func with
+   atexit.  ra and a1-a5 are saved around that call and pv is restored
+   on exit; a0 is overwritten and not restored.  */
+	.ent init
+init:
+	.frame	sp, 0x38, ra, 0
+	subq	sp, 0x38, sp
+	.prologue 0
+
+	stq	pv, 0x30(sp)
+	br	pv, 1f			/* pv <- address of label 1 */
+1:	ldgp	gp, 0(pv)
+
+	ldiq	t1, __bb_head
+	lda	t3, _gmonparam
+	ldq	t2, 0(t1)		/* t2 = old __bb_head */
+	ldl	t3, 0(t3)		/* t3 = _gmonparam.state */
+	lda	t0, 1
+	stq	t0, ZERO_WORD(a0)	/* blocks->zero_word = 1 */
+	stq	t2, NEXT(a0)		/* blocks->next = __bb_head */
+	stq	a0, 0(t1)		/* __bb_head = blocks */
+	bne	t2, $leave		/* list was not empty -> done */
+	beq	t3, $leave		/* t3 == GMON_PROF_ON? yes -> */
+
+	/* also need to initialize destructor: */
+	stq	ra, 0x00(sp)
+	lda	a0, __bb_exit_func
+	stq	a1, 0x08(sp)
+	lda	pv, atexit
+	stq	a2, 0x10(sp)
+	stq	a3, 0x18(sp)
+	stq	a4, 0x20(sp)
+	stq	a5, 0x28(sp)
+	jsr	ra, (pv), atexit
+	ldq	ra, 0x00(sp)
+	ldq	a1, 0x08(sp)
+	ldq	a2, 0x10(sp)
+	ldq	a3, 0x18(sp)
+	ldq	a4, 0x20(sp)
+	ldq	a5, 0x28(sp)
+
+$leave:	ldq	pv, 0x30(sp)
+	addq	sp, 0x38, sp
+	ret
+
+	.end init
diff --git a/REORG.TODO/sysdeps/alpha/bits/endian.h b/REORG.TODO/sysdeps/alpha/bits/endian.h
new file mode 100644
index 0000000000..8a16e14e24
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/bits/endian.h
@@ -0,0 +1,7 @@
+/* Alpha is little-endian.  */
+
+#ifndef _ENDIAN_H
+# error "Never use <bits/endian.h> directly; include <endian.h> instead."
+#endif
+
+#define __BYTE_ORDER __LITTLE_ENDIAN
diff --git a/REORG.TODO/sysdeps/alpha/bits/link.h b/REORG.TODO/sysdeps/alpha/bits/link.h
new file mode 100644
index 0000000000..f9b8938f07
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/bits/link.h
@@ -0,0 +1,68 @@
+/* Copyright (C) 2005-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _LINK_H +# error "Never include <bits/link.h> directly; use <link.h> instead." +#endif + + +/* Registers for entry into PLT on Alpha. */ +typedef struct La_alpha_regs +{ + uint64_t lr_r26; + uint64_t lr_sp; + uint64_t lr_r16; + uint64_t lr_r17; + uint64_t lr_r18; + uint64_t lr_r19; + uint64_t lr_r20; + uint64_t lr_r21; + double lr_f16; + double lr_f17; + double lr_f18; + double lr_f19; + double lr_f20; + double lr_f21; +} La_alpha_regs; + +/* Return values for calls from PLT on Alpha. 
*/ +typedef struct La_alpha_retval +{ + uint64_t lrv_r0; + uint64_t lrv_r1; + double lrv_f0; + double lrv_f1; +} La_alpha_retval; + + +__BEGIN_DECLS + +extern Elf64_Addr la_alpha_gnu_pltenter (Elf64_Sym *__sym, unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_alpha_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); +extern unsigned int la_alpha_gnu_pltexit (Elf64_Sym *__sym, unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_alpha_regs *__inregs, + La_alpha_retval *__outregs, + const char *symname); + +__END_DECLS diff --git a/REORG.TODO/sysdeps/alpha/bits/mathdef.h b/REORG.TODO/sysdeps/alpha/bits/mathdef.h new file mode 100644 index 0000000000..f375bfda7f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/bits/mathdef.h @@ -0,0 +1,44 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _COMPLEX_H +# error "Never use <bits/mathdef.h> directly; include <complex.h> instead" +#endif + +#if defined _COMPLEX_H && !defined _COMPLEX_H_MATHDEF +# define _COMPLEX_H_MATHDEF 1 +# if defined(__GNUC__) && !__GNUC_PREREQ(3,4) + +/* Due to an ABI change, we need to remap the complex float symbols. 
+   Each function declared through <bits/cmathcalls.h> below is emitted
+   twice, as NAMEf and as __NAMEf, and both declarations are bound to
+   the assembler name __c1_NAMEf.  */
+#  define _Mdouble_		float
+#  define __MATHCALL(function, args) \
+    __MATHDECL (_Complex float, function, args)
+#  define __MATHDECL(type, function, args) \
+    __MATHDECL_1(type, function##f, args, __c1_##function##f); \
+    __MATHDECL_1(type, __##function##f, args, __c1_##function##f)
+#  define __MATHDECL_1(type, function, args, alias) \
+    extern type function args __asm__(#alias) __THROW
+
+#  include <bits/cmathcalls.h>
+
+#  undef _Mdouble_
+#  undef __MATHCALL
+#  undef __MATHDECL
+#  undef __MATHDECL_1
+
+# endif /* GNUC before 3.4 */
+#endif /* COMPLEX_H */
diff --git a/REORG.TODO/sysdeps/alpha/bits/setjmp.h b/REORG.TODO/sysdeps/alpha/bits/setjmp.h
new file mode 100644
index 0000000000..d7eb751de8
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/bits/setjmp.h
@@ -0,0 +1,61 @@
+/* Define the machine-dependent type `jmp_buf'.  Alpha version.
+   Copyright (C) 1992-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _BITS_SETJMP_H
+#define _BITS_SETJMP_H  1
+
+#if !defined _SETJMP_H && !defined _PTHREAD_H
+# error "Never include <bits/setjmp.h> directly; use <setjmp.h> instead."
+#endif
+
+/* The previous bits/setjmp.h had __jmp_buf defined as a structure.
+   We use an array of 'long int' instead, to make writing the
+   assembler easier.
Naturally, user code should not depend on
+   either representation.  */
+
+/*
+ * Integer registers:
+ *    $0 is the return value (v0);
+ *    $1-$8, $22-$25, $28 are call-used (t0-t7, t8-t11, at);
+ *    $9-$14 we save here (s0-s5);
+ *    $15 is the FP and we save it here (fp or s6);
+ *    $16-$21 are input arguments (call-used) (a0-a5);
+ *    $26 is the return PC and we save it here (ra);
+ *    $27 is the procedure value (i.e., the address of __setjmp) (pv or t12);
+ *    $29 is the global pointer, which the caller will reconstruct
+ *        from the return address restored in $26 (gp);
+ *    $30 is the stack pointer and we save it here (sp);
+ *    $31 is always zero (zero).
+ *
+ * Floating-point registers:
+ *    $f0 is the floating return value;
+ *    $f1, $f10-$f15, $f22-$f30 are call-used;
+ *    $f2-$f9 we save here;
+ *    $f16-$f21 are input args (call-used);
+ *    $f31 is always zero.
+ *
+ * Note that even on Alpha hardware that does not have an FPU (there
+ * isn't such a thing currently) it is required to implement the FP
+ * registers.
+ *
+ * The registers marked "we save here" add up to 17 slots: s0-s5, fp,
+ * ra, sp and $f2-$f9.
+ */
+
+#ifndef __ASSEMBLY__
+typedef long int __jmp_buf[17];
+#endif
+
+#endif  /* bits/setjmp.h */
diff --git a/REORG.TODO/sysdeps/alpha/bsd-_setjmp.S b/REORG.TODO/sysdeps/alpha/bsd-_setjmp.S
new file mode 100644
index 0000000000..4e6a2da560
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/bsd-_setjmp.S
@@ -0,0 +1 @@
+/* _setjmp is in setjmp.S  */
diff --git a/REORG.TODO/sysdeps/alpha/bsd-setjmp.S b/REORG.TODO/sysdeps/alpha/bsd-setjmp.S
new file mode 100644
index 0000000000..1da848d2f1
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/bsd-setjmp.S
@@ -0,0 +1 @@
+/* setjmp is in setjmp.S  */
diff --git a/REORG.TODO/sysdeps/alpha/bzero.S b/REORG.TODO/sysdeps/alpha/bzero.S
new file mode 100644
index 0000000000..9cea9fb59d
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/bzero.S
@@ -0,0 +1,110 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+   Contributed by Richard Henderson (rth@tamu.edu)
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Fill a block of memory with zeros.  Optimized for the Alpha architecture:
+
+   - memory accessed as aligned quadwords only
+   - destination memory not read unless needed for good cache behaviour
+   - basic blocks arranged to optimize branch prediction for full-quadword
+     aligned memory blocks.
+   - partial head and tail quadwords constructed with byte-mask instructions
+
+   This is generally scheduled for the EV5 (got to look out for my own
+   interests :-), but with EV4 needs in mind.  There *should* be no more
+   stalls for the EV4 than there are for the EV5.
+
+   a0 is the destination and a1 the byte count.  The entry point __bzero
+   comes after the store loop below, so the common aligned case branches
+   backwards into bzero_loop.
+*/
+
+
+#include <sysdep.h>
+
+	.set noat
+	.set noreorder
+
+	.text
+	.type	__bzero, @function
+	.globl	__bzero
+	.usepv	__bzero, USEPV_PROF
+
+	cfi_startproc
+
+	/* On entry to this basic block:
+	   t3 == loop counter
+	   t4 == bytes in partial final word
+	   a0 == possibly misaligned destination pointer  */
+
+	.align 3
+bzero_loop:
+	beq	t3, $tail	#       : no full quadwords to store
+	blbc	t3, 0f		#       : skip single store if count even
+
+	stq_u	zero, 0(a0)	# e0    : store one word
+	subq	t3, 1, t3	# .. e1 :
+	addq	a0, 8, a0	# e0    :
+	beq	t3, $tail	# .. e1 : that was the only one
+
+0:	stq_u	zero, 0(a0)	# e0    : store two words
+	subq	t3, 2, t3	# .. e1 :
+	stq_u	zero, 8(a0)	# e0    :
+	addq	a0, 16, a0	# .. e1 :
+	bne	t3, 0b		# e1    : more pairs to go
+
+$tail:	bne	t4, 1f		# is there a tail to do?
+	ret			# no
+
+1:	ldq_u	t0, 0(a0)	# yes, load original data
+	mskqh	t0, t4, t0	# clear the first t4 bytes, keep the rest
+	stq_u	t0, 0(a0)	#
+	ret			#
+
+__bzero:
+#ifdef PROF
+	ldgp	gp, 0(pv)
+	lda	AT, _mcount
+	jsr	AT, (AT), _mcount
+#endif
+
+	mov	a0, v0		# e0    : move return value in place
+	beq	a1, $done	# .. e1 : early exit for zero-length store
+	and	a0, 7, t1	# e0    : t1 = misalignment of the destination
+	addq	a1, t1, a1	# e1    : add dest misalignment to count
+	srl	a1, 3, t3	# e0    : loop = count >> 3
+	and	a1, 7, t4	# .. e1 : find number of bytes in tail
+	unop			#       :
+	beq	t1, bzero_loop	# e1    : aligned head, jump right in
+
+	ldq_u	t0, 0(a0)	# e0    : load original data to mask into
+	cmpult	a1, 8, t2	# .. e1 : is this a sub-word set?
+	bne	t2, $oneq	# e1    : yes, head and tail share one quadword
+
+	mskql	t0, a0, t0	# e0    : we span words.  finish this partial
+	subq	t3, 1, t3	# e0    : the head quadword is accounted for
+	addq	a0, 8, a0	# .. e1 :
+	stq_u	t0, -8(a0)	# e0    : store the masked head quadword
+	br 	bzero_loop	# .. e1 :
+
+	.align 3
+$oneq:
+	mskql	t0, a0, t2	# e0    : keep the bytes below the start
+	mskqh	t0, a1, t3	# e0    : keep the bytes from the end onwards
+	or	t2, t3, t0	# e1    : merge the two preserved parts
+	stq_u	t0, 0(a0)	# e0    :
+
+$done:	ret
+
+	cfi_endproc
+weak_alias (__bzero, bzero)
diff --git a/REORG.TODO/sysdeps/alpha/configure b/REORG.TODO/sysdeps/alpha/configure
new file mode 100644
index 0000000000..464b596527
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/configure
@@ -0,0 +1,8 @@
+# This file is generated from configure.ac by Autoconf.  DO NOT EDIT!
+ # Local configure fragment for sysdeps/alpha.
+
+# With required gcc+binutils, we can always access static and hidden
+# symbols in a position independent way.
+$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
+
+# work around problem with autoconf and empty lines at the end of files
diff --git a/REORG.TODO/sysdeps/alpha/configure.ac b/REORG.TODO/sysdeps/alpha/configure.ac
new file mode 100644
index 0000000000..38e52e71ac
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/configure.ac
@@ -0,0 +1,7 @@
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/alpha.
+ +# With required gcc+binutils, we can always access static and hidden +# symbols in a position independent way. +AC_DEFINE(PI_STATIC_AND_HIDDEN) +# work around problem with autoconf and empty lines at the end of files diff --git a/REORG.TODO/sysdeps/alpha/crti.S b/REORG.TODO/sysdeps/alpha/crti.S new file mode 100644 index 0000000000..932b337e29 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/crti.S @@ -0,0 +1,99 @@ +/* Special .init and .fini section support for Alpha. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* crti.S puts a function prologue at the beginning of the .init and + .fini sections and defines global symbols for those addresses, so + they can be called as functions. The symbols _init and _fini are + magic and cause the linker to emit DT_INIT and DT_FINI. + + This differs from what would be generated for ordinary code in that + we save and restore the GP within the function. In order for linker + relaxation to work, the value in the GP register on exit from a function + must be valid for the function entry point. Normally, a function is + contained within one object file and this is not an issue, provided + that the function reloads the gp after making any function calls. + However, _init and _fini are constructed from pieces of many object + files, all of which may have different GP values. So we must reload + the GP value from crti.o in crtn.o. 
*/ + +#include <libc-symbols.h> +#include <sysdep.h> + +#ifndef PREINIT_FUNCTION +# define PREINIT_FUNCTION __gmon_start__ +#endif + +#ifndef PREINIT_FUNCTION_WEAK +# define PREINIT_FUNCTION_WEAK 1 +#endif + +#if PREINIT_FUNCTION_WEAK + weak_extern (PREINIT_FUNCTION) +#else + .hidden PREINIT_FUNCTION +#endif + +/* _init prologue: load gp from pv, open a 16-byte frame and save ra ($26) at 0(sp) and gp ($29) at 8(sp). PREINIT_FUNCTION is then called; in the weak case the call is skipped when its address is zero, and gp is reloaded from the frame after the call. There is no return here: the code ends open so that other .init contributions can follow. */ + .section .init, "ax", @progbits + .globl _init + .type _init, @function + .usepv _init, std +_init: + ldgp $29, 0($27) + subq $30, 16, $30 +#if PREINIT_FUNCTION_WEAK + lda $27, PREINIT_FUNCTION +#endif + stq $26, 0($30) + stq $29, 8($30) +#if PREINIT_FUNCTION_WEAK + beq $27, 1f + jsr $26, ($27), PREINIT_FUNCTION + ldq $29, 8($30) +1: +#else + bsr $26, PREINIT_FUNCTION !samegp +#endif + .p2align 3 + +/* _fini prologue: same frame layout as _init (ra at 0(sp), gp at 8(sp)), without a pre-init call. */ + .section .fini, "ax", @progbits + .globl _fini + .type _fini,@function + .usepv _fini,std +_fini: + ldgp $29, 0($27) + subq $30, 16, $30 + stq $26, 0($30) + stq $29, 8($30) + .p2align 3 diff --git a/REORG.TODO/sysdeps/alpha/crtn.S b/REORG.TODO/sysdeps/alpha/crtn.S new file mode 100644 index 0000000000..cb310778e1 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/crtn.S @@ -0,0 +1,49 @@ +/* Special .init and .fini section support for Alpha. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. 
(The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* crtn.S puts function epilogues in the .init and .fini sections + corresponding to the prologues in crti.S. Each epilogue reloads + ra ($26) from 0(sp) and gp ($29) from 8(sp), releases the 16-byte + frame and returns. */ + + .section .init, "ax", @progbits + ldq $26, 0($30) + ldq $29, 8($30) + addq $30, 16, $30 + ret + + .section .fini, "ax", @progbits + ldq $26, 0($30) + ldq $29, 8($30) + addq $30, 16, $30 + ret diff --git a/REORG.TODO/sysdeps/alpha/div.S b/REORG.TODO/sysdeps/alpha/div.S new file mode 100644 index 0000000000..a323379067 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/div.S @@ -0,0 +1,87 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + +/* div: $17 holds the numerator and $18 the denominator. The quotient + is stored at 0(a0) and the remainder at 4(a0), and a0 is returned in + v0. The FRAME bytes of stack are scratch space for the _ITOFT2 and + _FTOIT register transfers; no frame is allocated when __alpha_fix__ + is defined. The FPCR is saved in $f10 around the conversion. */ + +#undef FRAME +#ifdef __alpha_fix__ +#define FRAME 0 +#else +#define FRAME 16 +#endif + + .set noat + + .align 4 + .globl div + .ent div +div: + .frame sp, FRAME, ra +#if FRAME > 0 + lda sp, -FRAME(sp) +#endif +#ifdef PROF + .set macro + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .set nomacro + .prologue 1 +#else + .prologue 0 +#endif + + beq $18, $divbyzero + excb + mf_fpcr $f10 + + _ITOFT2 $17, $f0, 0, $18, $f1, 8 + + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + cvttq/c $f0, $f0 + excb + mt_fpcr $f10 + _FTOIT $f0, $0, 0 + + /* remainder = numerator - quotient * denominator */ + mull $0, $18, $1 + subl $17, $1, $1 + + stl $0, 0(a0) + stl $1, 4(a0) + mov a0, v0 + +#if FRAME > 0 + lda sp, FRAME(sp) +#endif + ret + + /* Zero denominator: park a0 in v0, raise GEN_INTDIV through PAL_gentrap, then store a zero quotient and remainder. 
*/ +$divbyzero: + mov a0, v0 + lda a0, GEN_INTDIV + call_pal PAL_gentrap + stl zero, 0(v0) + stl zero, 4(v0) + +#if FRAME > 0 + lda sp, FRAME(sp) +#endif + ret + + .end div diff --git a/REORG.TODO/sysdeps/alpha/div_libc.h b/REORG.TODO/sysdeps/alpha/div_libc.h new file mode 100644 index 0000000000..96eb95bc7e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/div_libc.h @@ -0,0 +1,163 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Common bits for implementing software divide. */ + +#include <sysdep.h> +#ifdef __linux__ +# include <asm/gentrap.h> +# include <asm/pal.h> +#else +# include <machine/pal.h> +#endif + +/* These are not normal C functions. Argument registers are t10 and t11; + the result goes in t12; the return address is in t9. Only t12 and AT + may be clobbered. */ +#define X t10 +#define Y t11 +#define RV t12 +#define RA t9 + +/* The secureplt format does not allow the division routines to be called + via plt; there aren't enough registers free to be clobbered. Avoid + setting the symbol type to STT_FUNC, so that the linker won't be tempted + to create a plt entry. */ +#define funcnoplt notype + +/* None of these functions should use implicit anything. */ + .set nomacro + .set noat + +/* Code fragment to invoke _mcount for profiling. This should be invoked + directly after allocation of the stack frame. It preserves ra, pv and + gp in the 0, 8 and 16(sp) slots of that frame across the call, and + copies RA into ra before calling _mcount. Expands to nothing unless + PROF is defined. */ +.macro CALL_MCOUNT +#ifdef PROF + stq ra, 0(sp) + stq pv, 8(sp) + stq gp, 16(sp) + cfi_rel_offset (ra, 0) + cfi_rel_offset (pv, 8) + cfi_rel_offset (gp, 16) + br AT, 1f + .set macro +1: ldgp gp, 0(AT) + mov RA, ra + lda AT, _mcount + jsr AT, (AT), _mcount + .set nomacro + ldq ra, 0(sp) + ldq pv, 8(sp) + ldq gp, 16(sp) + cfi_restore (ra) + cfi_restore (pv) + cfi_restore (gp) + /* Realign subsequent code with what we'd have without this + macro at all. 
This means aligned with one arithmetic insn + used within the bundle. */ + .align 4 + nop +#endif +.endm + +/* In order to make the below work, all top-level divide routines must + use the same frame size. 
*/ +#define FRAME 64 + +/* Code fragment to generate an integer divide-by-zero fault. When + building libc.so, we arrange for there to be one copy of this code + placed late in the dso, such that all branches are forward. When + building libc.a, we use multiple copies to avoid having an out of + range branch. Users should jump to DIVBYZERO. + + The fragment is entered with the caller's FRAME-byte frame still + allocated. a0 is kept in RV across the trap; afterwards a0 is + restored, RV is cleared, the frame is released and control returns + through RA. */ + +.macro DO_DIVBYZERO +#ifdef PIC +#define DIVBYZERO __divbyzero + .section .gnu.linkonce.t.divbyzero, "ax", @progbits + .globl __divbyzero + .type __divbyzero, @function + .usepv __divbyzero, no + .hidden __divbyzero +#else +#define DIVBYZERO $divbyzero +#endif + + .align 4 +DIVBYZERO: + cfi_startproc + cfi_return_column (RA) + cfi_def_cfa_offset (FRAME) + + mov a0, RV + unop + lda a0, GEN_INTDIV + call_pal PAL_gentrap + + mov RV, a0 + clr RV + lda sp, FRAME(sp) + cfi_def_cfa_offset (0) + ret $31, (RA), 1 + + cfi_endproc + .size DIVBYZERO, .-DIVBYZERO +.endm + +/* Like the ev6 instructions, but fall back to stack use on prior machines. + Each macro takes the source register, the destination register and a + byte offset from sp naming the scratch slot used by the fallback path; + the slot is not touched when __alpha_fix__ is defined. */ + + .arch ev6 + +.macro _ITOFS gr, fr, slot +#ifdef __alpha_fix__ + itofs \gr, \fr +#else + stl \gr, \slot(sp) + lds \fr, \slot(sp) +#endif +.endm + +.macro _ITOFT gr, fr, slot +#ifdef __alpha_fix__ + itoft \gr, \fr +#else + stq \gr, \slot(sp) + ldt \fr, \slot(sp) +#endif +.endm + +.macro _FTOIT fr, gr, slot +#ifdef __alpha_fix__ + ftoit \fr, \gr +#else + stt \fr, \slot(sp) + ldq \gr, \slot(sp) +#endif +.endm + +/* Similarly, but move two registers. Schedules better for pre-ev6. 
*/ + +.macro _ITOFT2 gr1, fr1, slot1, gr2, fr2, slot2 +#ifdef __alpha_fix__ + itoft \gr1, \fr1 + itoft \gr2, \fr2 +#else + stq \gr1, \slot1(sp) + stq \gr2, \slot2(sp) + ldt \fr1, \slot1(sp) + ldt \fr2, \slot2(sp) +#endif +.endm diff --git a/REORG.TODO/sysdeps/alpha/divl.S b/REORG.TODO/sysdeps/alpha/divl.S new file mode 100644 index 0000000000..f09433e993 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/divl.S @@ -0,0 +1,83 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + +/* 32-bit signed int divide. This is not a normal C function. Argument + registers are t10 and t11, the result goes in t12. Only t12 and AT may + be clobbered. + + The FPU can handle all input values except zero. Whee! + + The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE + for cvttq/c even without /sui being set. It will not, however, properly + raise the exception, so we don't have to worry about FPCR_INED being clear + and so dying by SIGFPE. 
*/ + +/* EXTEND widens a 32-bit operand to 64 bits before the conversion to + floating point. The default sign-extends; divlu.S predefines it as a + zero-extension and renames __divl to build __divlu from this file. */ +#ifndef EXTEND +#define EXTEND(S,D) sextl S, D +#endif + + .text + .align 4 + .globl __divl + .type __divl, @funcnoplt + .usepv __divl, no + + cfi_startproc + cfi_return_column (RA) +__divl: + lda sp, -FRAME(sp) + cfi_def_cfa_offset (FRAME) + CALL_MCOUNT + stt $f0, 0(sp) + excb + beq Y, DIVBYZERO + + stt $f1, 8(sp) + stt $f2, 16(sp) + cfi_rel_offset ($f0, 0) + cfi_rel_offset ($f1, 8) + cfi_rel_offset ($f2, 16) + mf_fpcr $f2 /* $f2 keeps the FPCR until mt_fpcr below */ + + EXTEND (X, RV) + EXTEND (Y, AT) + _ITOFT2 RV, $f0, 24, AT, $f1, 32 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + cvttq/c $f0, $f0 + excb + mt_fpcr $f2 + _FTOIT $f0, RV, 24 + + ldt $f0, 0(sp) + ldt $f1, 8(sp) + ldt $f2, 16(sp) + lda sp, FRAME(sp) + cfi_restore ($f0) + cfi_restore ($f1) + cfi_restore ($f2) + cfi_def_cfa_offset (0) + sextl RV, RV /* result is returned sign-extended from 32 bits, in both variants */ + ret $31, (RA), 1 + + cfi_endproc + .size __divl, .-__divl + + DO_DIVBYZERO diff --git a/REORG.TODO/sysdeps/alpha/divlu.S b/REORG.TODO/sysdeps/alpha/divlu.S new file mode 100644 index 0000000000..5c54bb54c0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/divlu.S @@ -0,0 +1,4 @@ +#define UNSIGNED +#define EXTEND(S,D) zapnot S, 15, D +#define __divl __divlu +#include <divl.S> diff --git a/REORG.TODO/sysdeps/alpha/divq.S b/REORG.TODO/sysdeps/alpha/divq.S new file mode 100644 index 0000000000..f9acc1c70e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/divq.S @@ -0,0 +1,273 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + + +/* 64-bit signed long divide. These are not normal C functions. Argument + registers are t10 and t11, the result goes in t12. Only t12 and AT may + be clobbered. + + Theory of operation here is that we can use the FPU divider for virtually + all operands that we see: all dividend values between -2**53 and 2**53-1 + can be computed directly. Note that divisor values need not be checked + against that range because the rounded fp value will be close enough such + that the quotient is < 1, which will properly be truncated to zero when we + convert back to integer. + + When the dividend is outside the range for which we can compute exact + results, we use the fp quotient as an estimate from which we begin refining + an exact integral value. This reduces the number of iterations in the + shift-and-subtract loop significantly. + + The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE + for cvttq/c even without /sui being set. It will not, however, properly + raise the exception, so we don't have to worry about FPCR_INED being clear + and so dying by SIGFPE. */ + + .text + .align 4 + .globl __divq + .type __divq, @funcnoplt + .usepv __divq, no + + cfi_startproc + cfi_return_column (RA) +__divq: + lda sp, -FRAME(sp) + cfi_def_cfa_offset (FRAME) + CALL_MCOUNT + + /* Get the fp divide insn issued as quickly as possible. After + that's done, we have at least 22 cycles until its results are + ready -- all the time in the world to figure out how we're + going to use the results. 
*/ + stt $f0, 0(sp) + excb + beq Y, DIVBYZERO + + stt $f1, 8(sp) + stt $f3, 48(sp) + cfi_rel_offset ($f0, 0) + cfi_rel_offset ($f1, 8) + cfi_rel_offset ($f3, 48) + mf_fpcr $f3 + + _ITOFT2 X, $f0, 16, Y, $f1, 24 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + + /* Check to see if X fit in the double as an exact value. */ + sll X, (64-53), AT + ldt $f1, 8(sp) + sra AT, (64-53), AT + cmpeq X, AT, AT + beq AT, $x_big + + /* If we get here, we're expecting exact results from the division. + Do nothing else besides convert and clean up. */ + cvttq/c $f0, $f0 + excb + mt_fpcr $f3 + _FTOIT $f0, RV, 16 + + ldt $f0, 0(sp) + ldt $f3, 48(sp) + cfi_restore ($f1) + cfi_remember_state + cfi_restore ($f0) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + lda sp, FRAME(sp) + ret $31, (RA), 1 + + .align 4 + cfi_restore_state +$x_big: + /* If we get here, X is large enough that we don't expect exact + results, and neither X nor Y got mis-translated for the fp + division. Our task is to take the fp result, figure out how + far it's off from the correct result and compute a fixup. */ + stq t0, 16(sp) + stq t1, 24(sp) + stq t2, 32(sp) + stq t5, 40(sp) + cfi_rel_offset (t0, 16) + cfi_rel_offset (t1, 24) + cfi_rel_offset (t2, 32) + cfi_rel_offset (t5, 40) + +#define Q RV /* quotient */ +#define R t0 /* remainder */ +#define SY t1 /* scaled Y */ +#define S t2 /* scalar */ +#define QY t3 /* Q*Y */ + + /* The fixup code below can only handle unsigned values. 
*/ + or X, Y, AT + mov $31, t5 + blt AT, $fix_sign_in +$fix_sign_in_ret1: + cvttq/c $f0, $f0 + + _FTOIT $f0, Q, 8 + .align 3 +$fix_sign_in_ret2: + ldt $f0, 0(sp) + stq t3, 0(sp) + cfi_restore ($f0) + cfi_rel_offset (t3, 0) + + mulq Q, Y, QY + excb + stq t4, 8(sp) + mt_fpcr $f3 + cfi_rel_offset (t4, 8) + + subq QY, X, R + mov Y, SY + mov 1, S + bgt R, $q_high + +$q_high_ret: + subq X, QY, R + mov Y, SY + mov 1, S + bgt R, $q_low + +$q_low_ret: + /* Q is final for the unsigned operands. Restore the scratch + registers; a nonzero t5 means $fix_sign_in changed signs that + $fix_sign_out must undo first. */ + ldq t0, 16(sp) + ldq t1, 24(sp) + ldq t2, 32(sp) + bne t5, $fix_sign_out + +$fix_sign_out_ret: + ldq t3, 0(sp) + ldq t4, 8(sp) + ldq t5, 40(sp) + ldt $f3, 48(sp) + lda sp, FRAME(sp) + cfi_remember_state + cfi_restore (t0) + cfi_restore (t1) + cfi_restore (t2) + cfi_restore (t3) + cfi_restore (t4) + cfi_restore (t5) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + ret $31, (RA), 1 + + .align 4 + cfi_restore_state + /* The quotient that we computed was too large. We need to reduce + it by S such that Y*S >= R. Obviously the closer we get to the + correct value the better, but overshooting high is ok, as we'll + fix that up later. */ +0: + addq SY, SY, SY + addq S, S, S +$q_high: + cmpult SY, R, AT + bne AT, 0b + + subq Q, S, Q + unop + subq QY, SY, QY + br $q_high_ret + + .align 4 + /* The quotient that we computed was too small. Divide the current + remainder (R) by Y and add that to the existing quotient (Q). + The expectation, of course, is that R is much smaller than X. */ + /* Begin with a shift-up loop. Compute S such that Y*S >= R. We + already have a copy of Y in SY and the value 1 in S. */ +0: + addq SY, SY, SY + addq S, S, S +$q_low: + cmpult SY, R, AT + bne AT, 0b + + /* Shift-down and subtract loop. Each iteration compares our scaled + Y (SY) with the remainder (R); if SY <= R then X is divisible by + Y's scalar (S) so add it to the quotient (Q). 
*/ +2: addq Q, S, t3 + srl S, 1, S + cmpule SY, R, AT + subq R, SY, t4 + + cmovne AT, t3, Q + cmovne AT, t4, R + srl SY, 1, SY + bne S, 2b + + br $q_low_ret + + .align 4 +$fix_sign_in: + /* If we got here, then X|Y is negative. Need to adjust everything + such that we're doing unsigned division in the fixup loop. */ + /* T5 records the changes we had to make: + bit 0: set if result should be negative. + bit 2: set if X was negated. + bit 3: set if Y was negated. + */ + xor X, Y, AT + cmplt AT, 0, t5 + cmplt X, 0, AT + negq X, t0 + + s4addq AT, t5, t5 + cmovne AT, t0, X + cmplt Y, 0, AT + negq Y, t0 + + s8addq AT, t5, t5 + cmovne AT, t0, Y + unop + blbc t5, $fix_sign_in_ret1 + + /* The result is to be negative: convert the fp estimate here and negate it before rejoining the main path. */ + cvttq/c $f0, $f0 + _FTOIT $f0, Q, 8 + .align 3 + negq Q, Q + br $fix_sign_in_ret2 + + .align 4 +$fix_sign_out: + /* Now we get to undo what we did above. */ + /* ??? Is this really faster than just increasing the size of + the stack frame and storing X and Y in memory? */ + and t5, 8, AT + negq Y, t4 + cmovne AT, t4, Y + + and t5, 4, AT + negq X, t4 + cmovne AT, t4, X + + negq RV, t4 + cmovlbs t5, t4, RV + + br $fix_sign_out_ret + + cfi_endproc + .size __divq, .-__divq + + DO_DIVBYZERO diff --git a/REORG.TODO/sysdeps/alpha/divqu.S b/REORG.TODO/sysdeps/alpha/divqu.S new file mode 100644 index 0000000000..15101fa246 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/divqu.S @@ -0,0 +1,256 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + + +/* 64-bit unsigned long divide. These are not normal C functions. Argument + registers are t10 and t11, the result goes in t12. Only t12 and AT may be + clobbered. + + Theory of operation here is that we can use the FPU divider for virtually + all operands that we see: all dividend values from 0 to 2**53-1 + can be computed directly. Note that divisor values need not be checked + against that range because the rounded fp value will be close enough such + that the quotient is < 1, which will properly be truncated to zero when we + convert back to integer. + + When the dividend is outside the range for which we can compute exact + results, we use the fp quotient as an estimate from which we begin refining + an exact integral value. This reduces the number of iterations in the + shift-and-subtract loop significantly. + + The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE + for cvttq/c even without /sui being set. It will not, however, properly + raise the exception, so we don't have to worry about FPCR_INED being clear + and so dying by SIGFPE. */ + + .text + .align 4 + .globl __divqu + .type __divqu, @funcnoplt + .usepv __divqu, no + + cfi_startproc + cfi_return_column (RA) +__divqu: + lda sp, -FRAME(sp) + cfi_def_cfa_offset (FRAME) + CALL_MCOUNT + + /* Get the fp divide insn issued as quickly as possible. After + that's done, we have at least 22 cycles until its results are + ready -- all the time in the world to figure out how we're + going to use the results. 
*/ + stt $f0, 0(sp) + excb + beq Y, DIVBYZERO + + stt $f1, 8(sp) + stt $f3, 48(sp) + cfi_rel_offset ($f0, 0) + cfi_rel_offset ($f1, 8) + cfi_rel_offset ($f3, 48) + mf_fpcr $f3 + + _ITOFT2 X, $f0, 16, Y, $f1, 24 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + blt X, $x_is_neg + divt/c $f0, $f1, $f0 + + /* Check to see if Y was mis-converted as signed value. */ + ldt $f1, 8(sp) + blt Y, $y_is_neg + + /* Check to see if X fit in the double as an exact value. */ + srl X, 53, AT + bne AT, $x_big + + /* If we get here, we're expecting exact results from the division. + Do nothing else besides convert and clean up. */ + cvttq/c $f0, $f0 + excb + mt_fpcr $f3 + _FTOIT $f0, RV, 16 + + ldt $f0, 0(sp) + ldt $f3, 48(sp) + cfi_remember_state + cfi_restore ($f0) + cfi_restore ($f1) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + lda sp, FRAME(sp) + ret $31, (RA), 1 + + .align 4 + cfi_restore_state +$x_is_neg: + /* If we get here, X is so big that bit 63 is set, which made the + conversion come out negative. Fix it up lest we not even get + a good estimate. */ + ldah AT, 0x5f80 /* 2**64 as float. */ + stt $f2, 24(sp) + cfi_rel_offset ($f2, 24) + _ITOFS AT, $f2, 16 + + .align 4 + addt $f0, $f2, $f0 + unop + divt/c $f0, $f1, $f0 + unop + + /* Ok, we've now the divide issued. Continue with other checks. */ + ldt $f1, 8(sp) + unop + ldt $f2, 24(sp) + blt Y, $y_is_neg + cfi_restore ($f1) + cfi_restore ($f2) + cfi_remember_state /* for y_is_neg */ + + .align 4 +$x_big: + /* If we get here, X is large enough that we don't expect exact + results, and neither X nor Y got mis-translated for the fp + division. Our task is to take the fp result, figure out how + far it's off from the correct result and compute a fixup. 
*/ + stq t0, 16(sp) + stq t1, 24(sp) + stq t2, 32(sp) + stq t3, 40(sp) + cfi_rel_offset (t0, 16) + cfi_rel_offset (t1, 24) + cfi_rel_offset (t2, 32) + cfi_rel_offset (t3, 40) + +#define Q RV /* quotient */ +#define R t0 /* remainder */ +#define SY t1 /* scaled Y */ +#define S t2 /* scalar */ +#define QY t3 /* Q*Y */ + + cvttq/c $f0, $f0 + _FTOIT $f0, Q, 8 + mulq Q, Y, QY + + .align 4 + stq t4, 8(sp) + excb + ldt $f0, 0(sp) + mt_fpcr $f3 + cfi_rel_offset (t4, 8) + cfi_restore ($f0) + + subq QY, X, R + mov Y, SY + mov 1, S + bgt R, $q_high + +$q_high_ret: + subq X, QY, R + mov Y, SY + mov 1, S + bgt R, $q_low + +$q_low_ret: + /* Q is final. Restore the scratch registers and release the frame. */ + ldq t4, 8(sp) + ldq t0, 16(sp) + ldq t1, 24(sp) + ldq t2, 32(sp) + + ldq t3, 40(sp) + ldt $f3, 48(sp) + lda sp, FRAME(sp) + cfi_remember_state + cfi_restore (t0) + cfi_restore (t1) + cfi_restore (t2) + cfi_restore (t3) + cfi_restore (t4) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + ret $31, (RA), 1 + + .align 4 + cfi_restore_state + /* The quotient that we computed was too large. We need to reduce + it by S such that Y*S >= R. Obviously the closer we get to the + correct value the better, but overshooting high is ok, as we'll + fix that up later. */ +0: + addq SY, SY, SY + addq S, S, S +$q_high: + cmpult SY, R, AT + bne AT, 0b + + subq Q, S, Q + unop + subq QY, SY, QY + br $q_high_ret + + .align 4 + /* The quotient that we computed was too small. Divide the current + remainder (R) by Y and add that to the existing quotient (Q). + The expectation, of course, is that R is much smaller than X. */ + /* Begin with a shift-up loop. Compute S such that Y*S >= R. We + already have a copy of Y in SY and the value 1 in S. */ +0: + addq SY, SY, SY + addq S, S, S +$q_low: + cmpult SY, R, AT + bne AT, 0b + + /* Shift-down and subtract loop. 
Each iteration compares our scaled + Y (SY) with the remainder (R); if SY <= R then X is divisible by + Y's scalar (S) so add it to the quotient (Q). 
*/ +2: addq Q, S, t3 + srl S, 1, S + cmpule SY, R, AT + subq R, SY, t4 + + cmovne AT, t3, Q + cmovne AT, t4, R + srl SY, 1, SY + bne S, 2b + + br $q_low_ret + + .align 4 + cfi_restore_state +$y_is_neg: + /* If we get here, Y is so big that bit 63 is set. The results + from the divide will be completely wrong. Fortunately, the + quotient must be either 0 or 1, so just compute it directly. */ + cmpule Y, X, RV + excb + mt_fpcr $f3 + ldt $f0, 0(sp) + ldt $f3, 48(sp) + lda sp, FRAME(sp) + cfi_restore ($f0) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + ret $31, (RA), 1 + + cfi_endproc + .size __divqu, .-__divqu + + DO_DIVBYZERO diff --git a/REORG.TODO/sysdeps/alpha/dl-dtprocnum.h b/REORG.TODO/sysdeps/alpha/dl-dtprocnum.h new file mode 100644 index 0000000000..67845cdd62 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-dtprocnum.h @@ -0,0 +1,3 @@ +/* Number of extra dynamic section entries for this architecture. By + default there are none. */ +#define DT_THISPROCNUM DT_ALPHA_NUM diff --git a/REORG.TODO/sysdeps/alpha/dl-machine.h b/REORG.TODO/sysdeps/alpha/dl-machine.h new file mode 100644 index 0000000000..7580cd29b6 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-machine.h @@ -0,0 +1,529 @@ +/* Machine-dependent ELF dynamic relocation inline functions. Alpha version. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This was written in the absence of an ABI -- don't expect + it to remain unchanged. */ + +#ifndef dl_machine_h +#define dl_machine_h 1 + +#define ELF_MACHINE_NAME "alpha" + +#include <string.h> + + +/* Mask identifying addresses reserved for the user program, + where the dynamic linker should not map anything. */ +#define ELF_MACHINE_USER_ADDRESS_MASK 0x120000000UL + +/* Translate a processor specific dynamic tag to the index in l_info array. */ +#define DT_ALPHA(x) (DT_ALPHA_##x - DT_LOPROC + DT_NUM) + +/* Return nonzero iff ELF header is compatible with the running host. */ +static inline int +elf_machine_matches_host (const Elf64_Ehdr *ehdr) +{ + return ehdr->e_machine == EM_ALPHA; +} + +/* Return the link-time address of _DYNAMIC. The multiple-got-capable + linker no longer allocates the first .got entry for this. But not to + worry, no special tricks are needed. */ +static inline Elf64_Addr +elf_machine_dynamic (void) +{ +#ifndef NO_AXP_MULTI_GOT_LD + return (Elf64_Addr) &_DYNAMIC; +#else + register Elf64_Addr *gp __asm__ ("$29"); + return gp[-4096]; +#endif +} + +/* Return the run-time load address of the shared object. */ + +static inline Elf64_Addr +elf_machine_load_address (void) +{ + /* This relies on the compiler using gp-relative addresses for static symbols. DOT is initialized with its own address, so the value stored in it is the address the linker assigned; subtracting that from DOT's run-time address gives the load offset (presumably evaluated before DOT itself has been relocated -- confirm against the caller). */ + static void *dot = &dot; + return (void *)&dot - dot; +} + +/* Set up the loaded object described by L so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. 
*/ + +static inline int +elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) +{ + extern char _dl_runtime_resolve_new[] attribute_hidden; + extern char _dl_runtime_profile_new[] attribute_hidden; + extern char _dl_runtime_resolve_old[] attribute_hidden; + extern char _dl_runtime_profile_old[] attribute_hidden; + + struct pltgot { + char *resolve; + struct link_map *link; + }; + + struct pltgot *pg; + long secureplt; + char *resolve; + + if (map->l_info[DT_JMPREL] == 0 || !lazy) + return lazy; + + /* Check to see if we're using the read-only plt form. */ + secureplt = map->l_info[DT_ALPHA(PLTRO)] != 0; + + /* If the binary uses the read-only secure plt format, PG points to + the .got.plt section, which is the right place for ld.so to place + its hooks. Otherwise, PG is currently pointing at the start of + the plt; the hooks go at offset 16. */ + pg = (struct pltgot *) D_PTR (map, l_info[DT_PLTGOT]); + pg += !secureplt; + + /* This function will be called to perform the relocation. They're + not declared as functions to convince the compiler to use gp + relative relocations for them. */ + if (secureplt) + resolve = _dl_runtime_resolve_new; + else + resolve = _dl_runtime_resolve_old; + + if (__builtin_expect (profile, 0)) + { + if (secureplt) + resolve = _dl_runtime_profile_new; + else + resolve = _dl_runtime_profile_old; + + if (GLRO(dl_profile) && _dl_name_match_p (GLRO(dl_profile), map)) + { + /* This is the object we are looking for. Say that we really + want profiling and the timers are started. */ + GL(dl_profile_map) = map; + } + } + + pg->resolve = resolve; + pg->link = map; + + return lazy; +} + +/* Initial entry point code for the dynamic linker. + The C function `_dl_start' is the real entry point; + its return value is the user program's entry point. 
*/ + +#define RTLD_START asm ("\ + .section .text \n\ + .set at \n\ + .globl _start \n\ + .ent _start \n\ +_start: \n\ + .frame $31,0,$31,0 \n\ + br $gp, 0f \n\ +0: ldgp $gp, 0($gp) \n\ + .prologue 0 \n\ + /* Pass pointer to argument block to _dl_start. */ \n\ + mov $sp, $16 \n\ + bsr $26, _dl_start !samegp \n\ + .end _start \n\ + /* FALLTHRU */ \n\ + .globl _dl_start_user \n\ + .ent _dl_start_user \n\ +_dl_start_user: \n\ + .frame $31,0,$31,0 \n\ + .prologue 0 \n\ + /* Save the user entry point address in s0. */ \n\ + mov $0, $9 \n\ + /* See if we were run as a command with the executable \n\ + file name as an extra leading argument. */ \n\ + ldah $1, _dl_skip_args($gp) !gprelhigh \n\ + ldl $1, _dl_skip_args($1) !gprellow \n\ + bne $1, $fixup_stack \n\ +$fixup_stack_ret: \n\ + /* The special initializer gets called with the stack \n\ + just as the application's entry point will see it; \n\ + it can switch stacks if it moves these contents \n\ + over. */ \n\ +" RTLD_START_SPECIAL_INIT " \n\ + /* Call _dl_init(_dl_loaded, argc, argv, envp) to run \n\ + initializers. */ \n\ + ldah $16, _rtld_local($gp) !gprelhigh \n\ + ldq $16, _rtld_local($16) !gprellow \n\ + ldq $17, 0($sp) \n\ + lda $18, 8($sp) \n\ + s8addq $17, 8, $19 \n\ + addq $19, $18, $19 \n\ + bsr $26, _dl_init !samegp \n\ + /* Pass our finalizer function to the user in $0. */ \n\ + ldah $0, _dl_fini($gp) !gprelhigh \n\ + lda $0, _dl_fini($0) !gprellow \n\ + /* Jump to the user's entry point. */ \n\ + mov $9, $27 \n\ + jmp ($9) \n\ +$fixup_stack: \n\ + /* Adjust the stack pointer to skip _dl_skip_args words.\n\ + This involves copying everything down, since the \n\ + stack pointer must always be 16-byte aligned. 
*/ \n\ + ldah $7, __GI__dl_argv($gp) !gprelhigh \n\ + ldq $2, 0($sp) \n\ + ldq $5, __GI__dl_argv($7) !gprellow \n\ + subq $31, $1, $6 \n\ + subq $2, $1, $2 \n\ + s8addq $6, $5, $5 \n\ + mov $sp, $4 \n\ + s8addq $1, $sp, $3 \n\ + stq $2, 0($sp) \n\ + stq $5, __GI__dl_argv($7) !gprellow \n\ + /* Copy down argv. */ \n\ +0: ldq $5, 8($3) \n\ + addq $4, 8, $4 \n\ + addq $3, 8, $3 \n\ + stq $5, 0($4) \n\ + bne $5, 0b \n\ + /* Copy down envp. */ \n\ +1: ldq $5, 8($3) \n\ + addq $4, 8, $4 \n\ + addq $3, 8, $3 \n\ + stq $5, 0($4) \n\ + bne $5, 1b \n\ + /* Copy down auxiliary table. */ \n\ +2: ldq $5, 8($3) \n\ + ldq $6, 16($3) \n\ + addq $4, 16, $4 \n\ + addq $3, 16, $3 \n\ + stq $5, -8($4) \n\ + stq $6, 0($4) \n\ + bne $5, 2b \n\ + br $fixup_stack_ret \n\ + .end _dl_start_user \n\ + .set noat \n\ +.previous"); + +#ifndef RTLD_START_SPECIAL_INIT +#define RTLD_START_SPECIAL_INIT /* nothing */ +#endif + +/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry + or TLS variables, so undefined references should not be allowed + to define the value. + + ELF_RTYPE_CLASS_COPY iff TYPE should not be allowed to resolve + to one of the main executable's symbols, as for a COPY reloc. + This is unused on Alpha. */ + +# define elf_machine_type_class(type) \ + (((type) == R_ALPHA_JMP_SLOT \ + || (type) == R_ALPHA_DTPMOD64 \ + || (type) == R_ALPHA_DTPREL64 \ + || (type) == R_ALPHA_TPREL64) * ELF_RTYPE_CLASS_PLT) + +/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ +#define ELF_MACHINE_JMP_SLOT R_ALPHA_JMP_SLOT + +/* The alpha never uses Elf64_Rel relocations. */ +#define ELF_MACHINE_NO_REL 1 +#define ELF_MACHINE_NO_RELA 0 + +/* We define an initialization functions. This is called very early in + * _dl_sysdep_start. */ +#define DL_PLATFORM_INIT dl_platform_init () + +static inline void __attribute__ ((unused)) +dl_platform_init (void) +{ + if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') + /* Avoid an empty string which would disturb us. 
*/ + GLRO(dl_platform) = NULL; +} + +/* Fix up the instructions of a PLT entry to invoke the function + rather than the dynamic linker. */ +static inline Elf64_Addr +elf_machine_fixup_plt (struct link_map *map, lookup_t t, + const Elf64_Rela *reloc, + Elf64_Addr *got_addr, Elf64_Addr value) +{ + const Elf64_Rela *rela_plt; + Elf64_Word *plte; + long int edisp; + + /* Store the value we are going to load. */ + *got_addr = value; + + /* If this binary uses the read-only secure plt format, we're done. */ + if (map->l_info[DT_ALPHA(PLTRO)]) + return value; + + /* Otherwise we have to modify the plt entry in place to do the branch. */ + + /* Recover the PLT entry address by calculating reloc's index into the + .rela.plt, and finding that entry in the .plt. */ + rela_plt = (const Elf64_Rela *) D_PTR (map, l_info[DT_JMPREL]); + plte = (Elf64_Word *) (D_PTR (map, l_info[DT_PLTGOT]) + 32); + plte += 3 * (reloc - rela_plt); + + /* Find the displacement from the plt entry to the function. */ + edisp = (long int) (value - (Elf64_Addr)&plte[3]) / 4; + + if (edisp >= -0x100000 && edisp < 0x100000) + { + /* If we are in range, use br to perfect branch prediction and + elide the dependency on the address load. This case happens, + e.g., when a shared library call is resolved to the same library. */ + + int hi, lo; + hi = value - (Elf64_Addr)&plte[0]; + lo = (short int) hi; + hi = (hi - lo) >> 16; + + /* Emit "lda $27,lo($27)" */ + plte[1] = 0x237b0000 | (lo & 0xffff); + + /* Emit "br $31,function" */ + plte[2] = 0xc3e00000 | (edisp & 0x1fffff); + + /* Think about thread-safety -- the previous instructions must be + committed to memory before the first is overwritten. */ + __asm__ __volatile__("wmb" : : : "memory"); + + /* Emit "ldah $27,hi($27)" */ + plte[0] = 0x277b0000 | (hi & 0xffff); + } + else + { + /* Don't bother with the hint since we already know the hint is + wrong. Eliding it prevents the wrong page from getting pulled + into the cache. 
*/ + + int hi, lo; + hi = (Elf64_Addr)got_addr - (Elf64_Addr)&plte[0]; + lo = (short)hi; + hi = (hi - lo) >> 16; + + /* Emit "ldq $27,lo($27)" */ + plte[1] = 0xa77b0000 | (lo & 0xffff); + + /* Emit "jmp $31,($27)" */ + plte[2] = 0x6bfb0000; + + /* Think about thread-safety -- the previous instructions must be + committed to memory before the first is overwritten. */ + __asm__ __volatile__("wmb" : : : "memory"); + + /* Emit "ldah $27,hi($27)" */ + plte[0] = 0x277b0000 | (hi & 0xffff); + } + + /* At this point, if we've been doing runtime resolution, Icache is dirty. + This will be taken care of in _dl_runtime_resolve. If instead we are + doing this as part of non-lazy startup relocation, that bit of code + hasn't made it into Icache yet, so there's nothing to clean up. */ + + return value; +} + +/* Return the final value of a plt relocation. */ +static inline Elf64_Addr +elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + Elf64_Addr value) +{ + return value + reloc->r_addend; +} + +/* Names of the architecture-specific auditing callback functions. */ +#define ARCH_LA_PLTENTER alpha_gnu_pltenter +#define ARCH_LA_PLTEXIT alpha_gnu_pltexit + +#endif /* !dl_machine_h */ + +#ifdef RESOLVE_MAP + +/* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ +auto inline void +__attribute__ ((always_inline)) +elf_machine_rela (struct link_map *map, + const Elf64_Rela *reloc, + const Elf64_Sym *sym, + const struct r_found_version *version, + void *const reloc_addr_arg, + int skip_ifunc) +{ + Elf64_Addr *const reloc_addr = reloc_addr_arg; + unsigned long int const r_type = ELF64_R_TYPE (reloc->r_info); + +#if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC && !defined SHARED + /* This is defined in rtld.c, but nowhere in the static libc.a; make the + reference weak so static programs can still link. This declaration + cannot be done when compiling rtld.c (i.e. 
#ifdef RTLD_BOOTSTRAP) + because rtld.c contains the common defn for _dl_rtld_map, which is + incompatible with a weak decl in the same file. */ + weak_extern (_dl_rtld_map); +#endif + + /* We cannot use a switch here because we cannot locate the switch + jump table until we've self-relocated. */ + +#if !defined RTLD_BOOTSTRAP || !defined HAVE_Z_COMBRELOC + if (__builtin_expect (r_type == R_ALPHA_RELATIVE, 0)) + { +# if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC + /* Already done in dynamic linker. */ + if (map != &GL(dl_rtld_map)) +# endif + { + /* XXX Make some timings. Maybe it's preferable to test for + unaligned access and only do it the complex way if necessary. */ + Elf64_Addr reloc_addr_val; + + /* Load value without causing unaligned trap. */ + memcpy (&reloc_addr_val, reloc_addr_arg, 8); + reloc_addr_val += map->l_addr; + + /* Store value without causing unaligned trap. */ + memcpy (reloc_addr_arg, &reloc_addr_val, 8); + } + } + else +#endif + if (__builtin_expect (r_type == R_ALPHA_NONE, 0)) + return; + else + { + struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); + Elf64_Addr sym_value; + Elf64_Addr sym_raw_value; + + sym_raw_value = sym_value = reloc->r_addend; + if (sym_map) + { + sym_raw_value += sym->st_value; + sym_value = sym_raw_value + sym_map->l_addr; + } + + if (r_type == R_ALPHA_GLOB_DAT) + *reloc_addr = sym_value; +#ifdef RESOLVE_CONFLICT_FIND_MAP + /* In .gnu.conflict section, R_ALPHA_JMP_SLOT relocations have + R_ALPHA_JMP_SLOT in lower 8 bits and the remaining 24 bits + are .rela.plt index. */ + else if ((r_type & 0xff) == R_ALPHA_JMP_SLOT) + { + /* elf_machine_fixup_plt needs the map reloc_addr points into, + while in _dl_resolve_conflicts map is _dl_loaded. 
*/ + RESOLVE_CONFLICT_FIND_MAP (map, reloc_addr); + reloc = ((const Elf64_Rela *) D_PTR (map, l_info[DT_JMPREL])) + + (r_type >> 8); + elf_machine_fixup_plt (map, 0, reloc, reloc_addr, sym_value); + } +#else + else if (r_type == R_ALPHA_JMP_SLOT) + elf_machine_fixup_plt (map, 0, reloc, reloc_addr, sym_value); +#endif +#ifndef RTLD_BOOTSTRAP + else if (r_type == R_ALPHA_REFQUAD) + { + /* Store value without causing unaligned trap. */ + memcpy (reloc_addr_arg, &sym_value, 8); + } +#endif + else if (r_type == R_ALPHA_DTPMOD64) + { +# ifdef RTLD_BOOTSTRAP + /* During startup the dynamic linker is always index 1. */ + *reloc_addr = 1; +# else + /* Get the information from the link map returned by the + resolv function. */ + if (sym_map != NULL) + *reloc_addr = sym_map->l_tls_modid; +# endif + } + else if (r_type == R_ALPHA_DTPREL64) + { +# ifndef RTLD_BOOTSTRAP + /* During relocation all TLS symbols are defined and used. + Therefore the offset is already correct. */ + *reloc_addr = sym_raw_value; +# endif + } + else if (r_type == R_ALPHA_TPREL64) + { +# ifdef RTLD_BOOTSTRAP + *reloc_addr = sym_raw_value + map->l_tls_offset; +# else + if (sym_map) + { + CHECK_STATIC_TLS (map, sym_map); + *reloc_addr = sym_raw_value + sym_map->l_tls_offset; + } +# endif + } + else + _dl_reloc_bad_type (map, r_type, 0); + } +} + +/* Let do-rel.h know that on Alpha if l_addr is 0, all RELATIVE relocs + can be skipped. */ +#define ELF_MACHINE_REL_RELATIVE 1 + +auto inline void +__attribute__ ((always_inline)) +elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +{ + /* XXX Make some timings. Maybe it's preferable to test for + unaligned access and only do it the complex way if necessary. */ + Elf64_Addr reloc_addr_val; + + /* Load value without causing unaligned trap. */ + memcpy (&reloc_addr_val, reloc_addr_arg, 8); + reloc_addr_val += l_addr; + + /* Store value without causing unaligned trap. 
*/ + memcpy (reloc_addr_arg, &reloc_addr_val, 8); +} + +auto inline void +__attribute__ ((always_inline)) +elf_machine_lazy_rel (struct link_map *map, + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) +{ + Elf64_Addr * const reloc_addr = (void *)(l_addr + reloc->r_offset); + unsigned long int const r_type = ELF64_R_TYPE (reloc->r_info); + + if (r_type == R_ALPHA_JMP_SLOT) + { + /* Perform a RELATIVE reloc on the .got entry that transfers + to the .plt. */ + *reloc_addr += l_addr; + } + else if (r_type == R_ALPHA_NONE) + return; + else + _dl_reloc_bad_type (map, r_type, 1); +} + +#endif /* RESOLVE_MAP */ diff --git a/REORG.TODO/sysdeps/alpha/dl-procinfo.c b/REORG.TODO/sysdeps/alpha/dl-procinfo.c new file mode 100644 index 0000000000..7a2e18a978 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-procinfo.c @@ -0,0 +1,63 @@ +/* Data for Alpha version of processor capability information. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Aurelien Jarno <aurelien@aurel32.net>, 2008. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This information must be kept in sync with the _DL_PLATFORM_COUNT + definitions in procinfo.h. + + If anything should be added here check whether the size of each string + is still ok with the given array size. 
+ + All the #ifdefs in the definitions are quite irritating but + necessary if we want to avoid duplicating the information. There + are three different modes: + + - PROCINFO_DECL is defined. This means we are only interested in + declarations. + + - PROCINFO_DECL is not defined: + + + if SHARED is defined the file is included in an array + initializer. The .element = { ... } syntax is needed. + + + if SHARED is not defined a normal array initialization is + needed. + */ + +#ifndef PROCINFO_CLASS +#define PROCINFO_CLASS +#endif + +#if !defined PROCINFO_DECL && defined SHARED + ._dl_alpha_platforms +#else +PROCINFO_CLASS const char _dl_alpha_platforms[5][5] +#endif +#ifndef PROCINFO_DECL += { + "ev4", "ev5", "ev56", "ev6", "ev67" + } +#endif +#if !defined SHARED || defined PROCINFO_DECL +; +#else +, +#endif + +#undef PROCINFO_DECL +#undef PROCINFO_CLASS diff --git a/REORG.TODO/sysdeps/alpha/dl-procinfo.h b/REORG.TODO/sysdeps/alpha/dl-procinfo.h new file mode 100644 index 0000000000..ed7a66ee49 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-procinfo.h @@ -0,0 +1,60 @@ +/* Alpha version of processor capability information handling macros. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Aurelien Jarno <aurelien@aurel32.net>, 2008. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _DL_PROCINFO_H +#define _DL_PROCINFO_H 1 + +#include <ldsodefs.h> + + +/* Mask to filter out platforms. */ +#define _DL_HWCAP_PLATFORM (-1ULL) + +#define _DL_PLATFORMS_COUNT 5 + +/* Return the index of STR in the dl_alpha_platforms table, or -1 if + STR is NULL or matches none of the _DL_PLATFORMS_COUNT entries. */ +static inline int +__attribute__ ((unused, always_inline)) +_dl_string_platform (const char *str) +{ + int i; + + if (str != NULL) + for (i = 0; i < _DL_PLATFORMS_COUNT; ++i) + { + if (strcmp (str, GLRO(dl_alpha_platforms)[i]) == 0) + return i; + } + return -1; +} + +/* We cannot provide a general printing function. */ +#define _dl_procinfo(type, word) (-1) + +/* There are no hardware capabilities defined. */ +#define _dl_hwcap_string(idx) "" + +/* By default there is no important hardware capability. */ +#define HWCAP_IMPORTANT (0) + +/* We don't have any hardware capabilities. */ +#define _DL_HWCAP_COUNT 0 + +#define _dl_string_hwcap(str) (-1) + +#endif /* dl-procinfo.h */ diff --git a/REORG.TODO/sysdeps/alpha/dl-sysdep.h b/REORG.TODO/sysdeps/alpha/dl-sysdep.h new file mode 100644 index 0000000000..67a90f1bd0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-sysdep.h @@ -0,0 +1,23 @@ +/* System-specific settings for dynamic linker code. Alpha version. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include_next <dl-sysdep.h> + +/* _dl_argv cannot be attribute_relro, because _dl_start_user + might write into it after _dl_start returns. */ +#define DL_ARGV_NOT_RELRO 1 diff --git a/REORG.TODO/sysdeps/alpha/dl-tls.h b/REORG.TODO/sysdeps/alpha/dl-tls.h new file mode 100644 index 0000000000..cac369c9d3 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-tls.h @@ -0,0 +1,27 @@ +/* Thread-local storage handling in the ELF dynamic linker. Alpha version. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + + +/* Type used for the representation of TLS information in the GOT. */ +typedef struct +{ + unsigned long int ti_module; + unsigned long int ti_offset; +} tls_index; + +extern void *__tls_get_addr (tls_index *ti); diff --git a/REORG.TODO/sysdeps/alpha/dl-trampoline.S b/REORG.TODO/sysdeps/alpha/dl-trampoline.S new file mode 100644 index 0000000000..f527705d27 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-trampoline.S @@ -0,0 +1,540 @@ +/* PLT trampolines. Alpha version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .set noat + +.macro savei regno, offset + stq $\regno, \offset($30) + cfi_rel_offset(\regno, \offset) +.endm + +.macro savef regno, offset + stt $f\regno, \offset($30) + cfi_rel_offset(\regno+32, \offset) +.endm + + .align 4 + .globl _dl_runtime_resolve_new + .ent _dl_runtime_resolve_new + +#undef FRAMESIZE +#define FRAMESIZE 14*8 + +_dl_runtime_resolve_new: + .frame $30, FRAMESIZE, $26, 0 + .mask 0x4000000, 0 + + ldah $29, 0($27) !gpdisp!1 + lda $30, -FRAMESIZE($30) + stq $26, 0*8($30) + stq $16, 2*8($30) + + stq $17, 3*8($30) + lda $29, 0($29) !gpdisp!1 + stq $18, 4*8($30) + mov $28, $16 /* link_map from .got.plt */ + + stq $19, 5*8($30) + mov $25, $17 /* offset of reloc entry */ + stq $20, 6*8($30) + mov $26, $18 /* return address */ + + stq $21, 7*8($30) + stt $f16, 8*8($30) + stt $f17, 9*8($30) + stt $f18, 10*8($30) + + stt $f19, 11*8($30) + stt $f20, 12*8($30) + stt $f21, 13*8($30) + .prologue 2 + + bsr $26, _dl_fixup !samegp + mov $0, $27 + + ldq $26, 0*8($30) + ldq $16, 2*8($30) + ldq $17, 3*8($30) + ldq $18, 4*8($30) + ldq $19, 5*8($30) + ldq $20, 6*8($30) + ldq $21, 7*8($30) + ldt $f16, 8*8($30) + ldt $f17, 9*8($30) + ldt $f18, 10*8($30) + ldt $f19, 11*8($30) + ldt $f20, 12*8($30) + ldt $f21, 13*8($30) + lda $30, FRAMESIZE($30) + jmp $31, ($27), 0 + .end 
_dl_runtime_resolve_new + + .globl _dl_runtime_profile_new + .type _dl_runtime_profile_new, @function + +#undef FRAMESIZE +#define FRAMESIZE 20*8 + + /* We save the registers in a different order than desired by + .mask/.fmask, so we have to use explicit cfi directives. */ + cfi_startproc + +_dl_runtime_profile_new: + ldah $29, 0($27) !gpdisp!2 + lda $30, -FRAMESIZE($30) + cfi_adjust_cfa_offset (FRAMESIZE) + savei 26, 0*8 + stq $16, 2*8($30) + + stq $17, 3*8($30) + lda $29, 0($29) !gpdisp!2 + stq $18, 4*8($30) + lda $1, FRAMESIZE($30) /* incoming sp value */ + + stq $1, 1*8($30) + stq $19, 5*8($30) + stq $20, 6*8($30) + mov $28, $16 /* link_map from .got.plt */ + + stq $21, 7*8($30) + mov $25, $17 /* offset of reloc entry */ + stt $f16, 8*8($30) + mov $26, $18 /* return address */ + + stt $f17, 9*8($30) + mov $30, $19 /* La_alpha_regs address */ + stt $f18, 10*8($30) + lda $20, 14*8($30) /* framesize address */ + + stt $f19, 11*8($30) + stt $f20, 12*8($30) + stt $f21, 13*8($30) + stq $28, 16*8($30) + stq $25, 17*8($30) + + bsr $26, _dl_profile_fixup !samegp + mov $0, $27 + + /* Discover if we're wrapping this call. */ + ldq $18, 14*8($30) + bge $18, 1f + + ldq $26, 0*8($30) + ldq $16, 2*8($30) + ldq $17, 3*8($30) + ldq $18, 4*8($30) + ldq $19, 5*8($30) + ldq $20, 6*8($30) + ldq $21, 7*8($30) + ldt $f16, 8*8($30) + ldt $f17, 9*8($30) + ldt $f18, 10*8($30) + ldt $f19, 11*8($30) + ldt $f20, 12*8($30) + ldt $f21, 13*8($30) + lda $30, FRAMESIZE($30) + jmp $31, ($27), 0 + +1: + /* Create a frame pointer and allocate a new argument frame. */ + savei 15, 15*8 + mov $30, $15 + cfi_def_cfa_register (15) + addq $18, 15, $18 + bic $18, 15, $18 + subq $30, $18, $30 + + /* Save the call destination around memcpy. Use the framesize slot + of the original frame ($15): its value is already in $18, whereas a + slot addressed from the lowered $30 would overlap either the saved + argument registers or the memcpy destination. */ + stq $0, 14*8($15) + + /* Copy the stack arguments into place. */ + lda $16, 0($30) + lda $17, FRAMESIZE($15) + jsr $26, memcpy + ldgp $29, 0($26) + + /* Reload the argument registers. 
*/ + ldq $27, 14*8($15) + ldq $16, 2*8($15) + ldq $17, 3*8($15) + ldq $18, 4*8($15) + ldq $19, 5*8($15) + ldq $20, 6*8($15) + ldq $21, 7*8($15) + ldt $f16, 8*8($15) + ldt $f17, 9*8($15) + ldt $f18, 10*8($15) + ldt $f19, 11*8($15) + ldt $f20, 12*8($15) + ldt $f21, 13*8($15) + + jsr $26, ($27), 0 + ldgp $29, 0($26) + + /* Set up for call to _dl_call_pltexit. */ + ldq $16, 16*8($15) + ldq $17, 17*8($15) + stq $0, 16*8($15) + lda $18, 0($15) + stq $1, 17*8($15) + lda $19, 16*8($15) + stt $f0, 18*8($15) + stt $f1, 19*8($15) + bsr $26, _dl_call_pltexit !samegp + + /* Reload the wrapped function's return values: the registers do not + survive the call above, and the saved copies were passed to + _dl_call_pltexit by address in $19. */ + ldq $0, 16*8($15) + ldq $1, 17*8($15) + ldt $f0, 18*8($15) + ldt $f1, 19*8($15) + + mov $15, $30 + cfi_def_cfa_register (30) + ldq $26, 0($30) + ldq $15, 15*8($30) + lda $30, FRAMESIZE($30) + ret + + cfi_endproc + .size _dl_runtime_profile_new, .-_dl_runtime_profile_new + + .align 4 + .globl _dl_runtime_resolve_old + .ent _dl_runtime_resolve_old + +#undef FRAMESIZE +#define FRAMESIZE 44*8 + +_dl_runtime_resolve_old: + lda $30, -FRAMESIZE($30) + .frame $30, FRAMESIZE, $26 + /* Preserve all registers that C normally doesn't. 
*/ + stq $26, 0*8($30) + stq $0, 1*8($30) + stq $1, 2*8($30) + stq $2, 3*8($30) + stq $3, 4*8($30) + stq $4, 5*8($30) + stq $5, 6*8($30) + stq $6, 7*8($30) + stq $7, 8*8($30) + stq $8, 9*8($30) + stq $16, 10*8($30) + stq $17, 11*8($30) + stq $18, 12*8($30) + stq $19, 13*8($30) + stq $20, 14*8($30) + stq $21, 15*8($30) + stq $22, 16*8($30) + stq $23, 17*8($30) + stq $24, 18*8($30) + stq $25, 19*8($30) + stq $29, 20*8($30) + stt $f0, 21*8($30) + stt $f1, 22*8($30) + stt $f10, 23*8($30) + stt $f11, 24*8($30) + stt $f12, 25*8($30) + stt $f13, 26*8($30) + stt $f14, 27*8($30) + stt $f15, 28*8($30) + stt $f16, 29*8($30) + stt $f17, 30*8($30) + stt $f18, 31*8($30) + stt $f19, 32*8($30) + stt $f20, 33*8($30) + stt $f21, 34*8($30) + stt $f22, 35*8($30) + stt $f23, 36*8($30) + stt $f24, 37*8($30) + stt $f25, 38*8($30) + stt $f26, 39*8($30) + stt $f27, 40*8($30) + stt $f28, 41*8($30) + stt $f29, 42*8($30) + stt $f30, 43*8($30) + .mask 0x27ff01ff, -FRAMESIZE + .fmask 0xfffffc03, -FRAMESIZE+21*8 + /* Set up our GP. */ + br $29, .+4 + ldgp $29, 0($29) + .prologue 0 + /* Set up the arguments for _dl_fixup: + $16 = link_map out of plt0 + $17 = offset of reloc entry = ($28 - $27 - 20) /12 * 24 + $18 = return address + */ + subq $28, $27, $17 + ldq $16, 8($27) + subq $17, 20, $17 + mov $26, $18 + addq $17, $17, $17 + bsr $26, _dl_fixup !samegp + + /* Move the destination address into position. */ + mov $0, $27 + /* Restore program registers. 
*/ + ldq $26, 0*8($30) + ldq $0, 1*8($30) + ldq $1, 2*8($30) + ldq $2, 3*8($30) + ldq $3, 4*8($30) + ldq $4, 5*8($30) + ldq $5, 6*8($30) + ldq $6, 7*8($30) + ldq $7, 8*8($30) + ldq $8, 9*8($30) + ldq $16, 10*8($30) + ldq $17, 11*8($30) + ldq $18, 12*8($30) + ldq $19, 13*8($30) + ldq $20, 14*8($30) + ldq $21, 15*8($30) + ldq $22, 16*8($30) + ldq $23, 17*8($30) + ldq $24, 18*8($30) + ldq $25, 19*8($30) + ldq $29, 20*8($30) + ldt $f0, 21*8($30) + ldt $f1, 22*8($30) + ldt $f10, 23*8($30) + ldt $f11, 24*8($30) + ldt $f12, 25*8($30) + ldt $f13, 26*8($30) + ldt $f14, 27*8($30) + ldt $f15, 28*8($30) + ldt $f16, 29*8($30) + ldt $f17, 30*8($30) + ldt $f18, 31*8($30) + ldt $f19, 32*8($30) + ldt $f20, 33*8($30) + ldt $f21, 34*8($30) + ldt $f22, 35*8($30) + ldt $f23, 36*8($30) + ldt $f24, 37*8($30) + ldt $f25, 38*8($30) + ldt $f26, 39*8($30) + ldt $f27, 40*8($30) + ldt $f28, 41*8($30) + ldt $f29, 42*8($30) + ldt $f30, 43*8($30) + /* Flush the Icache after having modified the .plt code. */ + imb + /* Clean up and turn control to the destination */ + lda $30, FRAMESIZE($30) + jmp $31, ($27) + + .end _dl_runtime_resolve_old + + .globl _dl_runtime_profile_old + .usepv _dl_runtime_profile_old, no + .type _dl_runtime_profile_old, @function + + /* We save the registers in a different order than desired by + .mask/.fmask, so we have to use explicit cfi directives. */ + cfi_startproc + +#undef FRAMESIZE +#define FRAMESIZE 50*8 + + .align 4 +_dl_runtime_profile_old: + lda $30, -FRAMESIZE($30) + cfi_adjust_cfa_offset (FRAMESIZE) + + /* Preserve all argument registers. This also constructs the + La_alpha_regs structure. */ + savei 26, 0*8 + savei 16, 2*8 + savei 17, 3*8 + savei 18, 4*8 + savei 19, 5*8 + savei 20, 6*8 + savei 21, 7*8 + lda $16, FRAMESIZE($30) + savef 16, 8*8 + savef 17, 9*8 + savef 18, 10*8 + savef 19, 11*8 + savef 20, 12*8 + savef 21, 13*8 + stq $16, 1*8($30) + + /* Preserve all registers that C normally doesn't. 
*/ + savei 0, 14*8 + savei 1, 15*8 + savei 2, 16*8 + savei 3, 17*8 + savei 4, 18*8 + savei 5, 19*8 + savei 6, 20*8 + savei 7, 21*8 + savei 8, 22*8 + savei 22, 23*8 + savei 23, 24*8 + savei 24, 25*8 + savei 25, 26*8 + savei 29, 27*8 + savef 0, 28*8 + savef 1, 29*8 + savef 10, 30*8 + savef 11, 31*8 + savef 12, 32*8 + savef 13, 33*8 + savef 14, 34*8 + savef 15, 35*8 + savef 22, 36*8 + savef 23, 37*8 + savef 24, 38*8 + savef 25, 39*8 + savef 26, 40*8 + savef 27, 41*8 + savef 28, 42*8 + savef 29, 43*8 + savef 30, 44*8 + + /* Set up our GP. */ + br $29, .+4 + ldgp $29, 0($29) + + /* Set up the arguments for _dl_profile_fixup: + $16 = link_map out of plt0 + $17 = offset of reloc entry = ($28 - $27 - 20) /12 * 24 + $18 = return address + $19 = La_alpha_regs address + $20 = framesize address + */ + subq $28, $27, $17 + ldq $16, 8($27) + subq $17, 20, $17 + mov $26, $18 + addq $17, $17, $17 + lda $19, 0($30) + lda $20, 45*8($30) + stq $16, 48*8($30) + stq $17, 49*8($30) + + bsr $26, _dl_profile_fixup !samegp + + /* Discover if we're wrapping this call. */ + ldq $18, 45*8($30) + bge $18, 1f + + /* Move the destination address into position. */ + mov $0, $27 + /* Restore program registers. 
*/ + ldq $26, 0*8($30) + ldq $16, 2*8($30) + ldq $17, 3*8($30) + ldq $18, 4*8($30) + ldq $19, 5*8($30) + ldq $20, 6*8($30) + ldq $21, 7*8($30) + ldt $f16, 8*8($30) + ldt $f17, 9*8($30) + ldt $f18, 10*8($30) + ldt $f19, 11*8($30) + ldt $f20, 12*8($30) + ldt $f21, 13*8($30) + ldq $0, 14*8($30) + ldq $1, 15*8($30) + ldq $2, 16*8($30) + ldq $3, 17*8($30) + ldq $4, 18*8($30) + ldq $5, 19*8($30) + ldq $6, 20*8($30) + ldq $7, 21*8($30) + ldq $8, 22*8($30) + ldq $22, 23*8($30) + ldq $23, 24*8($30) + ldq $24, 25*8($30) + ldq $25, 26*8($30) + ldq $29, 27*8($30) + ldt $f0, 28*8($30) + ldt $f1, 29*8($30) + ldt $f10, 30*8($30) + ldt $f11, 31*8($30) + ldt $f12, 32*8($30) + ldt $f13, 33*8($30) + ldt $f14, 34*8($30) + ldt $f15, 35*8($30) + ldt $f22, 36*8($30) + ldt $f23, 37*8($30) + ldt $f24, 38*8($30) + ldt $f25, 39*8($30) + ldt $f26, 40*8($30) + ldt $f27, 41*8($30) + ldt $f28, 42*8($30) + ldt $f29, 43*8($30) + ldt $f30, 44*8($30) + + /* Clean up and turn control to the destination. */ + lda $30, FRAMESIZE($30) + jmp $31, ($27) + +1: + /* Create a frame pointer and allocate a new argument frame. */ + savei 15, 45*8 + mov $30, $15 + cfi_def_cfa_register (15) + addq $18, 15, $18 + bic $18, 15, $18 + subq $30, $18, $30 + + /* Save the call destination around memcpy. Address the slot from + the original frame ($15): a slot addressed from the lowered $30 + falls inside the memcpy destination once the copied frame is large + enough. Slot 46 is not written again until the return values are + stored after the call. */ + stq $0, 46*8($15) + + /* Copy the stack arguments into place. */ + lda $16, 0($30) + lda $17, FRAMESIZE($15) + jsr $26, memcpy + ldgp $29, 0($26) + + /* Reload the argument registers. */ + ldq $27, 46*8($15) + ldq $16, 2*8($15) + ldq $17, 3*8($15) + ldq $18, 4*8($15) + ldq $19, 5*8($15) + ldq $20, 6*8($15) + ldq $21, 7*8($15) + ldt $f16, 8*8($15) + ldt $f17, 9*8($15) + ldt $f18, 10*8($15) + ldt $f19, 11*8($15) + ldt $f20, 12*8($15) + ldt $f21, 13*8($15) + + jsr $26, ($27), 0 + ldgp $29, 0($26) + + /* Set up for call to _dl_call_pltexit. 
*/ + ldq $16, 48*8($15) + ldq $17, 49*8($15) + stq $0, 46*8($15) + lda $18, 0($15) + stq $1, 47*8($15) + lda $19, 46*8($15) + stt $f0, 48*8($15) + stt $f1, 49*8($15) + bsr $26, _dl_call_pltexit !samegp + + /* Reload the wrapped function's return values: the registers do not + survive the call above, and the saved copies were passed to + _dl_call_pltexit by address in $19. */ + ldq $0, 46*8($15) + ldq $1, 47*8($15) + ldt $f0, 48*8($15) + ldt $f1, 49*8($15) + + mov $15, $30 + cfi_def_cfa_register (30) + ldq $26, 0($30) + ldq $15, 45*8($30) + lda $30, FRAMESIZE($30) + ret + + cfi_endproc + .size _dl_runtime_profile_old, .-_dl_runtime_profile_old diff --git a/REORG.TODO/sysdeps/alpha/ffs.S b/REORG.TODO/sysdeps/alpha/ffs.S new file mode 100644 index 0000000000..8cd7e5123a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/ffs.S @@ -0,0 +1,91 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by David Mosberger (davidm@cs.arizona.edu). + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Finds the first bit set in an integer. Optimized for the Alpha + architecture. */ + +#include <sysdep.h> + + .set noreorder + .set noat + + +ENTRY(__ffs) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 + zap $16, 0xF0, $16 + br $ffsl..ng +#else + .prologue 0 + zap $16, 0xF0, $16 + # FALLTHRU +#endif +END(__ffs) + + .align 4 +ENTRY(ffsl) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +$ffsl..ng: +#else + .prologue 0 +#endif + not $16, $1 # e0 : + ldi $2, -1 # .. 
e1 : + cmpbge $1, $2, $3 # e0 : bit N == 1 for byte N == 0 + clr $0 # .. e1 : + addq $3, 1, $4 # e0 : + bic $4, $3, $3 # e1 : bit N == 1 for first byte N != 0 + and $3, 0xF0, $4 # e0 : + and $3, 0xCC, $5 # .. e1 : + and $3, 0xAA, $6 # e0 : + cmovne $4, 4, $0 # .. e1 : + cmovne $5, 2, $5 # e0 : + cmovne $6, 1, $6 # .. e1 : + addl $0, $5, $0 # e0 : + addl $0, $6, $0 # e1 : $0 == N + extbl $16, $0, $1 # e0 : $1 == byte N + ldi $2, 1 # .. e1 : + negq $1, $3 # e0 : + and $3, $1, $3 # e1 : bit N == least bit set of byte N + and $3, 0xF0, $4 # e0 : + and $3, 0xCC, $5 # .. e1 : + and $3, 0xAA, $6 # e0 : + cmovne $4, 5, $2 # .. e1 : + cmovne $5, 2, $5 # e0 : + cmovne $6, 1, $6 # .. e1 : + s8addl $0, $2, $0 # e0 : mult byte ofs by 8 and sum + addl $5, $6, $5 # .. e1 : + addl $0, $5, $0 # e0 : + nop # .. e1 : + cmoveq $16, 0, $0 # e0 : trap input == 0 case. + ret # .. e1 : 18 + +END(ffsl) + +weak_alias (__ffs, ffs) +libc_hidden_def (__ffs) +libc_hidden_builtin_def (ffs) +weak_extern (ffsl) +weak_alias (ffsl, ffsll) diff --git a/REORG.TODO/sysdeps/alpha/ffsll.S b/REORG.TODO/sysdeps/alpha/ffsll.S new file mode 100644 index 0000000000..b2f46d899c --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/ffsll.S @@ -0,0 +1 @@ +/* This function is defined in ffs.S. 
*/ diff --git a/REORG.TODO/sysdeps/alpha/fpu/Versions b/REORG.TODO/sysdeps/alpha/fpu/Versions new file mode 100644 index 0000000000..c9b0e03a91 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/Versions @@ -0,0 +1,23 @@ +libc { + GLIBC_2.0 { + # functions used in other libraries + __ieee_get_fp_control; __ieee_set_fp_control; + } +} +libm { + GLIBC_2.3.4 { + # functions implementing old complex float abi + __c1_cabsf; __c1_cacosf; __c1_cacoshf; __c1_cargf; __c1_casinf; + __c1_casinhf; __c1_catanf; __c1_catanhf; __c1_ccosf; __c1_ccoshf; + __c1_cexpf; __c1_cimagf; __c1_clog10f; __c1_clogf; __c1_conjf; + __c1_cpowf; __c1_cprojf; __c1_crealf; __c1_csinf; __c1_csinhf; + __c1_csqrtf; __c1_ctanf; __c1_ctanhf; + + # functions implementing new complex float abi + cabsf; cacosf; cacoshf; cargf; casinf; + casinhf; catanf; catanhf; ccosf; ccoshf; + cexpf; cimagf; clog10f; clogf; conjf; + cpowf; cprojf; crealf; csinf; csinhf; + csqrtf; ctanf; ctanhf; + } +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/bits/fenv.h b/REORG.TODO/sysdeps/alpha/fpu/bits/fenv.h new file mode 100644 index 0000000000..94ca4a4da0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/bits/fenv.h @@ -0,0 +1,141 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _FENV_H +# error "Never use <bits/fenv.h> directly; include <fenv.h> instead." +#endif + + +/* Define the bits representing the exception. + + Note that these are the bit positions as defined by the OSF/1 + ieee_{get,set}_control_word interface and not by the hardware fpcr. + + See the Alpha Architecture Handbook section 4.7.7.3 for details, + but in summary, trap shadows mean the hardware register can acquire + extra exception bits so for proper IEEE support the tracking has to + be done in software -- in this case with kernel support. + + As to why the system call interface isn't in the same format as + the hardware register, only those crazy folks at DEC can tell you. */ + +enum + { +#ifdef __USE_GNU + FE_DENORMAL = +#define FE_DENORMAL (1 << 22) + FE_DENORMAL, +#endif + + FE_INEXACT = +#define FE_INEXACT (1 << 21) + FE_INEXACT, + + FE_UNDERFLOW = +#define FE_UNDERFLOW (1 << 20) + FE_UNDERFLOW, + + FE_OVERFLOW = +#define FE_OVERFLOW (1 << 19) + FE_OVERFLOW, + + FE_DIVBYZERO = +#define FE_DIVBYZERO (1 << 18) + FE_DIVBYZERO, + + FE_INVALID = +#define FE_INVALID (1 << 17) + FE_INVALID, + + FE_ALL_EXCEPT = +#define FE_ALL_EXCEPT (0x3f << 17) + FE_ALL_EXCEPT + }; + +/* Alpha chips support all four defined rounding modes. + + Note that code must be compiled to use dynamic rounding (/d) instructions + to see these changes. For gcc this is -mfp-rounding-mode=d; for DEC cc + this is -fprm d. The default for both is static rounding to nearest. + + These are shifted down 58 bits from the hardware fpcr because the + functions are declared to take integers. */ + +enum + { + FE_TOWARDZERO = +#define FE_TOWARDZERO 0 + FE_TOWARDZERO, + + FE_DOWNWARD = +#define FE_DOWNWARD 1 + FE_DOWNWARD, + + FE_TONEAREST = +#define FE_TONEAREST 2 + FE_TONEAREST, + + FE_UPWARD = +#define FE_UPWARD 3 + FE_UPWARD, + }; + +#ifdef __USE_GNU +/* On later hardware, and later kernels for earlier hardware, we can forcibly + underflow denormal inputs and outputs.
This can speed up certain programs + significantly, usually without affecting accuracy. */ +enum + { + FE_MAP_DMZ = 1UL << 12, /* Map denorm inputs to zero */ +#define FE_MAP_DMZ FE_MAP_DMZ + + FE_MAP_UMZ = 1UL << 13, /* Map underflowed outputs to zero */ +#define FE_MAP_UMZ FE_MAP_UMZ + }; +#endif + +/* Type representing exception flags. */ +typedef unsigned long int fexcept_t; + +/* Type representing floating-point environment. */ +typedef unsigned long int fenv_t; + +/* If the default argument is used we use this value. Note that due to + architecture-specified page mappings, no user-space pointer will ever + have its two high bits set. Co-opt one. */ +#define FE_DFL_ENV ((const fenv_t *) 0x8800000000000000UL) + +#ifdef __USE_GNU +/* Floating-point environment where none of the exceptions are masked. */ +# define FE_NOMASK_ENV ((const fenv_t *) 0x880000000000003eUL) + +/* Floating-point environment with (processor-dependent) non-IEEE floating + point. In this case, mapping denormals to zero. */ +# define FE_NONIEEE_ENV ((const fenv_t *) 0x8800000000003000UL) +#endif + +/* The system calls to talk to the kernel's FP code. */ +extern unsigned long int __ieee_get_fp_control (void) __THROW; +extern void __ieee_set_fp_control (unsigned long int __value) __THROW; + +#if __GLIBC_USE (IEC_60559_BFP_EXT) +/* Type representing floating-point control modes. */ +typedef unsigned long int femode_t; + +/* Default floating-point control modes. */ +# define FE_DFL_MODE ((const femode_t *) 0x8800000000000000UL) +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/bits/mathinline.h b/REORG.TODO/sysdeps/alpha/fpu/bits/mathinline.h new file mode 100644 index 0000000000..00c8c42a83 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/bits/mathinline.h @@ -0,0 +1,125 @@ +/* Inline math functions for Alpha. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David Mosberger-Tang. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never use <bits/mathinline.h> directly; include <math.h> instead." +#endif + +#ifndef __extern_inline +# define __MATH_INLINE __inline +#else +# define __MATH_INLINE __extern_inline +#endif + +#if defined __USE_ISOC99 && defined __GNUC__ && !__GNUC_PREREQ(3,0) +# undef isgreater +# undef isgreaterequal +# undef isless +# undef islessequal +# undef islessgreater +# undef isunordered +# define isunordered(u, v) \ + (__extension__ \ + ({ double __r, __u = (u), __v = (v); \ + __asm ("cmptun/su %1,%2,%0\n\ttrapb" \ + : "=&f" (__r) : "f" (__u), "f"(__v)); \ + __r != 0; })) +#endif /* ISO C99 */ + +#if (!defined __NO_MATH_INLINES || defined __LIBC_INTERNAL_MATH_INLINES) \ + && defined __OPTIMIZE__ + +#if !__GNUC_PREREQ (4, 0) +# define __inline_copysign(NAME, TYPE) \ +__MATH_INLINE TYPE \ +__NTH (NAME (TYPE __x, TYPE __y)) \ +{ \ + TYPE __z; \ + __asm ("cpys %1, %2, %0" : "=f" (__z) : "f" (__y), "f" (__x)); \ + return __z; \ +} + +__inline_copysign (__copysignf, float) +__inline_copysign (copysignf, float) +__inline_copysign (__copysign, double) +__inline_copysign (copysign, double) + +# undef __inline_copysign +#endif + + +#if !__GNUC_PREREQ (2, 8) +# define __inline_fabs(NAME, TYPE) \ +__MATH_INLINE TYPE \ +__NTH (NAME (TYPE __x)) \ +{ \ + TYPE __z; \ + __asm ("cpys $f31, %1, 
%0" : "=f" (__z) : "f" (__x)); \ + return __z; \ +} + +__inline_fabs (__fabsf, float) +__inline_fabs (fabsf, float) +__inline_fabs (__fabs, double) +__inline_fabs (fabs, double) + +# undef __inline_fabs +#endif + +#ifdef __USE_ISOC99 + +/* Test for negative number. Used in the signbit() macro. */ +__MATH_INLINE int +__NTH (__signbitf (float __x)) +{ +#if !__GNUC_PREREQ (4, 0) + __extension__ union { float __f; int __i; } __u = { __f: __x }; + return __u.__i < 0; +#else + return __builtin_signbitf (__x); +#endif +} + +__MATH_INLINE int +__NTH (__signbit (double __x)) +{ +#if !__GNUC_PREREQ (4, 0) + __extension__ union { double __d; long __i; } __u = { __d: __x }; + return __u.__i < 0; +#else + return __builtin_signbit (__x); +#endif +} + +__MATH_INLINE int +__NTH (__signbitl (long double __x)) +{ +#if !__GNUC_PREREQ (4, 0) + __extension__ union { + long double __d; + long __i[sizeof(long double)/sizeof(long)]; + } __u = { __d: __x }; + return __u.__i[sizeof(long double)/sizeof(long) - 1] < 0; +#else + return __builtin_signbitl (__x); +#endif +} +#endif /* C99 */ + +#endif /* __NO_MATH_INLINES */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/cabsf.c b/REORG.TODO/sysdeps/alpha/fpu/cabsf.c new file mode 100644 index 0000000000..2ffd6a327d --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/cabsf.c @@ -0,0 +1,41 @@ +/* Return the complex absolute value of float complex value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __cabsf __cabsf_not_defined +#define cabsf cabsf_not_defined + +#include <complex.h> +#include <math.h> +#include "cfloat-compat.h" + +#undef __cabsf +#undef cabsf + +float +__c1_cabsf (c1_cfloat_decl (z)) +{ + return __hypotf (c1_cfloat_real (z), c1_cfloat_imag (z)); +} + +float +__c2_cabsf (c2_cfloat_decl (z)) +{ + return __hypotf (c2_cfloat_real (z), c2_cfloat_imag (z)); +} + +cfloat_versions (cabsf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/cargf.c b/REORG.TODO/sysdeps/alpha/fpu/cargf.c new file mode 100644 index 0000000000..6bff8a57f9 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/cargf.c @@ -0,0 +1,41 @@ +/* Compute argument of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __cargf __cargf_not_defined +#define cargf cargf_not_defined + +#include <complex.h> +#include <math.h> +#include "cfloat-compat.h" + +#undef __cargf +#undef cargf + +float +__c1_cargf (c1_cfloat_decl (x)) +{ + return __atan2f (c1_cfloat_imag (x), c1_cfloat_real (x)); +} + +float +__c2_cargf (c2_cfloat_decl (x)) +{ + return __atan2f (c2_cfloat_imag (x), c2_cfloat_real (x)); +} + +cfloat_versions (cargf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/cfloat-compat.h b/REORG.TODO/sysdeps/alpha/fpu/cfloat-compat.h new file mode 100644 index 0000000000..484cdd0df2 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/cfloat-compat.h @@ -0,0 +1,58 @@ +/* Compatibility macros for old and new Alpha complex float ABI. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* The behaviour of complex float changed between GCC 3.3 and 3.4. + + In 3.3 and before (below, complex version 1, or "c1"), complex float + values were packed into one floating point register. + + In 3.4 and later (below, complex version 2, or "c2"), GCC changed to + follow the official Tru64 ABI, which passes the components of a complex + as separate parameters. 
*/ + +typedef union { double d; _Complex float cf; } c1_compat; +# define c1_cfloat_decl(x) double x +# define c1_cfloat_real(x) __real__ c1_cfloat_value (x) +# define c1_cfloat_imag(x) __imag__ c1_cfloat_value (x) +# define c1_cfloat_value(x) (((c1_compat *)(void *)&x)->cf) +# define c1_cfloat_rettype double +# define c1_cfloat_return(x) ({ c1_compat _; _.cf = (x); _.d; }) + +# define c2_cfloat_decl(x) _Complex float x +# define c2_cfloat_real(x) __real__ x +# define c2_cfloat_imag(x) __imag__ x +# define c2_cfloat_value(x) x +# define c2_cfloat_rettype _Complex float +# define c2_cfloat_return(x) x + +/* Get the proper symbol versions defined for each function. */ + +#include <shlib-compat.h> + +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_3_4) +#define cfloat_versions_compat(func) \ + compat_symbol (libm, __c1_##func, func, GLIBC_2_1) +#else +#define cfloat_versions_compat(func) +#endif + +#define cfloat_versions(func) \ + cfloat_versions_compat(func); \ + versioned_symbol (libm, __c2_##func, func, GLIBC_2_3_4); \ + extern typeof(__c2_##func) __##func attribute_hidden; \ + strong_alias (__c2_##func, __##func) diff --git a/REORG.TODO/sysdeps/alpha/fpu/cimagf.c b/REORG.TODO/sysdeps/alpha/fpu/cimagf.c new file mode 100644 index 0000000000..6318f12297 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/cimagf.c @@ -0,0 +1,40 @@ +/* Return imaginary part of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __cimagf __cimagf_not_defined +#define cimagf cimagf_not_defined + +#include <complex.h> +#include "cfloat-compat.h" + +#undef __cimagf +#undef cimagf + +float +__c1_cimagf (c1_cfloat_decl (z)) +{ + return c1_cfloat_imag (z); +} + +float +__c2_cimagf (c2_cfloat_decl (z)) +{ + return c2_cfloat_imag (z); +} + +cfloat_versions (cimagf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/conjf.c b/REORG.TODO/sysdeps/alpha/fpu/conjf.c new file mode 100644 index 0000000000..802898a5cb --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/conjf.c @@ -0,0 +1,42 @@ +/* Return complex conjugate of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __conjf __conjf_not_defined +#define conjf conjf_not_defined + +#include <complex.h> +#include "cfloat-compat.h" + +#undef __conjf +#undef conjf + +c1_cfloat_rettype +__c1_conjf (c1_cfloat_decl (z)) +{ + _Complex float r = ~ c1_cfloat_value (z); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_conjf (c2_cfloat_decl (z)) +{ + _Complex float r = ~ c2_cfloat_value (z); + return c2_cfloat_return (r); +} + +cfloat_versions (conjf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/crealf.c b/REORG.TODO/sysdeps/alpha/fpu/crealf.c new file mode 100644 index 0000000000..fdaaf2e59e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/crealf.c @@ -0,0 +1,40 @@ +/* Return real part of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __crealf __crealf_not_defined +#define crealf crealf_not_defined + +#include <complex.h> +#include "cfloat-compat.h" + +#undef __crealf +#undef crealf + +float +__c1_crealf (c1_cfloat_decl (z)) +{ + return c1_cfloat_real (z); +} + +float +__c2_crealf (c2_cfloat_decl (z)) +{ + return c2_cfloat_real (z); +} + +cfloat_versions (crealf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/e_sqrt.c b/REORG.TODO/sysdeps/alpha/fpu/e_sqrt.c new file mode 100644 index 0000000000..ec9d0d12f2 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/e_sqrt.c @@ -0,0 +1,187 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by David Mosberger (davidm@cs.arizona.edu). + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <shlib-compat.h> + +#if !defined(_IEEE_FP_INEXACT) + +/* + * This version is much faster than generic sqrt implementation, but + * it doesn't handle the inexact flag. It doesn't handle exceptional + * values either, but will defer to the full ieee754_sqrt routine which + * can. + */ + +/* Careful with rearranging this without consulting the assembly below. 
*/ +const static struct sqrt_data_struct { + unsigned long dn, up, half, almost_three_half; + unsigned long one_and_a_half, two_to_minus_30, one, nan; + const int T2[64]; +} sqrt_data __attribute__((used)) = { + 0x3fefffffffffffff, /* __dn = nextafter(1,-Inf) */ + 0x3ff0000000000001, /* __up = nextafter(1,+Inf) */ + 0x3fe0000000000000, /* half */ + 0x3ff7ffffffc00000, /* almost_three_half = 1.5-2^-30 */ + 0x3ff8000000000000, /* one_and_a_half */ + 0x3e10000000000000, /* two_to_minus_30 */ + 0x3ff0000000000000, /* one */ + 0xffffffffffffffff, /* nan */ + + { 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866, + 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f, + 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d, + 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0, + 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989, + 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd, + 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e, + 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd } +}; + +asm ("\ + /* Define offsets into the structure defined in C above. */ \n\ + $DN = 0*8 \n\ + $UP = 1*8 \n\ + $HALF = 2*8 \n\ + $ALMOST_THREE_HALF = 3*8 \n\ + $NAN = 7*8 \n\ + $T2 = 8*8 \n\ + \n\ + /* Stack variables. */ \n\ + $K = 0 \n\ + $Y = 8 \n\ + \n\ + .text \n\ + .align 5 \n\ + .globl __ieee754_sqrt \n\ + .ent __ieee754_sqrt \n\ +__ieee754_sqrt: \n\ + ldgp $29, 0($27) \n\ + subq $sp, 16, $sp \n\ + .frame $sp, 16, $26, 0\n" +#ifdef PROF +" lda $28, _mcount \n\ + jsr $28, ($28), _mcount\n" +#endif +" .prologue 1 \n\ + \n\ + .align 4 \n\ + stt $f16, $K($sp) # e0 : \n\ + mult $f31, $f31, $f31 # .. fm : \n\ + lda $4, sqrt_data # e0 : \n\ + fblt $f16, $fixup # .. fa : \n\ + \n\ + ldah $2, 0x5fe8 # e0 : \n\ + ldq $3, $K($sp) # .. e1 : \n\ + ldt $f12, $HALF($4) # e0 : \n\ + ldt $f18, $ALMOST_THREE_HALF($4) # .. e1 : \n\ + \n\ + srl $3, 52, $5 # e0 : $5 = sign+exponent \n\ + lda $6, 0x7fd # ..
e1 : \n\ + fnop # .. fa : \n\ + fnop # .. fm : \n\ + \n\ + subq $5, 1, $5 # e1 : \n\ + srl $3, 33, $1 # .. e0 : \n\ + cmpule $5, $6, $5 # e0 : \n\ + beq $5, $fixup # .. e1 : \n\ + \n\ + mult $f16, $f12, $f11 # fm : $f11 = x * 0.5 \n\ + subl $2, $1, $2 # .. e0 : \n\ + addt $f12, $f12, $f17 # .. fa : $f17 = 1.0 \n\ + srl $2, 12, $1 # e0 : \n\ + \n\ + and $1, 0xfc, $1 # e0 : \n\ + addq $1, $4, $1 # e1 : \n\ + ldl $1, $T2($1) # e0 : \n\ + addt $f12, $f17, $f15 # .. fa : $f15 = 1.5 \n\ + \n\ + subl $2, $1, $2 # e0 : \n\ + ldt $f14, $DN($4) # .. e1 : \n\ + sll $2, 32, $2 # e0 : \n\ + stq $2, $Y($sp) # e0 : \n\ + \n\ + ldt $f13, $Y($sp) # e0 : \n\ + mult/su $f11, $f13, $f10 # fm 2: $f10 = (x * 0.5) * y \n\ + mult $f10, $f13, $f10 # fm 4: $f10 = ((x*0.5)*y)*y \n\ + subt $f15, $f10, $f1 # fa 4: $f1 = (1.5-0.5*x*y*y) \n\ + \n\ + mult $f13, $f1, $f13 # fm 4: yp = y*(1.5-0.5*x*y^2)\n\ + mult/su $f11, $f13, $f1 # fm 4: $f11 = x * 0.5 * yp \n\ + mult $f1, $f13, $f11 # fm 4: $f11 = (x*0.5*yp)*yp \n\ + subt $f18, $f11, $f1 # fa 4: $f1=(1.5-2^-30)-x/2*yp^2\n\ + \n\ + mult $f13, $f1, $f13 # fm 4: ypp = $f13 = yp*$f1 \n\ + subt $f15, $f12, $f1 # .. fa : $f1 = (1.5 - 0.5) \n\ + ldt $f15, $UP($4) # .. e0 : \n\ + mult/su $f16, $f13, $f10 # fm 4: z = $f10 = x * ypp \n\ + \n\ + mult $f10, $f13, $f11 # fm 4: $f11 = z*ypp \n\ + mult $f10, $f12, $f12 # fm : $f12 = z*0.5 \n\ + subt $f1, $f11, $f1 # fa 4: $f1 = 1 - z*ypp \n\ + mult $f12, $f1, $f12 # fm 4: $f12 = z/2*(1 - z*ypp)\n\ + \n\ + addt $f10, $f12, $f0 # fa 4: zp=res= z+z/2*(1-z*ypp)\n\ + mult/c $f0, $f14, $f12 # fm 4: zmi = zp * DN \n\ + mult/c $f0, $f15, $f11 # fm : zpl = zp * UP \n\ + mult/c $f0, $f12, $f1 # fm : $f1 = zp * zmi \n\ + \n\ + mult/c $f0, $f11, $f15 # fm : $f15 = zp * zpl \n\ + subt/su $f1, $f16, $f13 # .. 
fa : y1 = zp*zmi - x \n\ + subt/su $f15, $f16, $f14 # fa 4: y2 = zp*zpl - x \n\ + fcmovge $f13, $f12, $f0 # fa 3: res = (y1>=0)?zmi:res \n\ + \n\ + fcmovlt $f14, $f11, $f0 # fa 4: res = (y2<0)?zpl:res \n\ + addq $sp, 16, $sp # .. e0 : \n\ + ret # .. e1 : \n\ + \n\ + .align 4 \n\ +$fixup: \n\ + addq $sp, 16, $sp \n\ + br __full_ieee754_sqrt !samegp \n\ + \n\ + .end __ieee754_sqrt"); + +/* Avoid the __sqrt_finite alias that dbl-64/e_sqrt.c would give... */ +#undef strong_alias +#define strong_alias(a,b) + +/* ... defining our own. */ +#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +asm (".global __sqrt_finite1; __sqrt_finite1 = __ieee754_sqrt"); +#else +asm (".global __sqrt_finite; __sqrt_finite = __ieee754_sqrt"); +#endif + +static double __full_ieee754_sqrt(double) __attribute_used__; +#define __ieee754_sqrt __full_ieee754_sqrt + +#elif SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +# define __sqrt_finite __sqrt_finite1 +#endif /* _IEEE_FP_INEXACT */ + +#include <sysdeps/ieee754/dbl-64/e_sqrt.c> + +/* Work around forgotten symbol in alphaev6 build. */ +#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +# undef __sqrt_finite +# undef __ieee754_sqrt +compat_symbol (libm, __sqrt_finite1, __sqrt_finite, GLIBC_2_15); +versioned_symbol (libm, __ieee754_sqrt, __sqrt_finite, GLIBC_2_18); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/e_sqrtf.c b/REORG.TODO/sysdeps/alpha/fpu/e_sqrtf.c new file mode 100644 index 0000000000..ad523f5cf2 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/e_sqrtf.c @@ -0,0 +1,14 @@ +#include <shlib-compat.h> + +#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +# define __sqrtf_finite __sqrtf_finite1 +#endif + +#include <sysdeps/ieee754/flt-32/e_sqrtf.c> + +/* Work around forgotten symbol in alphaev6 build. 
*/ +#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +# undef __sqrtf_finite +compat_symbol (libm, __sqrtf_finite1, __sqrtf_finite, GLIBC_2_15); +versioned_symbol (libm, __ieee754_sqrtf, __sqrtf_finite, GLIBC_2_18); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/fclrexcpt.c b/REORG.TODO/sysdeps/alpha/fpu/fclrexcpt.c new file mode 100644 index 0000000000..9e9be0b206 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fclrexcpt.c @@ -0,0 +1,47 @@ +/* Clear given exceptions in current floating-point environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__feclearexcept (int excepts) +{ + unsigned long int swcr; + + /* Get the current state. */ + swcr = __ieee_get_fp_control (); + + /* Clear the relevant bits. */ + swcr &= ~((unsigned long int) excepts & SWCR_STATUS_MASK); + + /* Put the new state in effect. */ + __ieee_set_fp_control (swcr); + + /* Success. 
*/ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feclearexcept, __old_feclearexcept) +compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1); +#endif + +libm_hidden_ver (__feclearexcept, feclearexcept) +versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/alpha/fpu/fedisblxcpt.c b/REORG.TODO/sysdeps/alpha/fpu/fedisblxcpt.c new file mode 100644 index 0000000000..029393e558 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fedisblxcpt.c @@ -0,0 +1,35 @@ +/* Disable floating-point exceptions. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> + +/* Disable the exceptions selected by EXCEPTS (FE_* bits) and return the + set that was enabled on entry, also expressed as FE_* bits. */ +int +fedisableexcept (int excepts) +{ + /* Software control word as it stands now. */ + const unsigned long int swcr = __ieee_get_fp_control (); + /* Enable bits sit SWCR_ENABLE_SHIFT below the matching FE_* bits. */ + const unsigned long int previous + = (swcr & SWCR_ENABLE_MASK) << SWCR_ENABLE_SHIFT; + + /* Drop the enable bits that correspond to EXCEPTS and install the result. */ + __ieee_set_fp_control (swcr + & ~((excepts >> SWCR_ENABLE_SHIFT) & SWCR_ENABLE_MASK)); + + return previous; +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/feenablxcpt.c b/REORG.TODO/sysdeps/alpha/fpu/feenablxcpt.c new file mode 100644 index 0000000000..8244f02cd3 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/feenablxcpt.c @@ -0,0 +1,35 @@ +/* Enable floating-point exceptions. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> + +/* Enable the exceptions selected by EXCEPTS (FE_* bits) and return the + set that was enabled on entry, also expressed as FE_* bits. */ +int +feenableexcept (int excepts) +{ + /* Software control word as it stands now. */ + const unsigned long int swcr = __ieee_get_fp_control (); + /* Enable bits sit SWCR_ENABLE_SHIFT below the matching FE_* bits. */ + const unsigned long int previous + = (swcr & SWCR_ENABLE_MASK) << SWCR_ENABLE_SHIFT; + + /* Add the enable bits that correspond to EXCEPTS and install the result. */ + __ieee_set_fp_control (swcr + | ((excepts >> SWCR_ENABLE_SHIFT) & SWCR_ENABLE_MASK)); + + return previous; +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/fegetenv.c b/REORG.TODO/sysdeps/alpha/fpu/fegetenv.c new file mode 100644 index 0000000000..0b242b3c12 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fegetenv.c @@ -0,0 +1,48 @@ +/* Store current floating-point environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Store the current floating-point environment in *ENVP: the rounding-mode + field of the hardware FPCR merged with the SWCR_ALL_MASK bits of the + software control word. Always returns 0. */ +int +__fegetenv (fenv_t *envp) +{ + unsigned long int fpcr; + unsigned long int swcr; + + /* Get status from software and hardware. Note that we don't need an + excb because the callsys is an implied trap barrier. */ + swcr = __ieee_get_fp_control (); + __asm__ __volatile__ ("mf_fpcr %0" : "=f" (fpcr)); + + /* Merge the two bits of information. */ + *envp = ((fpcr & FPCR_ROUND_MASK) | (swcr & SWCR_ALL_MASK)); + + /* Success. 
*/ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetenv, __old_fegetenv) +compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fegetenv) +versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2); +libm_hidden_ver(__fegetenv, fegetenv) diff --git a/REORG.TODO/sysdeps/alpha/fpu/fegetexcept.c b/REORG.TODO/sysdeps/alpha/fpu/fegetexcept.c new file mode 100644 index 0000000000..ccb207433e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fegetexcept.c @@ -0,0 +1,30 @@ +/* Get enabled floating-point exceptions. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Return the currently enabled exceptions as FE_* bits. */ +int +fegetexcept (void) +{ + /* Enable field of the software control word. */ + const unsigned long int enabled + = __ieee_get_fp_control () & SWCR_ENABLE_MASK; + + /* Move the field up to the FE_* bit positions. */ + return enabled << SWCR_ENABLE_SHIFT; +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/fegetmode.c b/REORG.TODO/sysdeps/alpha/fpu/fegetmode.c new file mode 100644 index 0000000000..18ab5d328a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fegetmode.c @@ -0,0 +1,33 @@ +/* Store current floating-point control modes. Alpha version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Store the current control modes in *MODEP: the rounding-mode field of the + hardware FPCR merged with the SWCR_ALL_MASK bits of the software control + word, built the same way __fegetenv builds an environment. Always + returns 0. */ +int +fegetmode (femode_t *modep) +{ + unsigned long int fpcr; + unsigned long int swcr; + + /* As in fegetenv: the system call comes first and serves as the trap + barrier, so no excb is needed before reading the FPCR. */ + swcr = __ieee_get_fp_control (); + __asm__ __volatile__ ("mf_fpcr %0" : "=f" (fpcr)); + *modep = ((fpcr & FPCR_ROUND_MASK) | (swcr & SWCR_ALL_MASK)); + + return 0; +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/fegetround.c b/REORG.TODO/sysdeps/alpha/fpu/fegetround.c new file mode 100644 index 0000000000..9befd175b7 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fegetround.c @@ -0,0 +1,33 @@ +/* Return current rounding direction. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Return the current rounding direction: the two-bit field that starts at + bit FPCR_ROUND_SHIFT of the hardware FPCR. */ +int +__fegetround (void) +{ + unsigned long fpcr; + + /* Unlike fegetenv there is no system call acting as trap barrier here, + hence the explicit excb before the FPCR is read. */ + __asm__ __volatile__("excb; mf_fpcr %0" : "=f"(fpcr)); + + return (fpcr >> FPCR_ROUND_SHIFT) & 3; +} +libm_hidden_def (__fegetround) +weak_alias (__fegetround, fegetround) +libm_hidden_weak (fegetround) diff --git a/REORG.TODO/sysdeps/alpha/fpu/feholdexcpt.c b/REORG.TODO/sysdeps/alpha/fpu/feholdexcpt.c new file mode 100644 index 0000000000..7f4f487ad5 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/feholdexcpt.c @@ -0,0 +1,35 @@ +/* Store current floating-point environment and clear exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Save the current environment in *ENVP, then write back only its + SWCR_MAP_MASK bits to the software control word. Always returns 0. */ +int +__feholdexcept (fenv_t *envp) +{ + /* Save the current state. */ + __fegetenv(envp); + + /* Clear all exception status bits and exception enable bits. 
*/ + __ieee_set_fp_control(*envp & SWCR_MAP_MASK); + + return 0; +} +libm_hidden_def (__feholdexcept) +weak_alias (__feholdexcept, feholdexcept) +libm_hidden_weak (feholdexcept) diff --git a/REORG.TODO/sysdeps/alpha/fpu/fenv_libc.h b/REORG.TODO/sysdeps/alpha/fpu/fenv_libc.h new file mode 100644 index 0000000000..355d6f0659 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fenv_libc.h @@ -0,0 +1,39 @@ +/* Internal libc stuff for floating point environment routines. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FENV_LIBC_H +#define _FENV_LIBC_H 1 + +#include <fenv.h> + +/* Rounding-mode field of the hardware FPCR: two bits starting at bit 58. */ +#define FPCR_ROUND_MASK (3UL << 58) +#define FPCR_ROUND_SHIFT 58 + +/* Fields of the software control word read and written through + __ieee_get_fp_control and __ieee_set_fp_control. The status field uses + the FE_* bit positions directly; the enable field is the same set of bits + shifted right by SWCR_ENABLE_SHIFT. SWCR_ALL_MASK is the union of the + enable, map and status fields. */ +#define SWCR_MAP_MASK (3UL << 12) +#define SWCR_ENABLE_SHIFT 16 +#define SWCR_ENABLE_MASK (FE_ALL_EXCEPT >> SWCR_ENABLE_SHIFT) +#define SWCR_STATUS_MASK (FE_ALL_EXCEPT) +#define SWCR_ALL_MASK (SWCR_ENABLE_MASK \ + | SWCR_MAP_MASK \ + | SWCR_STATUS_MASK) + +/* These are declared for public consumption in <bits/fenv.h>. 
*/ +libc_hidden_proto(__ieee_set_fp_control) +libc_hidden_proto(__ieee_get_fp_control) + +#endif /* fenv_libc.h */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/fesetenv.c b/REORG.TODO/sysdeps/alpha/fpu/fesetenv.c new file mode 100644 index 0000000000..c115f13a87 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fesetenv.c @@ -0,0 +1,57 @@ +/* Install given floating-point environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Install the floating-point environment ENVP. ENVP is either a pointer to + a saved environment or, when its value is negative as a long int, the + encoded environment itself. The rounding mode is written to the hardware + FPCR and the SWCR_ALL_MASK bits to the software control word. Always + returns 0. */ +int +__fesetenv (const fenv_t *envp) +{ + unsigned long int fpcr; + fenv_t env; + + /* Magic encoding of default values: high bit set (never possible for a + user-space address) is not indirect. And we don't even have to get + rid of it since we mask things around just below. */ + if ((long int) envp >= 0) + env = *envp; + else + env = (unsigned long int) envp; + + /* Reset the rounding mode with the hardware fpcr. Note that the following + system call is an implied trap barrier for our modification. */ + __asm__ __volatile__ ("excb; mf_fpcr %0" : "=f" (fpcr)); + fpcr = (fpcr & ~FPCR_ROUND_MASK) | (env & FPCR_ROUND_MASK); + __asm__ __volatile__ ("mt_fpcr %0" : : "f" (fpcr)); + + /* Reset the exception status and mask with the kernel's FP code. 
*/ + __ieee_set_fp_control (env & SWCR_ALL_MASK); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetenv, __old_fesetenv) +compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fesetenv) +libm_hidden_ver (__fesetenv, fesetenv) +versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/alpha/fpu/fesetexcept.c b/REORG.TODO/sysdeps/alpha/fpu/fesetexcept.c new file mode 100644 index 0000000000..c84a2dfe37 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fesetexcept.c @@ -0,0 +1,31 @@ +/* Set given exception flags. Alpha version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Set the exception status flags selected by EXCEPTS in the software + control word. Bits of EXCEPTS outside SWCR_STATUS_MASK are ignored. + Always returns 0. */ +int +fesetexcept (int excepts) +{ + /* OR the requested status flags into the current control word and + install the result. */ + __ieee_set_fp_control (__ieee_get_fp_control () + | (excepts & SWCR_STATUS_MASK)); + + return 0; +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/fesetmode.c b/REORG.TODO/sysdeps/alpha/fpu/fesetmode.c new file mode 100644 index 0000000000..23a7be687b --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fesetmode.c @@ -0,0 +1,44 @@ +/* Install given floating-point control modes. Alpha version. 
+ Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Install the control modes MODEP, using the same pointer-or-encoded-value + convention as fesetenv. The rounding mode goes to the hardware FPCR. In + the software control word the SWCR_ALL_MASK bits other than the exception + status flags are taken from the mode, and the current status flags are + kept. Always returns 0. */ +int +fesetmode (const femode_t *modep) +{ + unsigned long int fpcr; + unsigned long int swcr; + femode_t mode; + + /* As in fesetenv. */ + if ((long int) modep >= 0) + mode = *modep; + else + mode = (unsigned long int) modep; + + __asm__ __volatile__ ("excb; mf_fpcr %0" : "=f" (fpcr)); + fpcr = (fpcr & ~FPCR_ROUND_MASK) | (mode & FPCR_ROUND_MASK); + __asm__ __volatile__ ("mt_fpcr %0" : : "f" (fpcr)); + + /* Take enable and map bits from MODE, status flags from the live word. */ + swcr = __ieee_get_fp_control (); + swcr = ((mode & SWCR_ALL_MASK & ~SWCR_STATUS_MASK) + | (swcr & SWCR_STATUS_MASK)); + __ieee_set_fp_control (swcr); + + return 0; +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/fesetround.c b/REORG.TODO/sysdeps/alpha/fpu/fesetround.c new file mode 100644 index 0000000000..af2b695029 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fesetround.c @@ -0,0 +1,44 @@ +/* Set current rounding direction. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ Contributed by Richard Henderson <rth@tamu.edu>, 1997 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Set the rounding direction to ROUND. Returns 1, leaving the FPCR + untouched, when ROUND has bits set outside the low two; otherwise writes + ROUND into the rounding-mode field of the FPCR and returns 0. */ +int +__fesetround (int round) +{ + unsigned long fpcr; + + /* Only the values 0..3 fit the two-bit rounding field. */ + if (round & ~3) + return 1; + + /* Get the current state. */ + __asm__ __volatile__("excb; mf_fpcr %0" : "=f"(fpcr)); + + /* Set the relevant bits. */ + fpcr = ((fpcr & ~FPCR_ROUND_MASK) + | ((unsigned long)round << FPCR_ROUND_SHIFT)); + + /* Put the new state in effect. */ + __asm__ __volatile__("mt_fpcr %0; excb" : : "f"(fpcr)); + + return 0; +} +libm_hidden_def (__fesetround) +weak_alias (__fesetround, fesetround) +libm_hidden_weak (fesetround) diff --git a/REORG.TODO/sysdeps/alpha/fpu/feupdateenv.c b/REORG.TODO/sysdeps/alpha/fpu/feupdateenv.c new file mode 100644 index 0000000000..d77b276b40 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/feupdateenv.c @@ -0,0 +1,50 @@ +/* Install given floating-point environment and raise exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Install the environment ENVP, then re-raise the exceptions that were + flagged before the switch. Always returns 0. */ +int +__feupdateenv (const fenv_t *envp) +{ + /* Snapshot the flags first: installing ENVP overwrites the status bits. */ + const unsigned long int pending + = __ieee_get_fp_control () & SWCR_STATUS_MASK; + + /* Switch to the new environment. */ + __fesetenv (envp); + + /* Incidentally, the status bits use the same layout as the FE_* + constants, so they can be handed to __feraiseexcept as they are. */ + __feraiseexcept (pending); + + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feupdateenv, __old_feupdateenv) +compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1); +#endif + +libm_hidden_def (__feupdateenv) +libm_hidden_ver (__feupdateenv, feupdateenv) +versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/alpha/fpu/fgetexcptflg.c b/REORG.TODO/sysdeps/alpha/fpu/fgetexcptflg.c new file mode 100644 index 0000000000..c69b0a1ce5 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fgetexcptflg.c @@ -0,0 +1,43 @@ +/* Store current representation for exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Store in *FLAGP the exception status flags that are currently set among + those selected by EXCEPTS. Always returns 0. */ +int +__fegetexceptflag (fexcept_t *flagp, int excepts) +{ + /* Software control word holding the status flags. */ + const unsigned long int swcr = __ieee_get_fp_control(); + + /* Keep only the requested flags that lie in the status field. */ + *flagp = swcr & excepts & SWCR_STATUS_MASK; + + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetexceptflag, __old_fegetexceptflag) +compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/alpha/fpu/fpu_control.h b/REORG.TODO/sysdeps/alpha/fpu/fpu_control.h new file mode 100644 index 0000000000..fbb55e5461 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fpu_control.h @@ -0,0 +1,105 @@ +/* FPU control word bits. Alpha-mapped-to-Intel version. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Olaf Flebbe. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _ALPHA_FPU_CONTROL_H +#define _ALPHA_FPU_CONTROL_H + +/* + * Since many programs seem to hardcode the values passed to __setfpucw() + * (rather than using the manifest constants) we emulate the x87 interface + * here (at least where this makes sense). + * + * 15-13 12 11-10 9-8 7-6 5 4 3 2 1 0 + * | reserved | IC | RC | PC | reserved | PM | UM | OM | ZM | DM | IM + * + * IM: Invalid operation mask + * DM: Denormalized operand mask + * ZM: Zero-divide mask + * OM: Overflow mask + * UM: Underflow mask + * PM: Precision (inexact result) mask + * + * A mask bit of 1 means no interrupt. + * + * PC: Precision control + * 11 - round to extended precision + * 10 - round to double precision + * 00 - round to single precision + * + * RC: Rounding control + * 00 - rounding to nearest + * 01 - rounding down (toward - infinity) + * 10 - rounding up (toward + infinity) + * 11 - rounding toward zero + * + * IC: Infinity control + * That is for 8087 and 80287 only. + * + * The hardware default is 0x037f. I choose 0x1372. + * NOTE(review): _FPU_DEFAULT below is 0x137f, not 0x1372 -- confirm which + * value this sentence should name. + */ + +#include <features.h> + +/* masking of interrupts */ +#define _FPU_MASK_IM 0x01 +#define _FPU_MASK_DM 0x02 +#define _FPU_MASK_ZM 0x04 +#define _FPU_MASK_OM 0x08 +#define _FPU_MASK_UM 0x10 +#define _FPU_MASK_PM 0x20 + +/* precision control -- without effect on Alpha */ +#define _FPU_EXTENDED 0x300 /* RECOMMENDED */ +#define _FPU_DOUBLE 0x200 +#define _FPU_SINGLE 0x0 /* DO NOT USE */ + +/* + * rounding control---notice that on the Alpha this affects only + * instructions with the dynamic rounding mode qualifier (/d). 
+ */ +#define _FPU_RC_NEAREST 0x000 /* RECOMMENDED */ +#define _FPU_RC_DOWN 0x400 +#define _FPU_RC_UP 0x800 +#define _FPU_RC_ZERO 0xC00 + +#define _FPU_RESERVED 0xF0C0 /* Reserved bits in cw */ + + +/* Now two recommended cw */ + +/* Linux default: + - extended precision + - rounding to positive infinity. There is no /p instruction + qualifier. By setting the dynamic rounding mode to +infinity, + one can use /d to get round to +infinity with no extra overhead + (so long as the default isn't changed, of course...) + - no exceptions enabled. + NOTE(review): 0x137f leaves the RC field (mask 0xC00) clear, which is + _FPU_RC_NEAREST, while the text above describes rounding to +infinity + (_FPU_RC_UP) -- confirm which one is intended. */ + +#define _FPU_DEFAULT 0x137f + +/* IEEE: same as above. */ +#define _FPU_IEEE 0x137f + +/* Type of the control word. */ +typedef unsigned int fpu_control_t; + +/* Default control word set at startup. */ +extern fpu_control_t __fpu_control; + +#endif /* _ALPHA_FPU_CONTROL_H */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/fsetexcptflg.c b/REORG.TODO/sysdeps/alpha/fpu/fsetexcptflg.c new file mode 100644 index 0000000000..f39f6125c7 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fsetexcptflg.c @@ -0,0 +1,46 @@ +/* Set floating-point environment exception handling. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> + +/* Set the exception status flags selected by EXCEPTS to the states saved + in *FLAGP. Flags not selected by EXCEPTS keep their current state, and + bits of EXCEPTS outside SWCR_STATUS_MASK are ignored. Always returns 0. */ +int +__fesetexceptflag (const fexcept_t *flagp, int excepts) +{ + unsigned long int tmp; + /* Only exception status bits can be selected. */ + const unsigned long int selected + = (unsigned long int) excepts & SWCR_STATUS_MASK; + + /* Get the current exception state. */ + tmp = __ieee_get_fp_control (); + + /* Replace just the selected flags with their saved states. The whole + status field used to be cleared here, which also dropped pending flags + the caller had not asked to change. */ + tmp = (tmp & ~selected) | (*flagp & selected); + + /* And store it back. */ + __ieee_set_fp_control (tmp); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetexceptflag, __old_fesetexceptflag) +compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/alpha/fpu/ftestexcept.c b/REORG.TODO/sysdeps/alpha/fpu/ftestexcept.c new file mode 100644 index 0000000000..8a0cf74986 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/ftestexcept.c @@ -0,0 +1,32 @@ +/* Test exception in current environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Return the subset of EXCEPTS whose status flags are currently set in the + software control word. */ +int +fetestexcept (int excepts) +{ + unsigned long tmp; + + /* Get current exceptions. 
*/ + tmp = __ieee_get_fp_control(); + + return tmp & excepts & SWCR_STATUS_MASK; +} +libm_hidden_def (fetestexcept) diff --git a/REORG.TODO/sysdeps/alpha/fpu/get-rounding-mode.h b/REORG.TODO/sysdeps/alpha/fpu/get-rounding-mode.h new file mode 100644 index 0000000000..866fb9926c --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/get-rounding-mode.h @@ -0,0 +1,35 @@ +/* Determine floating-point rounding mode within libc. Alpha version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef ALPHA_GET_ROUNDING_MODE_H +#define ALPHA_GET_ROUNDING_MODE_H 1 + +#include <fenv.h> +#include <fenv_libc.h> + +/* Return the floating-point rounding mode. 
*/ + +static inline int +get_rounding_mode (void) +{ + unsigned long fpcr; + /* Same sequence as __fegetround: trap barrier, then read the FPCR. */ + __asm__ __volatile__("excb; mf_fpcr %0" : "=f"(fpcr)); + /* The rounding mode is the two-bit field at FPCR_ROUND_SHIFT. */ + return (fpcr >> FPCR_ROUND_SHIFT) & 3; +} + +#endif /* get-rounding-mode.h */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/libm-test-ulps b/REORG.TODO/sysdeps/alpha/fpu/libm-test-ulps new file mode 100644 index 0000000000..6ec37f72ac --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/libm-test-ulps @@ -0,0 +1,2262 @@ +# Begin of automatic generation + +# Maximal error of functions: +Function: "acos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acos_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acosh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "acosh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "acosh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "acosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "asin": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "asin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "asin_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "asinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "asinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "asinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 
+ildouble: 4 +ldouble: 4 + +Function: "atan": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan2": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan2_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atan2_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "atan2_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "atanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "atanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "cabs": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_downward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_towardzero": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_upward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cacos": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cacos": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacos_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cacos_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "cacos_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 
+ldouble: 2 + +Function: Imaginary part of "cacos_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "cacos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cacos_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: Real part of "cacosh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cacosh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacosh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "cacosh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacosh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "cacosh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacosh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "cacosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "carg": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "carg_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "carg_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "carg_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "casin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "casin": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "casin_downward": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of 
"casin_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "casin_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "casin_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "casin_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "casin_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: Real part of "casinh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "casinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "casinh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "casinh_downward": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "casinh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "casinh_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "casinh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "casinh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catan": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "catan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "catan_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "catan_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 
+ +Function: Imaginary part of "catan_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catan_upward": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "catan_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catanh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "catanh": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "catanh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catanh_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "catanh_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catanh_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "catanh_upward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "catanh_upward": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt_upward": +double: 5 +float: 1 +idouble: 5 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "ccos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ccos_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 
+ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ccos_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "ccosh_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ccosh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccosh_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ccosh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cexp": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "cexp": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cexp_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cexp_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cexp_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cexp_towardzero": 
+double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "clog": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "clog10": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "clog10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog10_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog10_downward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog10_towardzero": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "clog10_towardzero": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog10_upward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "clog10_upward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog_downward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog_towardzero": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog_upward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of 
"clog_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "cos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cos_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "cos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "cosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cosh_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 2 + +Function: "cosh_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 2 + +Function: "cosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 3 + +Function: Real part of "cpow": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "cpow": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cpow_downward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "cpow_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cpow_towardzero": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "cpow_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cpow_upward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cpow_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "csin": +ildouble: 1 +ldouble: 1 + +Function: Real part of "csin_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: 
Imaginary part of "csin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csin_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csin_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csinh": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "csinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "csinh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csinh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csinh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csinh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "csinh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csqrt_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "csqrt_downward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csqrt_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 3 
+ldouble: 3 + +Function: Imaginary part of "csqrt_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csqrt_upward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "csqrt_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ctan": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctan_downward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "ctan_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Real part of "ctan_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "ctan_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Real part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "ctanh": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ctanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctanh_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "ctanh_downward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Real part of "ctanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "ctanh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctanh_upward": +double: 2 +float: 3 
+idouble: 2 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "ctanh_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "erf": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "erf_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erf_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "erf_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erfc": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "erfc_downward": +double: 5 +float: 6 +idouble: 5 +ifloat: 6 +ildouble: 5 +ldouble: 5 + +Function: "erfc_towardzero": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "erfc_upward": +double: 5 +float: 6 +idouble: 5 +ifloat: 6 +ildouble: 5 +ldouble: 5 + +Function: "exp": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp10": +double: 2 +idouble: 2 +ildouble: 2 +ldouble: 2 + +Function: "exp10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "exp10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "exp10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "exp2": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp_downward": +double: 1 +idouble: 1 + +Function: "exp_towardzero": +double: 1 +idouble: 1 + +Function: "exp_upward": +double: 1 +idouble: 1 + +Function: "expm1": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 
+ldouble: 1 + +Function: "expm1_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "expm1_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "expm1_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "gamma": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "gamma_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: "gamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "gamma_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: "hypot": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "hypot_downward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "hypot_towardzero": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "hypot_upward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "j0": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "j0_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "j0_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "j0_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "j1": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "j1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "j1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "j1_upward": +double: 3 +float: 5 +idouble: 3 +ifloat: 5 +ildouble: 3 +ldouble: 3 + +Function: "jn": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: "jn_downward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: "jn_towardzero": +double: 5 
+float: 5 +idouble: 5 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: "jn_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 7 +ldouble: 7 + +Function: "lgamma": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "lgamma_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: "lgamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "lgamma_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: "log": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "log10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log10_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 1 +ldouble: 1 + +Function: "log10_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log10_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log1p": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "log1p_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "log1p_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "log1p_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log2": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "log2_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "log2_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log2_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 1 +ldouble: 1 + +Function: "log_downward": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log_towardzero": +float: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log_upward": +double: 1 +float: 2 +idouble: 1 
+ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "pow": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "pow10": +double: 2 +idouble: 2 +ildouble: 2 +ldouble: 2 + +Function: "pow10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "pow10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "pow10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "pow_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "pow_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "pow_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "sin": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sin_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "sin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sincos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sincos_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sincos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "sincos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sinh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "sinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "sinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "tan": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "tan_downward": +double: 1 +float: 
2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "tan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "tan_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "tanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "tanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "tanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "tanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "tgamma": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "tgamma_downward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 5 +ldouble: 5 + +Function: "tgamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "tgamma_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "y0": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "y0_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "y0_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "y0_upward": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "y1": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "y1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "y1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "y1_upward": +double: 7 +float: 2 +idouble: 7 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "yn": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "yn_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: 
"yn_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "yn_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +# end of automatic generation diff --git a/REORG.TODO/sysdeps/alpha/fpu/libm-test-ulps-name b/REORG.TODO/sysdeps/alpha/fpu/libm-test-ulps-name new file mode 100644 index 0000000000..5219734094 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/libm-test-ulps-name @@ -0,0 +1 @@ +Alpha diff --git a/REORG.TODO/sysdeps/alpha/fpu/math_private.h b/REORG.TODO/sysdeps/alpha/fpu/math_private.h new file mode 100644 index 0000000000..1e97c867c3 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/math_private.h @@ -0,0 +1,50 @@ +#ifndef ALPHA_MATH_PRIVATE_H +#define ALPHA_MATH_PRIVATE_H 1 + +/* In bits/mathinline.h we define __isnan et al. + In sysdeps/alpha/fpu/s_isnan.c we move the identifier out of the way + via macro hackery. In both cases, tell math/math_private.h that + we have a local copy of the function. */ + +#ifndef __isnan +# define __isnan __isnan +#endif +#ifndef __isnanf +# define __isnanf __isnanf +#endif + +/* Generic code forces values to memory; we don't need to do that. 
*/ +#define math_opt_barrier(x) \ + ({ __typeof (x) __x = (x); __asm ("" : "+frm" (__x)); __x; }) /* Empty asm naming __x as a read-write operand (FP register, general register or memory); the expression yields __x. */ +#define math_force_eval(x) \ + ({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "frm" (__x)); }) /* Empty volatile asm naming __x as an input operand only; no value is yielded. */ + +#include_next <math_private.h> /* Continue with the next math_private.h found on the include path. */ + +#ifdef __alpha_fix__ +extern __always_inline double +__ieee754_sqrt (double d) /* Inline square root of d done by a single sqrtt instruction. */ +{ + double ret; +# ifdef _IEEE_FP_INEXACT + asm ("sqrtt/suid %1,%0" : "=&f"(ret) : "f"(d)); /* Only this branch adds the i qualifier. NOTE(review): presumably the inexact-exception enable -- confirm against the Alpha architecture manual. */ +# else + asm ("sqrtt/sud %1,%0" : "=&f"(ret) : "f"(d)); /* ret is an early-clobber FP-register output; d is an FP-register input. */ +# endif + return ret; +} + +extern __always_inline float +__ieee754_sqrtf (float d) /* Single-precision counterpart of __ieee754_sqrt, using sqrts. */ +{ + float ret; +# ifdef _IEEE_FP_INEXACT + asm ("sqrts/suid %1,%0" : "=&f"(ret) : "f"(d)); /* Same qualifier selection as in the double version above. */ +# else + asm ("sqrts/sud %1,%0" : "=&f"(ret) : "f"(d)); +# endif + return ret; +} +#endif /* __alpha_fix__ */ + +#endif /* ALPHA_MATH_PRIVATE_H */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_cacosf.c b/REORG.TODO/sysdeps/alpha/fpu/s_cacosf.c new file mode 100644 index 0000000000..e06b06305f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_cacosf.c @@ -0,0 +1,57 @@ +/* Return arc cosine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __cacosf __cacosf_not_defined +#define cacosf cacosf_not_defined /* While <complex.h> and <math.h> are included, __cacosf and cacosf are macro-renamed; both macros are removed right after. */ + +#include <complex.h> +#include <math.h> + +#undef __cacosf +#undef cacosf + +static _Complex float internal_cacosf (_Complex float x); /* File-local implementation; NOTE(review): presumably defined by the template included below through M_DECL_FUNC -- confirm. */ + +#define M_DECL_FUNC(f) internal_cacosf /* Every M_DECL_FUNC (f) expands to internal_cacosf, whatever f is. */ +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template: declare_mgen_alias is redefined to expand to nothing. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_cacos_template.c> + +#include "cfloat-compat.h" + +#undef __cacosf /* NOTE(review): already undefined above; presumably clears a definition picked up from the two includes just before -- confirm. */ + +c1_cfloat_rettype +__c1_cacosf (c1_cfloat_decl (x)) /* c1 entry point: unpack x with c1_cfloat_value, call internal_cacosf, repack with c1_cfloat_return. */ +{ + _Complex float r = internal_cacosf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_cacosf (c2_cfloat_decl (x)) /* c2 entry point: same call, using the c2_cfloat_* conversion macros. */ +{ + _Complex float r = internal_cacosf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (cacosf); /* NOTE(review): presumably provided by cfloat-compat.h to bind the two entry points to versioned cacosf symbols -- confirm there. */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_cacoshf.c b/REORG.TODO/sysdeps/alpha/fpu/s_cacoshf.c new file mode 100644 index 0000000000..d67cffb59e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_cacoshf.c @@ -0,0 +1,56 @@ +/* Return arc hyperbolic cosine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __cacoshf __cacoshf_not_defined +#define cacoshf cacoshf_not_defined /* While <complex.h> and <math.h> are included, __cacoshf and cacoshf are macro-renamed; both macros are removed right after. */ + +#include <complex.h> +#include <math.h> + +#undef __cacoshf +#undef cacoshf + +static _Complex float internal_cacoshf (_Complex float x); /* File-local implementation; NOTE(review): presumably defined by the template included below through M_DECL_FUNC -- confirm. */ + +#define M_DECL_FUNC(f) internal_cacoshf /* Every M_DECL_FUNC (f) expands to internal_cacoshf, whatever f is. */ +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template: declare_mgen_alias is redefined to expand to nothing. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_cacosh_template.c> +#include "cfloat-compat.h" + +#undef __cacoshf /* NOTE(review): already undefined above; presumably clears a definition picked up from the two includes just before -- confirm. */ + +c1_cfloat_rettype +__c1_cacoshf (c1_cfloat_decl (x)) /* c1 entry point: unpack x with c1_cfloat_value, call internal_cacoshf, repack with c1_cfloat_return. */ +{ + _Complex float r = internal_cacoshf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_cacoshf (c2_cfloat_decl (x)) /* c2 entry point: same call, using the c2_cfloat_* conversion macros. */ +{ + _Complex float r = internal_cacoshf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (cacoshf); /* NOTE(review): presumably provided by cfloat-compat.h to bind the two entry points to versioned cacoshf symbols -- confirm there. */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_casinf.c b/REORG.TODO/sysdeps/alpha/fpu/s_casinf.c new file mode 100644 index 0000000000..1baa1d4669 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_casinf.c @@ -0,0 +1,54 @@ +/* Return arc sine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __casinf __casinf_not_defined +#define casinf casinf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __casinf +#undef casinf + +static _Complex float internal_casinf (_Complex float x); + +#define M_DECL_FUNC(f) internal_casinf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_casin_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_casinf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_casinf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_casinf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_casinf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (casinf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_casinhf.c b/REORG.TODO/sysdeps/alpha/fpu/s_casinhf.c new file mode 100644 index 0000000000..4cb3a2fe33 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_casinhf.c @@ -0,0 +1,54 @@ +/* Return arc hyperbolic sine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __casinhf __casinhf_not_defined +#define casinhf casinhf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __casinhf +#undef casinhf + +static _Complex float internal_casinhf (_Complex float x); + +#define M_DECL_FUNC(f) internal_casinhf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_casinh_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_casinhf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_casinhf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_casinhf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_casinhf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (casinhf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_catanf.c b/REORG.TODO/sysdeps/alpha/fpu/s_catanf.c new file mode 100644 index 0000000000..6d928e077d --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_catanf.c @@ -0,0 +1,54 @@ +/* Return arc tangent of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __catanf __catanf_not_defined +#define catanf catanf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __catanf +#undef catanf + +static _Complex float internal_catanf (_Complex float x); + +#define M_DECL_FUNC(f) internal_catanf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_catan_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_catanf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_catanf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_catanf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_catanf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (catanf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_catanhf.c b/REORG.TODO/sysdeps/alpha/fpu/s_catanhf.c new file mode 100644 index 0000000000..d8942a057e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_catanhf.c @@ -0,0 +1,54 @@ +/* Return arc hyperbolic tangent of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __catanhf __catanhf_not_defined +#define catanhf catanhf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __catanhf +#undef catanhf + +static _Complex float internal_catanhf (_Complex float x); + +#define M_DECL_FUNC(f) internal_catanhf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_catanh_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_catanhf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_catanhf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_catanhf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_catanhf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (catanhf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_ccosf.c b/REORG.TODO/sysdeps/alpha/fpu/s_ccosf.c new file mode 100644 index 0000000000..abc7f10766 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_ccosf.c @@ -0,0 +1,54 @@ +/* Return cosine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __ccosf __ccosf_not_defined +#define ccosf ccosf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __ccosf +#undef ccosf + +static _Complex float internal_ccosf (_Complex float x); + +#define M_DECL_FUNC(f) internal_ccosf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_ccos_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_ccosf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_ccosf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_ccosf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_ccosf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (ccosf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_ccoshf.c b/REORG.TODO/sysdeps/alpha/fpu/s_ccoshf.c new file mode 100644 index 0000000000..65deabd9b2 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_ccoshf.c @@ -0,0 +1,54 @@ +/* Return hyperbolic cosine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __ccoshf __ccoshf_not_defined +#define ccoshf ccoshf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __ccoshf +#undef ccoshf + +static _Complex float internal_ccoshf (_Complex float x); + +#define M_DECL_FUNC(f) internal_ccoshf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_ccosh_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_ccoshf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_ccoshf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_ccoshf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_ccoshf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (ccoshf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_ceil.c b/REORG.TODO/sysdeps/alpha/fpu/s_ceil.c new file mode 100644 index 0000000000..029ee09315 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_ceil.c @@ -0,0 +1,56 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> +#include <math_ldbl_opt.h> + +/* Use the -inf rounding mode conversion instructions to implement + ceil, via something akin to -floor(-x). This is much faster than + playing with the fpcr to achieve +inf rounding mode. */ + +double +__ceil (double x) +{ + if (isnan (x)) + return x + x; + + if (isless (fabs (x), 9007199254740992.0)) /* 1 << DBL_MANT_DIG */ + { + double tmp1, new_x; + + new_x = -x; + __asm ( + "cvttq/svm %2,%1\n\t" + "cvtqt/m %1,%0\n\t" + : "=f"(new_x), "=&f"(tmp1) + : "f"(new_x)); + + /* Fix up the negation we did above, as well as handling -0 properly. */ + x = copysign(new_x, x); + } + return x; +} + +weak_alias (__ceil, ceil) +#ifdef NO_LONG_DOUBLE +strong_alias (__ceil, __ceill) +weak_alias (__ceil, ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_ceilf.c b/REORG.TODO/sysdeps/alpha/fpu/s_ceilf.c new file mode 100644 index 0000000000..8c76c65d7f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_ceilf.c @@ -0,0 +1,53 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +/* Use the -inf rounding mode conversion instructions to implement + ceil, via something akin to -floor(-x). This is much faster than + playing with the fpcr to achieve +inf rounding mode. */ + +float +__ceilf (float x) +{ + if (isnanf (x)) + return x + x; + + if (isless (fabsf (x), 16777216.0f)) /* 1 << FLT_MANT_DIG */ + { + /* Note that Alpha S_Floating is stored in registers in a + restricted T_Floating format, so we don't even need to + convert back to S_Floating in the end. The initial + conversion to T_Floating is needed to handle denormals. */ + + float tmp1, tmp2, new_x; + + new_x = -x; + __asm ("cvtst/s %3,%2\n\t" + "cvttq/svm %2,%1\n\t" + "cvtqt/m %1,%0\n\t" + : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2) + : "f"(new_x)); + + /* Fix up the negation we did above, as well as handling -0 properly. */ + x = copysignf(new_x, x); + } + return x; +} + +weak_alias (__ceilf, ceilf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_cexpf.c b/REORG.TODO/sysdeps/alpha/fpu/s_cexpf.c new file mode 100644 index 0000000000..64daf689d2 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_cexpf.c @@ -0,0 +1,54 @@ +/* Return exponential of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __cexpf __cexpf_not_defined +#define cexpf cexpf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __cexpf +#undef cexpf + +static _Complex float internal_cexpf (_Complex float x); + +#define M_DECL_FUNC(f) internal_cexpf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_cexp_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_cexpf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_cexpf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_cexpf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_cexpf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (cexpf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_clog10f.c b/REORG.TODO/sysdeps/alpha/fpu/s_clog10f.c new file mode 100644 index 0000000000..0646a09ca5 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_clog10f.c @@ -0,0 +1,64 @@ +/* Return base 10 logarithm of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __clog10f __clog10f_not_defined +#define clog10f clog10f_not_defined + +#include <complex.h> +#include <math.h> + +#undef __clog10f +#undef clog10f + +static _Complex float internal_clog10f (_Complex float x); + +#define M_DECL_FUNC(f) internal_clog10f +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_clog10_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_clog10f (c1_cfloat_decl (x)) +{ + _Complex float r = internal_clog10f (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_clog10f (c2_cfloat_decl (x)) +{ + _Complex float r = internal_clog10f (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +/* Ug. __clog10f was exported from GLIBC_2.1. This is the only + complex function whose double-underscore symbol was exported, + so we get to handle that specially. */ +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_3_4) +strong_alias (__c1_clog10f, __c1_clog10f_2); +compat_symbol (libm, __c1_clog10f, clog10f, GLIBC_2_1); +compat_symbol (libm, __c1_clog10f_2, __clog10f, GLIBC_2_1); +#endif +versioned_symbol (libm, __c2_clog10f, clog10f, GLIBC_2_3_4); +extern typeof(__c2_clog10f) __clog10f attribute_hidden; +strong_alias (__c2_clog10f, __clog10f) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_clogf.c b/REORG.TODO/sysdeps/alpha/fpu/s_clogf.c new file mode 100644 index 0000000000..1a7e234aa9 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_clogf.c @@ -0,0 +1,54 @@ +/* Return natural logarithm of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __clogf __clogf_not_defined +#define clogf clogf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __clogf +#undef clogf + +static _Complex float internal_clogf (_Complex float x); + +#define M_DECL_FUNC(f) internal_clogf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_clog_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_clogf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_clogf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_clogf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_clogf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (clogf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_copysign.c b/REORG.TODO/sysdeps/alpha/fpu/s_copysign.c new file mode 100644 index 0000000000..3bd3dd48df --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_copysign.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + +double +__copysign (double x, double y) +{ + return __builtin_copysign (x, y); +} + +weak_alias (__copysign, copysign) +#ifdef NO_LONG_DOUBLE +strong_alias (__copysign, __copysignl) +weak_alias (__copysign, copysignl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __copysign, copysignl, GLIBC_2_0); +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __copysign, copysignl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_copysignf.c b/REORG.TODO/sysdeps/alpha/fpu/s_copysignf.c new file mode 100644 index 0000000000..90b20124a6 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_copysignf.c @@ -0,0 +1,27 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +float +__copysignf (float x, float y) +{ + return __builtin_copysignf (x, y); +} + +weak_alias (__copysignf, copysignf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_cpowf.c b/REORG.TODO/sysdeps/alpha/fpu/s_cpowf.c new file mode 100644 index 0000000000..dacf0e12e0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_cpowf.c @@ -0,0 +1,54 @@ +/* Return power of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __cpowf __cpowf_not_defined +#define cpowf cpowf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __cpowf +#undef cpowf + +static _Complex float internal_cpowf (_Complex float x, _Complex float c); + +#define M_DECL_FUNC(f) internal_cpowf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. 
*/ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_cpow_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_cpowf (c1_cfloat_decl (x), c1_cfloat_decl (c)) +{ + _Complex float r = internal_cpowf (c1_cfloat_value (x), c1_cfloat_value (c)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_cpowf (c2_cfloat_decl (x), c2_cfloat_decl (c)) +{ + _Complex float r = internal_cpowf (c2_cfloat_value (x), c2_cfloat_value (c)); + return c2_cfloat_return (r); +} + +cfloat_versions (cpowf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_cprojf.c b/REORG.TODO/sysdeps/alpha/fpu/s_cprojf.c new file mode 100644 index 0000000000..316cc1f551 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_cprojf.c @@ -0,0 +1,54 @@ +/* Return projection of complex float value to Riemann sphere. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __cprojf __cprojf_not_defined +#define cprojf cprojf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __cprojf +#undef cprojf + +static _Complex float internal_cprojf (_Complex float x); + +#define M_DECL_FUNC(f) internal_cprojf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. 
*/ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_cproj_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_cprojf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_cprojf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_cprojf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_cprojf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (cprojf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_csinf.c b/REORG.TODO/sysdeps/alpha/fpu/s_csinf.c new file mode 100644 index 0000000000..f884d29fc9 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_csinf.c @@ -0,0 +1,54 @@ +/* Return sine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __csinf __csinf_not_defined +#define csinf csinf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __csinf +#undef csinf + +static _Complex float internal_csinf (_Complex float x); + +#define M_DECL_FUNC(f) internal_csinf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. 
*/ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_csin_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_csinf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_csinf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_csinf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_csinf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (csinf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_csinhf.c b/REORG.TODO/sysdeps/alpha/fpu/s_csinhf.c new file mode 100644 index 0000000000..071ff1227b --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_csinhf.c @@ -0,0 +1,54 @@ +/* Return hyperbolic sine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __csinhf __csinhf_not_defined +#define csinhf csinhf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __csinhf +#undef csinhf + +static _Complex float internal_csinhf (_Complex float x); + +#define M_DECL_FUNC(f) internal_csinhf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. 
*/ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_csinh_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_csinhf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_csinhf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_csinhf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_csinhf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (csinhf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_csqrtf.c b/REORG.TODO/sysdeps/alpha/fpu/s_csqrtf.c new file mode 100644 index 0000000000..0611f09465 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_csqrtf.c @@ -0,0 +1,54 @@ +/* Return square root of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __csqrtf __csqrtf_not_defined +#define csqrtf csqrtf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __csqrtf +#undef csqrtf + +static _Complex float internal_csqrtf (_Complex float x); + +#define M_DECL_FUNC(f) internal_csqrtf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. 
*/ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_csqrt_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_csqrtf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_csqrtf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_csqrtf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_csqrtf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (csqrtf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_ctanf.c b/REORG.TODO/sysdeps/alpha/fpu/s_ctanf.c new file mode 100644 index 0000000000..7288db23c0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_ctanf.c @@ -0,0 +1,54 @@ +/* Return tangent of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __ctanf __ctanf_not_defined +#define ctanf ctanf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __ctanf +#undef ctanf + +static _Complex float internal_ctanf (_Complex float x); + +#define M_DECL_FUNC(f) internal_ctanf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. 
*/ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_ctan_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_ctanf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_ctanf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_ctanf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_ctanf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (ctanf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_ctanhf.c b/REORG.TODO/sysdeps/alpha/fpu/s_ctanhf.c new file mode 100644 index 0000000000..fc0a5f6837 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_ctanhf.c @@ -0,0 +1,54 @@ +/* Return hyperbolic tangent of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __ctanhf __ctanhf_not_defined +#define ctanhf ctanhf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __ctanhf +#undef ctanhf + +static _Complex float internal_ctanhf (_Complex float x); + +#define M_DECL_FUNC(f) internal_ctanhf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. 
*/ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_ctanh_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_ctanhf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_ctanhf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_ctanhf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_ctanhf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (ctanhf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_fabs.c b/REORG.TODO/sysdeps/alpha/fpu/s_fabs.c new file mode 100644 index 0000000000..abcc6e7c75 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_fabs.c @@ -0,0 +1,35 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> +#include <math_ldbl_opt.h> + +double +__fabs (double x) +{ + return __builtin_fabs (x); +} + +weak_alias (__fabs, fabs) +#ifdef NO_LONG_DOUBLE +strong_alias (__fabs, __fabsl) +weak_alias (__fabs, fabsl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __fabs, fabsl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_fabsf.c b/REORG.TODO/sysdeps/alpha/fpu/s_fabsf.c new file mode 100644 index 0000000000..5b1105cb00 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_fabsf.c @@ -0,0 +1,27 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +float +__fabsf (float x) +{ + return __builtin_fabsf (x); +} + +weak_alias (__fabsf, fabsf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_floor.c b/REORG.TODO/sysdeps/alpha/fpu/s_floor.c new file mode 100644 index 0000000000..49a0c760a9 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_floor.c @@ -0,0 +1,57 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + + +/* Use the -inf rounding mode conversion instructions to implement + floor. We note when the exponent is large enough that the value + must be integral, as this avoids unpleasant integer overflows. */ + +double +__floor (double x) +{ + if (isnan (x)) + return x + x; + + if (isless (fabs (x), 9007199254740992.0)) /* 1 << DBL_MANT_DIG */ + { + double tmp1, new_x; + + __asm ( + "cvttq/svm %2,%1\n\t" + "cvtqt/m %1,%0\n\t" + : "=f"(new_x), "=&f"(tmp1) + : "f"(x)); + + /* floor(-0) == -0, and in general we'll always have the same + sign as our input. */ + x = copysign(new_x, x); + } + return x; +} + +weak_alias (__floor, floor) +#ifdef NO_LONG_DOUBLE +strong_alias (__floor, __floorl) +weak_alias (__floor, floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_floorf.c b/REORG.TODO/sysdeps/alpha/fpu/s_floorf.c new file mode 100644 index 0000000000..79cae27720 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_floorf.c @@ -0,0 +1,54 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + + +/* Use the -inf rounding mode conversion instructions to implement + floor. We note when the exponent is large enough that the value + must be integral, as this avoids unpleasant integer overflows. */ + +float +__floorf (float x) +{ + if (isnanf (x)) + return x + x; + + if (isless (fabsf (x), 16777216.0f)) /* 1 << FLT_MANT_DIG */ + { + /* Note that Alpha S_Floating is stored in registers in a + restricted T_Floating format, so we don't even need to + convert back to S_Floating in the end. The initial + conversion to T_Floating is needed to handle denormals. */ + + float tmp1, tmp2, new_x; + + __asm ("cvtst/s %3,%2\n\t" + "cvttq/svm %2,%1\n\t" + "cvtqt/m %1,%0\n\t" + : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2) + : "f"(x)); + + /* floor(-0) == -0, and in general we'll always have the same + sign as our input. */ + x = copysignf(new_x, x); + } + return x; +} + +weak_alias (__floorf, floorf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_fmax.S b/REORG.TODO/sysdeps/alpha/fpu/s_fmax.S new file mode 100644 index 0000000000..8a65ae4963 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_fmax.S @@ -0,0 +1,57 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .set noat + .set noreorder + + .text +ENTRY (__fmax) + .prologue 0 + + cmptun/su $f16, $f16, $f10 + cmptun/su $f17, $f17, $f11 + fmov $f17, $f0 + unop + + trapb + fbne $f10, $ret + fmov $f16, $f0 + fbne $f11, $ret + + cmptlt/su $f16, $f17, $f11 + trapb + fcmovne $f11, $f17, $f0 +$ret: ret + +END (__fmax) + +/* Given the in-register format of single-precision, this works there too. */ +strong_alias (__fmax, __fmaxf) +weak_alias (__fmaxf, fmaxf) + +weak_alias (__fmax, fmax) +#ifdef NO_LONG_DOUBLE +strong_alias (__fmax, __fmaxl) +weak_alias (__fmaxl, fmaxl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fmax, fmaxl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_fmaxf.S b/REORG.TODO/sysdeps/alpha/fpu/s_fmaxf.S new file mode 100644 index 0000000000..3c2d62bb81 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_fmaxf.S @@ -0,0 +1 @@ +/* __fmaxf is in s_fmax.S */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_fmin.S b/REORG.TODO/sysdeps/alpha/fpu/s_fmin.S new file mode 100644 index 0000000000..926bd32ec4 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_fmin.S @@ -0,0 +1,57 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .set noat + .set noreorder + + .text +ENTRY (__fmin) + .prologue 0 + + cmptun/su $f16, $f16, $f10 + cmptun/su $f17, $f17, $f11 + fmov $f17, $f0 + unop + + trapb + fbne $f10, $ret + fmov $f16, $f0 + fbne $f11, $ret + + cmptlt/su $f17, $f16, $f11 + trapb + fcmovne $f11, $f17, $f0 +$ret: ret + +END (__fmin) + +/* Given the in-register format of single-precision, this works there too. */ +strong_alias (__fmin, __fminf) +weak_alias (__fminf, fminf) + +weak_alias (__fmin, fmin) +#ifdef NO_LONG_DOUBLE +strong_alias (__fmin, __fminl) +weak_alias (__fminl, fminl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fmin, fminl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_fminf.S b/REORG.TODO/sysdeps/alpha/fpu/s_fminf.S new file mode 100644 index 0000000000..10ab7fe53c --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_fminf.S @@ -0,0 +1 @@ +/* __fminf is in s_fmin.S */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_isnan.c b/REORG.TODO/sysdeps/alpha/fpu/s_isnan.c new file mode 100644 index 0000000000..b56fdbe2c5 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_isnan.c @@ -0,0 +1,58 @@ +/* Return 1 if argument is a NaN, else 0. + Copyright (C) 2007-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Ugly kludge to avoid declarations. */ +#define __isnanf not___isnanf +#define isnanf not_isnanf +#define __GI___isnanf not__GI___isnanf + +#include <math.h> +#include <math_ldbl_opt.h> + +#undef __isnanf +#undef isnanf +#undef __GI___isnanf + +int +__isnan (double x) +{ + uint64_t ix; + EXTRACT_WORDS64 (ix, x); + return ix * 2 > 0xffe0000000000000ul; +} + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +weak_alias (__isnan, isnanf) + +/* ??? GCC 4.8 fails to look through chains of aliases with asm names + attached. Work around this for now. 
*/ +hidden_ver (__isnan, __isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif +#if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_isnanf.c b/REORG.TODO/sysdeps/alpha/fpu/s_isnanf.c new file mode 100644 index 0000000000..af41e43850 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_isnanf.c @@ -0,0 +1 @@ +/* In s_isnan.c */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_llrint.c b/REORG.TODO/sysdeps/alpha/fpu/s_llrint.c new file mode 100644 index 0000000000..5db97be037 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_llrint.c @@ -0,0 +1 @@ +/* In s_lrint.c */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_llrintf.c b/REORG.TODO/sysdeps/alpha/fpu/s_llrintf.c new file mode 100644 index 0000000000..18f2885ef7 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_llrintf.c @@ -0,0 +1 @@ +/* In s_lrintf.c */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_llround.c b/REORG.TODO/sysdeps/alpha/fpu/s_llround.c new file mode 100644 index 0000000000..b212fbd8e5 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_llround.c @@ -0,0 +1 @@ +/* In s_lround.c. */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_llroundf.c b/REORG.TODO/sysdeps/alpha/fpu/s_llroundf.c new file mode 100644 index 0000000000..73bdf3103f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_llroundf.c @@ -0,0 +1 @@ +/* In s_lroundf.c. */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_lrint.c b/REORG.TODO/sysdeps/alpha/fpu/s_lrint.c new file mode 100644 index 0000000000..2a644c57df --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_lrint.c @@ -0,0 +1,47 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __llrint not___llrint +#define llrint not_llrint +#include <math.h> +#include <math_ldbl_opt.h> +#undef __llrint +#undef llrint + +long int +__lrint (double x) +{ + long ret; + + __asm ("cvttq/svd %1,%0" : "=&f"(ret) : "f"(x)); + + return ret; +} + +strong_alias (__lrint, __llrint) +weak_alias (__lrint, lrint) +weak_alias (__llrint, llrint) +#ifdef NO_LONG_DOUBLE +strong_alias (__lrint, __lrintl) +strong_alias (__lrint, __llrintl) +weak_alias (__lrintl, lrintl) +weak_alias (__llrintl, llrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1); +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_lrintf.c b/REORG.TODO/sysdeps/alpha/fpu/s_lrintf.c new file mode 100644 index 0000000000..cfcf35caae --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_lrintf.c @@ -0,0 +1,38 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __llrintf not___llrintf +#define llrintf not_llrintf +#include <math.h> +#undef __llrintf +#undef llrintf + +long int +__lrintf (float x) +{ + double tmp; + long ret; + + __asm ("cvtst/s %2,%1\n\tcvttq/svd %1,%0" + : "=&f"(ret), "=&f"(tmp) : "f"(x)); + + return ret; +} + +strong_alias (__lrintf, __llrintf) +weak_alias (__lrintf, lrintf) +weak_alias (__llrintf, llrintf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_lround.c b/REORG.TODO/sysdeps/alpha/fpu/s_lround.c new file mode 100644 index 0000000000..78a067daf1 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_lround.c @@ -0,0 +1,47 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __llround not___llround +#define llround not_llround +#include <math.h> +#include <math_ldbl_opt.h> +#undef __llround +#undef llround + +long int +__lround (double x) +{ + double adj, y; + + adj = copysign (0.5, x); + asm("addt/suc %1,%2,%0" : "=&f"(y) : "f"(x), "f"(adj)); + return y; +} + +strong_alias (__lround, __llround) +weak_alias (__lround, lround) +weak_alias (__llround, llround) +#ifdef NO_LONG_DOUBLE +strong_alias (__lround, __lroundl) +strong_alias (__lround, __llroundl) +weak_alias (__lroundl, lroundl) +weak_alias (__llroundl, llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1); +compat_symbol (libm, __llround, llroundl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_lroundf.c b/REORG.TODO/sysdeps/alpha/fpu/s_lroundf.c new file mode 100644 index 0000000000..37df944224 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_lroundf.c @@ -0,0 +1,37 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __llroundf not___llroundf +#define llroundf not_llroundf +#include <math.h> +#undef __llroundf +#undef llroundf + + +long int +__lroundf (float x) +{ + float adj, y; + + adj = copysignf (0.5f, x); + asm("adds/suc %1,%2,%0" : "=&f"(y) : "f"(x), "f"(adj)); + return y; +} + +strong_alias (__lroundf, __llroundf) +weak_alias (__lroundf, lroundf) +weak_alias (__llroundf, llroundf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_nearbyint.c b/REORG.TODO/sysdeps/alpha/fpu/s_nearbyint.c new file mode 100644 index 0000000000..c3f204fff1 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_nearbyint.c @@ -0,0 +1,25 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math_ldbl_opt.h> + +#include <sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c> + +#if LONG_DOUBLE_COMPAT (libm, GLIBC_2_1) +compat_symbol (libm, __nearbyint, nearbyintl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_rint.c b/REORG.TODO/sysdeps/alpha/fpu/s_rint.c new file mode 100644 index 0000000000..fca35cf961 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_rint.c @@ -0,0 +1,51 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + + +double +__rint (double x) +{ + if (isnan (x)) + return x + x; + + if (isless (fabs (x), 9007199254740992.0)) /* 1 << DBL_MANT_DIG */ + { + double tmp1, new_x; + __asm ("cvttq/svid %2,%1\n\t" + "cvtqt/d %1,%0\n\t" + : "=f"(new_x), "=&f"(tmp1) + : "f"(x)); + + /* rint(-0.1) == -0, and in general we'll always have the same + sign as our input. */ + x = copysign(new_x, x); + } + return x; +} + +weak_alias (__rint, rint) +#ifdef NO_LONG_DOUBLE +strong_alias (__rint, __rintl) +weak_alias (__rint, rintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __rint, rintl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_rintf.c b/REORG.TODO/sysdeps/alpha/fpu/s_rintf.c new file mode 100644 index 0000000000..b6e8d2dd07 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_rintf.c @@ -0,0 +1,50 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + + +float +__rintf (float x) +{ + if (isnanf (x)) + return x + x; + + if (isless (fabsf (x), 16777216.0f)) /* 1 << FLT_MANT_DIG */ + { + /* Note that Alpha S_Floating is stored in registers in a + restricted T_Floating format, so we don't even need to + convert back to S_Floating in the end. The initial + conversion to T_Floating is needed to handle denormals. */ + + float tmp1, tmp2, new_x; + + __asm ("cvtst/s %3,%2\n\t" + "cvttq/svid %2,%1\n\t" + "cvtqt/d %1,%0\n\t" + : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2) + : "f"(x)); + + /* rint(-0.1) == -0, and in general we'll always have the same + sign as our input. */ + x = copysignf(new_x, x); + } + return x; +} + +weak_alias (__rintf, rintf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_trunc.c b/REORG.TODO/sysdeps/alpha/fpu/s_trunc.c new file mode 100644 index 0000000000..68a013d222 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_trunc.c @@ -0,0 +1,51 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + + +/* Use the chopped rounding mode conversion instructions to implement trunc. */ + +double +__trunc (double x) +{ + double two52 = copysign (0x1.0p52, x); + double r, tmp; + + if (isgreaterequal (fabs (x), 0x1.0p52)) + return x; + + __asm ( + "addt/suc %2, %3, %1\n\tsubt/suc %1, %3, %0" + : "=&f"(r), "=&f"(tmp) + : "f"(x), "f"(two52)); + + /* trunc(-0) == -0, and in general we'll always have the same + sign as our input. */ + return copysign (r, x); +} + +weak_alias (__trunc, trunc) +#ifdef NO_LONG_DOUBLE +strong_alias (__trunc, __truncl) +weak_alias (__trunc, truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_truncf.c b/REORG.TODO/sysdeps/alpha/fpu/s_truncf.c new file mode 100644 index 0000000000..ca47fdc2b5 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_truncf.c @@ -0,0 +1,43 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + + +/* Use the chopped rounding mode conversion instructions to implement trunc. */ + +float +__truncf (float x) +{ + float two23 = copysignf (0x1.0p23, x); + float r, tmp; + + if (isgreaterequal (fabsf (x), 0x1.0p23)) + return x; + + __asm ( + "adds/suc %2, %3, %1\n\tsubs/suc %1, %3, %0" + : "=&f"(r), "=&f"(tmp) + : "f"(x), "f"(two23)); + + /* trunc(-0) == -0, and in general we'll always have the same + sign as our input. */ + return copysignf (r, x); +} + +weak_alias (__truncf, truncf) diff --git a/REORG.TODO/sysdeps/alpha/gccframe.h b/REORG.TODO/sysdeps/alpha/gccframe.h new file mode 100644 index 0000000000..769cf2da65 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/gccframe.h @@ -0,0 +1,21 @@ +/* Definition of object in frame unwind info. alpha version. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define FIRST_PSEUDO_REGISTER 64 + +#include <sysdeps/generic/gccframe.h> diff --git a/REORG.TODO/sysdeps/alpha/hp-timing.h b/REORG.TODO/sysdeps/alpha/hp-timing.h new file mode 100644 index 0000000000..84380f34a4 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/hp-timing.h @@ -0,0 +1,46 @@ +/* High precision, low overhead timing functions. Alpha version. + Copyright (C) 2001-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@redhat.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _HP_TIMING_H +#define _HP_TIMING_H 1 + +/* We always have the timestamp register, but it's got only a 4 second + range. Use it for ld.so profiling only. */ +#define HP_TIMING_AVAIL (0) +#define HP_SMALL_TIMING_AVAIL (1) + +/* We indeed have inlined functions. */ +#define HP_TIMING_INLINE (1) + +/* We use 32 bit values for the times. */ +typedef unsigned int hp_timing_t; + +/* The "rpcc" instruction returns a 32-bit counting half and a 32-bit + "virtual cycle counter displacement". Subtracting the two gives us + a virtual cycle count. */ +#define HP_TIMING_NOW(VAR) \ + do { \ + unsigned long int x_; \ + asm volatile ("rpcc %0" : "=r"(x_)); \ + (VAR) = (int) (x_) - (int) (x_ >> 32); \ + } while (0) + +#include <hp-timing-common.h> + +#endif /* hp-timing.h */ diff --git a/REORG.TODO/sysdeps/alpha/htonl.S b/REORG.TODO/sysdeps/alpha/htonl.S new file mode 100644 index 0000000000..17b25c3424 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/htonl.S @@ -0,0 +1,43 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(htonl) +#ifdef PROF + ldgp gp, 0(pv) + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at + .prologue 1 +#else + .prologue 0 +#endif + + inslh a0, 7, t0 # t0 = 0000000000AABBCC + inswl a0, 3, t1 # t1 = 000000CCDD000000 + or t1, t0, t1 # t1 = 000000CCDDAABBCC + srl t1, 16, t2 # t2 = 0000000000CCDDAA + zapnot t1, 0x0A, t0 # t0 = 00000000DD00BB00 + zapnot t2, 0x05, t3 # t3 = 0000000000CC00AA + addl t0, t3, v0 # v0 = ssssssssDDCCBBAA + ret + + END(htonl) + +weak_alias (htonl, ntohl) diff --git a/REORG.TODO/sysdeps/alpha/htons.S b/REORG.TODO/sysdeps/alpha/htons.S new file mode 100644 index 0000000000..e61322d003 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/htons.S @@ -0,0 +1,39 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(htons) +#ifdef PROF + ldgp gp, 0(pv) + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at + .prologue 1 +#else + .prologue 0 +#endif + + extwh a0, 7, t1 # t1 = bb00 + extbl a0, 1, v0 # v0 = 00aa + bis v0, t1, v0 # v0 = bbaa + ret + + END(htons) + +weak_alias (htons, ntohs) diff --git a/REORG.TODO/sysdeps/alpha/jmpbuf-offsets.h b/REORG.TODO/sysdeps/alpha/jmpbuf-offsets.h new file mode 100644 index 0000000000..a9e752999f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/jmpbuf-offsets.h @@ -0,0 +1,35 @@ +/* Private macros for accessing __jmp_buf contents. Alpha version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define JB_S0 0 +#define JB_S1 1 +#define JB_S2 2 +#define JB_S3 3 +#define JB_S4 4 +#define JB_S5 5 +#define JB_PC 6 +#define JB_FP 7 +#define JB_SP 8 +#define JB_F2 9 +#define JB_F3 10 +#define JB_F4 11 +#define JB_F5 12 +#define JB_F6 13 +#define JB_F7 14 +#define JB_F8 15 +#define JB_F9 16 diff --git a/REORG.TODO/sysdeps/alpha/jmpbuf-unwind.h b/REORG.TODO/sysdeps/alpha/jmpbuf-unwind.h new file mode 100644 index 0000000000..b90c81c481 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/jmpbuf-unwind.h @@ -0,0 +1,47 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <setjmp.h> +#include <jmpbuf-offsets.h> +#include <stdint.h> +#include <unwind.h> +#include <sysdep.h> + +/* Test if longjmp to JMPBUF would unwind the frame containing a local + variable at ADDRESS. 
*/ +#define _JMPBUF_UNWINDS(_jmpbuf, _address, _demangle) \ + ((void *)(_address) < (void *) _demangle ((_jmpbuf)[JB_SP])) + +#define _JMPBUF_CFA_UNWINDS_ADJ(_jmpbuf, _context, _adj) \ + _JMPBUF_UNWINDS_ADJ (_jmpbuf, (void *) _Unwind_GetCFA (_context), _adj) + +static inline uintptr_t __attribute__ ((unused)) +_jmpbuf_sp (__jmp_buf regs) +{ + uintptr_t sp = regs[JB_SP]; +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (sp); +#endif + return sp; +} + +#define _JMPBUF_UNWINDS_ADJ(_jmpbuf, _address, _adj) \ + ((uintptr_t) (_address) - (_adj) < _jmpbuf_sp (_jmpbuf) - (_adj)) + +/* We use the normal longjmp for unwinding. */ +#define __libc_unwind_longjmp(buf, val) __libc_longjmp (buf, val) diff --git a/REORG.TODO/sysdeps/alpha/ldiv.S b/REORG.TODO/sysdeps/alpha/ldiv.S new file mode 100644 index 0000000000..a32264b143 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/ldiv.S @@ -0,0 +1,218 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "div_libc.h" + +#undef FRAME +#ifdef __alpha_fix__ +#define FRAME 0 +#else +#define FRAME 16 +#endif + +#undef X +#undef Y +#define X $17 +#define Y $18 + + .set noat + + .align 4 + .globl ldiv + .ent ldiv +ldiv: + .frame sp, FRAME, ra +#if FRAME > 0 + lda sp, -FRAME(sp) +#endif +#ifdef PROF + .set macro + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .set nomacro + .prologue 1 +#else + .prologue 0 +#endif + + beq Y, $divbyzero + excb + mf_fpcr $f10 + + _ITOFT2 X, $f0, 0, Y, $f1, 8 + + .align 4 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + unop + + /* Check to see if X fit in the double as an exact value. */ + sll X, (64-53), AT + sra AT, (64-53), AT + cmpeq X, AT, AT + beq AT, $x_big + + /* If we get here, we're expecting exact results from the division. + Do nothing else besides convert and clean up. */ + cvttq/c $f0, $f0 + excb + mt_fpcr $f10 + _FTOIT $f0, $0, 0 + +$egress: + mulq $0, Y, $1 + subq X, $1, $1 + + stq $0, 0($16) + stq $1, 8($16) + mov $16, $0 + +#if FRAME > 0 + lda sp, FRAME(sp) +#endif + ret + + .align 4 +$x_big: + /* If we get here, X is large enough that we don't expect exact + results, and neither X nor Y got mis-translated for the fp + division. Our task is to take the fp result, figure out how + far it's off from the correct result and compute a fixup. */ + +#define Q v0 /* quotient */ +#define R t0 /* remainder */ +#define SY t1 /* scaled Y */ +#define S t2 /* scalar */ +#define QY t3 /* Q*Y */ + + /* The fixup code below can only handle unsigned values. */ + or X, Y, AT + mov $31, t5 + blt AT, $fix_sign_in +$fix_sign_in_ret1: + cvttq/c $f0, $f0 + + _FTOIT $f0, Q, 8 +$fix_sign_in_ret2: + mulq Q, Y, QY + excb + mt_fpcr $f10 + + .align 4 + subq QY, X, R + mov Y, SY + mov 1, S + bgt R, $q_high + +$q_high_ret: + subq X, QY, R + mov Y, SY + mov 1, S + bgt R, $q_low + +$q_low_ret: + negq Q, t4 + cmovlbs t5, t4, Q + br $egress + + .align 4 + /* The quotient that we computed was too large. 
We need to reduce + it by S such that Y*S >= R. Obviously the closer we get to the + correct value the better, but overshooting high is ok, as we'll + fix that up later. */ +0: + addq SY, SY, SY + addq S, S, S +$q_high: + cmpult SY, R, AT + bne AT, 0b + + subq Q, S, Q + unop + subq QY, SY, QY + br $q_high_ret + + .align 4 + /* The quotient that we computed was too small. Divide Y by the + current remainder (R) and add that to the existing quotient (Q). + The expectation, of course, is that R is much smaller than X. */ + /* Begin with a shift-up loop. Compute S such that Y*S >= R. We + already have a copy of Y in SY and the value 1 in S. */ +0: + addq SY, SY, SY + addq S, S, S +$q_low: + cmpult SY, R, AT + bne AT, 0b + + /* Shift-down and subtract loop. Each iteration compares our scaled + Y (SY) with the remainder (R); if SY <= R then X is divisible by + Y's scalar (S) so add it to the quotient (Q). */ +2: addq Q, S, t3 + srl S, 1, S + cmpule SY, R, AT + subq R, SY, t4 + + cmovne AT, t3, Q + cmovne AT, t4, R + srl SY, 1, SY + bne S, 2b + + br $q_low_ret + + .align 4 +$fix_sign_in: + /* If we got here, then X|Y is negative. Need to adjust everything + such that we're doing unsigned division in the fixup loop. */ + /* T5 is true if result should be negative. 
*/ + xor X, Y, AT + cmplt AT, 0, t5 + cmplt X, 0, AT + negq X, t0 + + cmovne AT, t0, X + cmplt Y, 0, AT + negq Y, t0 + + cmovne AT, t0, Y + blbc t5, $fix_sign_in_ret1 + + cvttq/c $f0, $f0 + _FTOIT $f0, Q, 8 + .align 3 + negq Q, Q + br $fix_sign_in_ret2 + +$divbyzero: + mov a0, v0 + lda a0, GEN_INTDIV + call_pal PAL_gentrap + stq zero, 0(v0) + stq zero, 8(v0) + +#if FRAME > 0 + lda sp, FRAME(sp) +#endif + ret + + .end ldiv + +weak_alias (ldiv, lldiv) +weak_alias (ldiv, imaxdiv) diff --git a/REORG.TODO/sysdeps/alpha/ldsodefs.h b/REORG.TODO/sysdeps/alpha/ldsodefs.h new file mode 100644 index 0000000000..09dfed764f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/ldsodefs.h @@ -0,0 +1,42 @@ +/* Run-time dynamic linker data structures for loaded ELF shared objects. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef __LDSODEFS_H + +#include <elf.h> + +struct La_alpha_regs; +struct La_alpha_retval; + +#define ARCH_PLTENTER_MEMBERS \ + Elf64_Addr (*alpha_gnu_pltenter) (Elf64_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + struct La_alpha_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); + +#define ARCH_PLTEXIT_MEMBERS \ + unsigned int (*alpha_gnu_pltexit) (Elf64_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + const struct La_alpha_regs *, \ + struct La_alpha_retval *, \ + const char *); + +#include_next <ldsodefs.h> + +#endif diff --git a/REORG.TODO/sysdeps/alpha/libc-tls.c b/REORG.TODO/sysdeps/alpha/libc-tls.c new file mode 100644 index 0000000000..392e6c7289 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/libc-tls.c @@ -0,0 +1,32 @@ +/* Thread-local storage handling in the ELF dynamic linker. Alpha version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <csu/libc-tls.c> +#include <dl-tls.h> + +/* On Alpha, linker optimizations are not required, so __tls_get_addr + can be called even in statically linked binaries. In this case module + must be always 1 and PT_TLS segment exist in the binary, otherwise it + would not link. 
*/ + +void * +__tls_get_addr (tls_index *ti) +{ + dtv_t *dtv = THREAD_DTV (); + return (char *) dtv[1].pointer.val + ti->ti_offset; +} diff --git a/REORG.TODO/sysdeps/alpha/lldiv.S b/REORG.TODO/sysdeps/alpha/lldiv.S new file mode 100644 index 0000000000..80c450a3fc --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/lldiv.S @@ -0,0 +1 @@ +/* lldiv is the same as ldiv on the Alpha. */ diff --git a/REORG.TODO/sysdeps/alpha/lshift.S b/REORG.TODO/sysdeps/alpha/lshift.S new file mode 100644 index 0000000000..a9386e50c0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/lshift.S @@ -0,0 +1,107 @@ + # Alpha 21064 __mpn_lshift -- + + # Copyright (C) 1994-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr r16 + # s1_ptr r17 + # size r18 + # cnt r19 + + # This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling, + # it would take 4 cycles/limb. It should be possible to get down to 3 + # cycles/limb since both ldq and stq can be paired with the other used + # instructions. But there are many restrictions in the 21064 pipeline that + # make it hard, if not impossible, to get down to 3 cycles/limb: + + # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay. + # 2. Only aligned instruction pairs can be paired. + # 3.
The store buffer or silo might not be able to deal with the bandwidth. + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_lshift + .ent __mpn_lshift +__mpn_lshift: + .frame $30,0,$26,0 + + s8addq $18,$17,$17 # make r17 point at end of s1 + ldq $4,-8($17) # load first limb + subq $17,8,$17 + subq $31,$19,$7 + s8addq $18,$16,$16 # make r16 point at end of RES + subq $18,1,$18 + and $18,4-1,$20 # number of limbs in first loop + srl $4,$7,$0 # compute function result + + beq $20,.L0 + subq $18,$20,$18 + + .align 3 +.Loop0: + ldq $3,-8($17) + subq $16,8,$16 + subq $17,8,$17 + subq $20,1,$20 + sll $4,$19,$5 + srl $3,$7,$6 + bis $3,$3,$4 + bis $5,$6,$8 + stq $8,0($16) + bne $20,.Loop0 + +.L0: beq $18,.Lend + + .align 3 +.Loop: ldq $3,-8($17) + subq $16,32,$16 + subq $18,4,$18 + sll $4,$19,$5 + srl $3,$7,$6 + + ldq $4,-16($17) + sll $3,$19,$1 + bis $5,$6,$8 + stq $8,24($16) + srl $4,$7,$2 + + ldq $3,-24($17) + sll $4,$19,$5 + bis $1,$2,$8 + stq $8,16($16) + srl $3,$7,$6 + + ldq $4,-32($17) + sll $3,$19,$1 + bis $5,$6,$8 + stq $8,8($16) + srl $4,$7,$2 + + subq $17,32,$17 + bis $1,$2,$8 + stq $8,0($16) + + bgt $18,.Loop + +.Lend: sll $4,$19,$8 + stq $8,-8($16) + ret $31,($26),1 + .end __mpn_lshift diff --git a/REORG.TODO/sysdeps/alpha/machine-gmon.h b/REORG.TODO/sysdeps/alpha/machine-gmon.h new file mode 100644 index 0000000000..6937f6bb99 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/machine-gmon.h @@ -0,0 +1,25 @@ +/* Machine-specific calling sequence for `mcount' profiling function. alpha + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define _MCOUNT_DECL(from, self) \ + void __mcount (u_long from, u_long self) + +/* Call __mcount with the return PC for our caller, and the return + PC our caller will return to. Empty since we use an assembly stub + instead. */ +#define MCOUNT diff --git a/REORG.TODO/sysdeps/alpha/memchr.c b/REORG.TODO/sysdeps/alpha/memchr.c new file mode 100644 index 0000000000..402088dc9e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/memchr.c @@ -0,0 +1,177 @@ +/* Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>.
*/ + +#include <string.h> + +typedef unsigned long word; + +static inline word +ldq_u(const void *s) +{ + return *(const word *)((word)s & -8); +} + +#define unlikely(X) __builtin_expect ((X), 0) +#define prefetch(X) __builtin_prefetch ((void *)(X), 0) + +#define cmpbeq0(X) __builtin_alpha_cmpbge(0, (X)) +#define find(X, Y) cmpbeq0 ((X) ^ (Y)) + +/* Search no more than N bytes of S for C. */ + +void * +__memchr (const void *s, int xc, size_t n) +{ + const word *s_align; + word t, current, found, mask, offset; + + if (unlikely (n == 0)) + return 0; + + current = ldq_u (s); + + /* Replicate low byte of XC into all bytes of C. */ + t = xc & 0xff; /* 0000000c */ + t = (t << 8) | t; /* 000000cc */ + t = (t << 16) | t; /* 0000cccc */ + const word c = (t << 32) | t; /* cccccccc */ + + /* Align the source, and decrement the count by the number + of bytes searched in the first word. */ + s_align = (const word *)((word)s & -8); + { + size_t inc = n + ((word)s & 7); + n = inc | -(inc < n); + } + + /* Deal with misalignment in the first word for the comparison. */ + mask = (1ul << ((word)s & 7)) - 1; + + /* If the entire string fits within one word, we may need masking + at both the front and the back of the string. */ + if (unlikely (n <= 8)) + { + mask |= -1ul << n; + goto last_quad; + } + + found = find (current, c) & ~mask; + if (unlikely (found)) + goto found_it; + + s_align++; + n -= 8; + + /* If the block is sufficiently large, align to cacheline and prefetch. */ + if (unlikely (n >= 256)) + { + /* Prefetch 3 cache lines beyond the one we're working on. */ + prefetch (s_align + 8); + prefetch (s_align + 16); + prefetch (s_align + 24); + + while ((word)s_align & 63) + { + current = *s_align; + found = find (current, c); + if (found) + goto found_it; + s_align++; + n -= 8; + } + + /* Within each cacheline, advance the load for the next word + before the test for the previous word is complete. This + allows us to hide the 3 cycle L1 cache load latency. 
We + only perform this advance load within a cacheline to prevent + reading across page boundary. */ +#define CACHELINE_LOOP \ + do { \ + word i, next = s_align[0]; \ + for (i = 0; i < 7; ++i) \ + { \ + current = next; \ + next = s_align[1]; \ + found = find (current, c); \ + if (unlikely (found)) \ + goto found_it; \ + s_align++; \ + } \ + current = next; \ + found = find (current, c); \ + if (unlikely (found)) \ + goto found_it; \ + s_align++; \ + n -= 64; \ + } while (0) + + /* While there's still lots more data to potentially be read, + continue issuing prefetches for the 4th cacheline out. */ + while (n >= 256) + { + prefetch (s_align + 24); + CACHELINE_LOOP; + } + + /* Up to 3 cache lines remaining. Continue issuing advanced + loads, but stop prefetching. */ + while (n >= 64) + CACHELINE_LOOP; + + /* We may have exhausted the buffer. */ + if (n == 0) + return NULL; + } + + /* Quadword aligned loop. */ + current = *s_align; + while (n > 8) + { + found = find (current, c); + if (unlikely (found)) + goto found_it; + current = *++s_align; + n -= 8; + } + + /* The last word may need masking at the tail of the compare. */ + mask = -1ul << n; + last_quad: + found = find (current, c) & ~mask; + if (found == 0) + return NULL; + + found_it: +#ifdef __alpha_cix__ + offset = __builtin_alpha_cttz (found); +#else + /* Extract LSB. */ + found &= -found; + + /* Binary search for the LSB. */ + offset = (found & 0x0f ? 0 : 4); + offset += (found & 0x33 ? 0 : 2); + offset += (found & 0x55 ? 0 : 1); +#endif + + return (void *)((word)s_align + offset); +} + +#ifdef weak_alias +weak_alias (__memchr, memchr) +#endif +libc_hidden_builtin_def (memchr) diff --git a/REORG.TODO/sysdeps/alpha/memset.S b/REORG.TODO/sysdeps/alpha/memset.S new file mode 100644 index 0000000000..2e061b90e6 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/memset.S @@ -0,0 +1,127 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. 
+ Contributed by Richard Henderson (rth@tamu.edu) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Fill a block of memory with a character. Optimized for the Alpha + architecture: + + - memory accessed as aligned quadwords only + - destination memory not read unless needed for good cache behaviour + - basic blocks arranged to optimize branch prediction for full-quadword + aligned memory blocks. + - partial head and tail quadwords constructed with byte-mask instructions + + This is generally scheduled for the EV5 (got to look out for my own + interests :-), but with EV4 needs in mind. There *should* be no more + stalls for the EV4 than there are for the EV5. +*/ + + +#include <sysdep.h> + + .set noat + .set noreorder + + .text + .type memset, @function + .globl memset + .usepv memset, USEPV_PROF + + cfi_startproc + + /* On entry to this basic block: + t3 == loop counter + t4 == bytes in partial final word + a0 == possibly misaligned destination pointer + a1 == replicated source character */ + + .align 3 +memset_loop: + beq t3, $tail + blbc t3, 0f # skip single store if count even + + stq_u a1, 0(a0) # e0 : store one word + subq t3, 1, t3 # .. e1 : + addq a0, 8, a0 # e0 : + beq t3, $tail # .. e1 : + +0: stq_u a1, 0(a0) # e0 : store two words + subq t3, 2, t3 # .. 
e1 : + stq_u a1, 8(a0) # e0 : + addq a0, 16, a0 # .. e1 : + bne t3, 0b # e1 : + +$tail: bne t4, 1f # is there a tail to do? + ret # no + + .align 3 +1: ldq_u t0, 0(a0) # e1 : yes, load original data + mskql a1, t4, t1 # .. e0 : + mskqh t0, t4, t0 # e0 : + or t0, t1, t0 # e1 (stall) + stq_u t0, 0(a0) # e0 : + ret # .. e1 : + +memset: +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + + and a1, 0xff, a1 # e0 : zero extend input character + mov a0, v0 # .. e1 : move return value in place + sll a1, 8, t0 # e0 : begin replicating the char + beq a2, $done # .. e1 : early exit for zero-length store + or t0, a1, a1 # e0 : + and a0, 7, t1 # .. e1 : dest misalignment + sll a1, 16, t0 # e0 : + addq a2, t1, a2 # .. e1 : add dest misalignment to count + or t0, a1, a1 # e0 : + srl a2, 3, t3 # .. e1 : loop = count >> 3 + sll a1, 32, t0 # e0 : + and a2, 7, t4 # .. e1 : find number of bytes in tail + or t0, a1, a1 # e0 : character replication done + + beq t1, memset_loop # .. e1 : aligned head, jump right in + + ldq_u t0, 0(a0) # e1 : load original data to mask into + mskqh a1, a0, t1 # .. e0 : + + cmpult a2, 8, t2 # e0 : is this a sub-word set? + bne t2, $oneq # .. e1 (zdb) + + mskql t0, a0, t0 # e0 : we span words. finish this partial + subq t3, 1, t3 # .. e1 : + addq a0, 8, a0 # e0 : + or t0, t1, t0 # .. e1 : + stq_u t0, -8(a0) # e0 : + br memset_loop # .. e1 : + + .align 3 +$oneq: + mskql t1, a2, t1 # e0 : entire operation within one word + mskql t0, a0, t2 # e0 : + mskqh t0, a2, t3 # e0 : + or t1, t2, t0 # .. e1 : + or t0, t3, t0 # e1 : + stq_u t0, 0(a0) # e0 (stall) + +$done: ret + + cfi_endproc +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/alpha/memusage.h b/REORG.TODO/sysdeps/alpha/memusage.h new file mode 100644 index 0000000000..83a0ea531b --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/memusage.h @@ -0,0 +1,20 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define GETSP() ({ register uintptr_t stack_ptr asm ("$30"); stack_ptr; }) + +#include <sysdeps/generic/memusage.h> diff --git a/REORG.TODO/sysdeps/alpha/mul_1.S b/REORG.TODO/sysdeps/alpha/mul_1.S new file mode 100644 index 0000000000..099e22b441 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/mul_1.S @@ -0,0 +1,83 @@ + # Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store + # the result in a second limb vector. + + # Copyright (C) 1992-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. 
+ + + # INPUT PARAMETERS + # res_ptr r16 + # s1_ptr r17 + # size r18 + # s2_limb r19 + + # This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5. + + # To improve performance for long multiplications, we would use + # 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use + # these instructions without slowing down the general code: 1. We can + # only have two prefetches in operation at any time in the Alpha + # architecture. 2. There will seldom be any special alignment + # between RES_PTR and S1_PTR. Maybe we can simply divide the current + # loop into an inner and outer loop, having the inner loop handle + # exactly one prefetch block? + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_mul_1 + .ent __mpn_mul_1 2 +__mpn_mul_1: + .frame $30,0,$26 + + ldq $2,0($17) # $2 = s1_limb + subq $18,1,$18 # size-- + mulq $2,$19,$3 # $3 = prod_low + bic $31,$31,$4 # clear cy_limb + umulh $2,$19,$0 # $0 = prod_high + beq $18,Lend1 # jump if size was == 1 + ldq $2,8($17) # $2 = s1_limb + subq $18,1,$18 # size-- + stq $3,0($16) + beq $18,Lend2 # jump if size was == 2 + + .align 3 +Loop: mulq $2,$19,$3 # $3 = prod_low + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + subq $18,1,$18 # size-- + umulh $2,$19,$4 # $4 = cy_limb + ldq $2,16($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + addq $3,$0,$3 # $3 = cy_limb + prod_low + stq $3,8($16) + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + addq $16,8,$16 # res_ptr++ + bne $18,Loop + +Lend2: mulq $2,$19,$3 # $3 = prod_low + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + umulh $2,$19,$4 # $4 = cy_limb + addq $3,$0,$3 # $3 = cy_limb + prod_low + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + stq $3,8($16) + addq $4,$0,$0 # cy_limb = prod_high + cy + ret $31,($26),1 +Lend1: stq $3,0($16) + ret $31,($26),1 + + .end __mpn_mul_1 diff --git a/REORG.TODO/sysdeps/alpha/nptl/Makefile b/REORG.TODO/sysdeps/alpha/nptl/Makefile new file mode 100644 index 0000000000..90f5fcf350 --- /dev/null 
+++ b/REORG.TODO/sysdeps/alpha/nptl/Makefile @@ -0,0 +1,20 @@ +# Copyright (C) 2003-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library. If not, see +# <http://www.gnu.org/licenses/>. + +ifeq ($(subdir),csu) +gen-as-const-headers += tcb-offsets.sym +endif diff --git a/REORG.TODO/sysdeps/alpha/nptl/bits/pthreadtypes-arch.h b/REORG.TODO/sysdeps/alpha/nptl/bits/pthreadtypes-arch.h new file mode 100644 index 0000000000..b6f6cb1347 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/nptl/bits/pthreadtypes-arch.h @@ -0,0 +1,59 @@ +/* Machine-specific pthread type layouts. Alpha version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. 
If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _BITS_PTHREADTYPES_ARCH_H +#define _BITS_PTHREADTYPES_ARCH_H 1 + +#define __SIZEOF_PTHREAD_ATTR_T 56 +#define __SIZEOF_PTHREAD_MUTEX_T 40 +#define __SIZEOF_PTHREAD_MUTEXATTR_T 4 +#define __SIZEOF_PTHREAD_COND_T 48 +#define __SIZEOF_PTHREAD_CONDATTR_T 4 +#define __SIZEOF_PTHREAD_RWLOCK_T 56 +#define __SIZEOF_PTHREAD_RWLOCKATTR_T 8 +#define __SIZEOF_PTHREAD_BARRIER_T 32 +#define __SIZEOF_PTHREAD_BARRIERATTR_T 4 + +/* Definitions for internal mutex struct. */ +#define __PTHREAD_COMPAT_PADDING_MID +#define __PTHREAD_COMPAT_PADDING_END +#define __PTHREAD_MUTEX_LOCK_ELISION 0 + +#define __LOCK_ALIGNMENT +#define __ONCE_ALIGNMENT + +struct __pthread_rwlock_arch_t +{ + unsigned int __readers; + unsigned int __writers; + unsigned int __wrphase_futex; + unsigned int __writers_futex; + unsigned int __pad3; + unsigned int __pad4; + int __cur_writer; + int __shared; + unsigned long int __pad1; + unsigned long int __pad2; + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. */ + unsigned int __flags; +}; + +#define __PTHREAD_RWLOCK_ELISION_EXTRA 0 + +#endif /* bits/pthreadtypes-arch.h */ diff --git a/REORG.TODO/sysdeps/alpha/nptl/pthread_spin_lock.S b/REORG.TODO/sysdeps/alpha/nptl/pthread_spin_lock.S new file mode 100644 index 0000000000..5a28bdce3a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/nptl/pthread_spin_lock.S @@ -0,0 +1,44 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@twiddle.net>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Spin until the 32-bit lock word at 0($16) is taken; returns 0 in $0.  */
+	.text
+	.align	4
+
+	.globl	pthread_spin_lock
+	.ent	pthread_spin_lock
+pthread_spin_lock:
+	.frame	$sp, 0, $26, 0
+	.prologue 0
+
+0:	ldl_l	$1, 0($16)	/* load-locked read of the lock word */
+	lda	$2, 1		/* $2 = value to store (locked) */
+	lda	$0, 0		/* $0 = return value 0 */
+	bne	$1, 1f		/* word is non-zero: go wait at 1 */
+
+	stl_c	$2, 0($16)	/* store-conditional; $2 becomes the success flag */
+	beq	$2, 1f		/* store did not succeed: wait, then retry */
+	mb			/* memory barrier before returning as the owner */
+	ret
+
+1:	ldl	$1, 0($16)	/* plain load while waiting, no ldl_l/stl_c pair */
+	bne	$1, 1b		/* keep reading until the word is zero */
+	unop			/* padding no-op */
+	br	0b		/* word looks free: retry the locked sequence */
+
+	.end	pthread_spin_lock
diff --git a/REORG.TODO/sysdeps/alpha/nptl/pthread_spin_trylock.S b/REORG.TODO/sysdeps/alpha/nptl/pthread_spin_trylock.S
new file mode 100644
index 0000000000..a5eab1353d
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/nptl/pthread_spin_trylock.S
@@ -0,0 +1,45 @@
+/* Copyright (C) 2003-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson <rth@twiddle.net>, 2003.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.
*/
+
+/* Try once to take the lock word at 0($16): $0 = 0 if taken, EBUSY if held.  */
+#define _ERRNO_H 1
+#include <bits/errno.h>
+
+	.text
+	.align	4
+
+	.globl	pthread_spin_trylock
+	.ent	pthread_spin_trylock
+pthread_spin_trylock:
+	.frame	$sp, 0, $26, 0
+	.prologue 0
+
+0:	ldl_l	$1, 0($16)	/* load-locked read of the lock word */
+	lda	$2, 1		/* $2 = value to store (locked) */
+	lda	$0, EBUSY	/* assume failure until the store succeeds */
+	bne	$1, 1f		/* word is non-zero: return EBUSY */
+
+	stl_c	$2, 0($16)	/* store-conditional; $2 becomes the success flag */
+	beq	$2, 2f		/* store did not succeed: redo the sequence */
+	mb			/* memory barrier before returning as the owner */
+	lda	$0, 0		/* lock taken: return 0 */
+
+1:	ret
+2:	br	0b		/* out-of-line retry branch */
+
+	.end	pthread_spin_trylock
diff --git a/REORG.TODO/sysdeps/alpha/nptl/pthreaddef.h b/REORG.TODO/sysdeps/alpha/nptl/pthreaddef.h
new file mode 100644
index 0000000000..581dd60a39
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/nptl/pthreaddef.h
@@ -0,0 +1,31 @@
+/* Copyright (C) 2003-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Default stack size.  */
+#define ARCH_STACK_DEFAULT_SIZE	(4 * 1024 * 1024)
+
+/* Required stack pointer alignment at beginning.  The ABI requires 16.  */
+#define STACK_ALIGN		16
+
+/* Minimal stack size after allocating thread descriptor and guard size.  */
+#define MINIMAL_REST_STACK	4096
+
+/* Alignment requirement for TCB.  */
+#define TCB_ALIGNMENT		16
+
+/* Location of current stack frame.
*/ +#define CURRENT_STACK_FRAME __builtin_frame_address (0) diff --git a/REORG.TODO/sysdeps/alpha/nptl/tcb-offsets.sym b/REORG.TODO/sysdeps/alpha/nptl/tcb-offsets.sym new file mode 100644 index 0000000000..1005621b37 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/nptl/tcb-offsets.sym @@ -0,0 +1,13 @@ +#include <sysdep.h> +#include <tls.h> + +-- + +-- Abuse tls.h macros to derive offsets relative to the thread register. +-- # define __builtin_thread_pointer() ((void *) 0) +-- # define thread_offsetof(mem) ((void *) &THREAD_SELF->mem - (void *) 0) +-- Ho hum, this doesn't work in gcc4, so Know Things about THREAD_SELF +#define thread_offsetof(mem) (long)(offsetof(struct pthread, mem) - sizeof(struct pthread)) + +MULTIPLE_THREADS_OFFSET thread_offsetof (header.multiple_threads) +TID_OFFSET thread_offsetof (tid) diff --git a/REORG.TODO/sysdeps/alpha/nptl/tls.h b/REORG.TODO/sysdeps/alpha/nptl/tls.h new file mode 100644 index 0000000000..c16f5da04a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/nptl/tls.h @@ -0,0 +1,132 @@ +/* Definition for thread-local data handling. NPTL/Alpha version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _TLS_H +#define _TLS_H 1 + +# include <dl-sysdep.h> + +#ifndef __ASSEMBLER__ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <dl-dtv.h> + +/* Get system call information. */ +# include <sysdep.h> + +/* The TP points to the start of the thread blocks. */ +# define TLS_DTV_AT_TP 1 +# define TLS_TCB_AT_TP 0 + +/* Get the thread descriptor definition. */ +# include <nptl/descr.h> + +typedef struct +{ + dtv_t *dtv; + void *__private; +} tcbhead_t; + +/* This is the size of the initial TCB. */ +# define TLS_INIT_TCB_SIZE sizeof (tcbhead_t) + +/* Alignment requirements for the initial TCB. */ +# define TLS_INIT_TCB_ALIGN 16 + +/* This is the size of the TCB. */ +# define TLS_TCB_SIZE sizeof (tcbhead_t) + +/* This is the size we need before TCB. */ +# define TLS_PRE_TCB_SIZE sizeof (struct pthread) + +/* Alignment requirements for the TCB. */ +# define TLS_TCB_ALIGN 16 + +/* Install the dtv pointer. The pointer passed is to the element with + index -1 which contain the length. */ +# define INSTALL_DTV(tcbp, dtvp) \ + (((tcbhead_t *) (tcbp))->dtv = (dtvp) + 1) + +/* Install new dtv for current thread. */ +# define INSTALL_NEW_DTV(dtv) \ + (THREAD_DTV() = (dtv)) + +/* Return dtv of given thread descriptor. */ +# define GET_DTV(tcbp) \ + (((tcbhead_t *) (tcbp))->dtv) + +/* Code to initially initialize the thread pointer. This might need + special attention since 'errno' is not yet available and if the + operation can cause a failure 'errno' must not be touched. */ +# define TLS_INIT_TP(tcbp) \ + (__builtin_set_thread_pointer ((void *)(tcbp)), NULL) + +/* Value passed to 'clone' for initialization of the thread register. */ +# define TLS_DEFINE_INIT_TP(tp, pd) void *tp = (pd) + 1 + +/* Return the address of the dtv for the current thread. */ +# define THREAD_DTV() \ + (((tcbhead_t *) __builtin_thread_pointer ())->dtv) + +/* Return the thread descriptor for the current thread. 
*/ +# define THREAD_SELF \ + ((struct pthread *)__builtin_thread_pointer () - 1) + +/* Magic for libthread_db to know how to do THREAD_SELF. */ +# define DB_THREAD_SELF \ + REGISTER (64, 64, 32 * 8, -sizeof (struct pthread)) + +/* Access to data in the thread descriptor is easy. */ +#define THREAD_GETMEM(descr, member) \ + descr->member +#define THREAD_GETMEM_NC(descr, member, idx) \ + descr->member[idx] +#define THREAD_SETMEM(descr, member, value) \ + descr->member = (value) +#define THREAD_SETMEM_NC(descr, member, idx, value) \ + descr->member[idx] = (value) + +/* Get and set the global scope generation counter in struct pthread. */ +#define THREAD_GSCOPE_FLAG_UNUSED 0 +#define THREAD_GSCOPE_FLAG_USED 1 +#define THREAD_GSCOPE_FLAG_WAIT 2 +#define THREAD_GSCOPE_RESET_FLAG() \ + do \ + { int __res \ + = atomic_exchange_rel (&THREAD_SELF->header.gscope_flag, \ + THREAD_GSCOPE_FLAG_UNUSED); \ + if (__res == THREAD_GSCOPE_FLAG_WAIT) \ + lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \ + } \ + while (0) +#define THREAD_GSCOPE_SET_FLAG() \ + do \ + { \ + THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED; \ + atomic_write_barrier (); \ + } \ + while (0) +#define THREAD_GSCOPE_WAIT() \ + GL(dl_wait_lookup_done) () + +#else /* __ASSEMBLER__ */ +# include <tcb-offsets.h> +#endif /* __ASSEMBLER__ */ + +#endif /* tls.h */ diff --git a/REORG.TODO/sysdeps/alpha/nscd-types.h b/REORG.TODO/sysdeps/alpha/nscd-types.h new file mode 100644 index 0000000000..c222ef1204 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/nscd-types.h @@ -0,0 +1,21 @@ +/* Types for the NSCD implementation. Alpha version. + Copyright (c) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +typedef int64_t nscd_ssize_t; diff --git a/REORG.TODO/sysdeps/alpha/preconfigure b/REORG.TODO/sysdeps/alpha/preconfigure new file mode 100644 index 0000000000..cb02cffc3a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/preconfigure @@ -0,0 +1,3 @@ +case "$machine" in +alpha*) base_machine=alpha machine=alpha/$machine +esac diff --git a/REORG.TODO/sysdeps/alpha/rawmemchr.S b/REORG.TODO/sysdeps/alpha/rawmemchr.S new file mode 100644 index 0000000000..d32d037bfa --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/rawmemchr.S @@ -0,0 +1,89 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Return pointer to first occurrence of CH in STR. 
*/ + +#include <sysdep.h> + + .set noreorder + .set noat + +ENTRY(__rawmemchr) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + zapnot a1, 1, a1 # e0 : zero extend the search character + ldq_u t0, 0(a0) # .. e1 : load first quadword + sll a1, 8, t5 # e0 : replicate the search character + andnot a0, 7, v0 # .. e1 : align our loop pointer + + or t5, a1, a1 # e0 : + lda t4, -1 # .. e1 : build garbage mask + sll a1, 16, t5 # e0 : + unop # : + + mskqh t4, a0, t4 # e0 : + or t5, a1, a1 # .. e1 : + sll a1, 32, t5 # e0 : + cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage + + or t5, a1, a1 # e0 : + xor t0, a1, t1 # .. e1 : make bytes == c zero + cmpbge zero, t1, t3 # e0 : bits set iff byte == c + unop # : + + andnot t3, t4, t0 # e0 : clear garbage bits + fnop # .. fa : + unop # : + bne t0, $found # .. e1 (zdb) + + .align 4 +$loop: + ldq t0, 8(v0) # e0 : + addq v0, 8, v0 # .. e1 : + nop # e0 : + xor t0, a1, t1 # .. e1 (ev5 data stall) + + cmpbge zero, t1, t0 # e0 : bits set iff byte == c + beq t0, $loop # .. e1 (zdb) + +$found: + negq t0, t1 # e0 : clear all but least set bit + and t0, t1, t0 # e1 (stall) + and t0, 0xf0, t2 # e0 : binary search for that set bit + and t0, 0xcc, t3 # .. e1 : + + and t0, 0xaa, t4 # e0 : + cmovne t2, 4, t2 # .. e1 : + cmovne t3, 2, t3 # e0 : + cmovne t4, 1, t4 # .. e1 : + + addq t2, t3, t2 # e0 : + addq v0, t4, v0 # .. e1 : + addq v0, t2, v0 # e0 : + ret # .. e1 : + + END(__rawmemchr) + +libc_hidden_def (__rawmemchr) +weak_alias (__rawmemchr, rawmemchr) diff --git a/REORG.TODO/sysdeps/alpha/reml.S b/REORG.TODO/sysdeps/alpha/reml.S new file mode 100644 index 0000000000..60fc6b8cad --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/reml.S @@ -0,0 +1,86 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@twiddle.net> + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + +/* 32-bit signed int remainder. This is not a normal C function. Argument + registers are t10 and t11, the result goes in t12. Only t12 and AT may + be clobbered. + + The FPU can handle the division for all input values except zero. + All we have to do is compute the remainder via multiply-and-subtract. + + The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE + for cvttq/c even without /sui being set. It will not, however, properly + raise the exception, so we don't have to worry about FPCR_INED being clear + and so dying by SIGFPE. 
*/ + +#ifndef EXTEND +#define EXTEND(S,D) sextl S, D +#endif + + .text + .align 4 + .globl __reml + .type __reml, @funcnoplt + .usepv __reml, no + + cfi_startproc + cfi_return_column (RA) +__reml: + lda sp, -FRAME(sp) + cfi_def_cfa_offset (FRAME) + CALL_MCOUNT + stt $f0, 0(sp) + excb + beq Y, DIVBYZERO + + stt $f1, 8(sp) + stt $f2, 16(sp) + cfi_rel_offset ($f0, 0) + cfi_rel_offset ($f1, 8) + cfi_rel_offset ($f2, 16) + mf_fpcr $f2 + + EXTEND (X, RV) + EXTEND (Y, AT) + _ITOFT2 RV, $f0, 24, AT, $f1, 32 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + cvttq/c $f0, $f0 + excb + mt_fpcr $f2 + _FTOIT $f0, RV, 24 + + ldt $f0, 0(sp) + mull RV, Y, RV + ldt $f1, 8(sp) + ldt $f2, 16(sp) + lda sp, FRAME(sp) + cfi_restore ($f0) + cfi_restore ($f1) + cfi_restore ($f2) + cfi_def_cfa_offset (0) + subl X, RV, RV + ret $31, (RA), 1 + + cfi_endproc + .size __reml, .-__reml + + DO_DIVBYZERO diff --git a/REORG.TODO/sysdeps/alpha/remlu.S b/REORG.TODO/sysdeps/alpha/remlu.S new file mode 100644 index 0000000000..f8691e19a4 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/remlu.S @@ -0,0 +1,4 @@ +#define UNSIGNED +#define EXTEND(S,D) zapnot S, 15, D +#define __reml __remlu +#include <reml.S> diff --git a/REORG.TODO/sysdeps/alpha/remq.S b/REORG.TODO/sysdeps/alpha/remq.S new file mode 100644 index 0000000000..cfc82aeb8b --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/remq.S @@ -0,0 +1,268 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + + +/* 64-bit signed long remainder. These are not normal C functions. Argument + registers are t10 and t11, the result goes in t12. Only t12 and AT may + be clobbered. + + Theory of operation here is that we can use the FPU divider for virtually + all operands that we see: all dividend values between -2**53 and 2**53-1 + can be computed directly. Note that divisor values need not be checked + against that range because the rounded fp value will be close enough such + that the quotient is < 1, which will properly be truncated to zero when we + convert back to integer. + + When the dividend is outside the range for which we can compute exact + results, we use the fp quotient as an estimate from which we begin refining + an exact integral value. This reduces the number of iterations in the + shift-and-subtract loop significantly. + + The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE + for cvttq/c even without /sui being set. It will not, however, properly + raise the exception, so we don't have to worry about FPCR_INED being clear + and so dying by SIGFPE. */ + + .text + .align 4 + .globl __remq + .type __remq, @funcnoplt + .usepv __remq, no + + cfi_startproc + cfi_return_column (RA) +__remq: + lda sp, -FRAME(sp) + cfi_def_cfa_offset (FRAME) + CALL_MCOUNT + + /* Get the fp divide insn issued as quickly as possible. After + that's done, we have at least 22 cycles until its results are + ready -- all the time in the world to figure out how we're + going to use the results.
*/ + stt $f0, 0(sp) + excb + beq Y, DIVBYZERO + + stt $f1, 8(sp) + stt $f3, 48(sp) + cfi_rel_offset ($f0, 0) + cfi_rel_offset ($f1, 8) + cfi_rel_offset ($f3, 48) + mf_fpcr $f3 + + _ITOFT2 X, $f0, 16, Y, $f1, 24 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + + /* Check to see if X fit in the double as an exact value. */ + sll X, (64-53), AT + ldt $f1, 8(sp) + sra AT, (64-53), AT + cmpeq X, AT, AT + beq AT, $x_big + + /* If we get here, we're expecting exact results from the division. + Do nothing else besides convert, compute remainder, clean up. */ + cvttq/c $f0, $f0 + excb + mt_fpcr $f3 + _FTOIT $f0, AT, 16 + mulq AT, Y, AT + ldt $f0, 0(sp) + ldt $f3, 48(sp) + cfi_restore ($f1) + cfi_remember_state + cfi_restore ($f0) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + lda sp, FRAME(sp) + subq X, AT, RV + ret $31, (RA), 1 + + .align 4 + cfi_restore_state +$x_big: + /* If we get here, X is large enough that we don't expect exact + results, and neither X nor Y got mis-translated for the fp + division. Our task is to take the fp result, figure out how + far it's off from the correct result and compute a fixup. */ + stq t0, 16(sp) + stq t1, 24(sp) + stq t2, 32(sp) + stq t5, 40(sp) + cfi_rel_offset (t0, 16) + cfi_rel_offset (t1, 24) + cfi_rel_offset (t2, 32) + cfi_rel_offset (t5, 40) + +#define Q t0 /* quotient */ +#define R RV /* remainder */ +#define SY t1 /* scaled Y */ +#define S t2 /* scalar */ +#define QY t3 /* Q*Y */ + + /* The fixup code below can only handle unsigned values. 
*/ + or X, Y, AT + mov $31, t5 + blt AT, $fix_sign_in +$fix_sign_in_ret1: + cvttq/c $f0, $f0 + + _FTOIT $f0, Q, 8 + .align 3 +$fix_sign_in_ret2: + ldt $f0, 0(sp) + stq t3, 0(sp) + cfi_restore ($f0) + cfi_rel_offset (t3, 0) + + mulq Q, Y, QY + excb + stq t4, 8(sp) + mt_fpcr $f3 + cfi_rel_offset (t4, 8) + + subq QY, X, R + mov Y, SY + mov 1, S + bgt R, $q_high + +$q_high_ret: + subq X, QY, R + mov Y, SY + mov 1, S + bgt R, $q_low + +$q_low_ret: + ldq t0, 16(sp) + ldq t1, 24(sp) + ldq t2, 32(sp) + bne t5, $fix_sign_out + +$fix_sign_out_ret: + ldq t3, 0(sp) + ldq t4, 8(sp) + ldq t5, 40(sp) + ldt $f3, 48(sp) + lda sp, FRAME(sp) + cfi_remember_state + cfi_restore (t0) + cfi_restore (t1) + cfi_restore (t2) + cfi_restore (t3) + cfi_restore (t4) + cfi_restore (t5) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + ret $31, (RA), 1 + + .align 4 + cfi_restore_state + /* The quotient that we computed was too large. We need to reduce + it by S such that Y*S >= R. Obviously the closer we get to the + correct value the better, but overshooting high is ok, as we'll + fix that up later. */ +0: + addq SY, SY, SY + addq S, S, S +$q_high: + cmpult SY, R, AT + bne AT, 0b + + subq Q, S, Q + unop + subq QY, SY, QY + br $q_high_ret + + .align 4 + /* The quotient that we computed was too small. Divide Y by the + current remainder (R) and add that to the existing quotient (Q). + The expectation, of course, is that R is much smaller than X. */ + /* Begin with a shift-up loop. Compute S such that Y*S >= R. We + already have a copy of Y in SY and the value 1 in S. */ +0: + addq SY, SY, SY + addq S, S, S +$q_low: + cmpult SY, R, AT + bne AT, 0b + + /* Shift-down and subtract loop. Each iteration compares our scaled + Y (SY) with the remainder (R); if SY <= R then X is divisible by + Y's scalar (S) so add it to the quotient (Q). 
*/
+2:	addq	Q, S, t3
+	srl	S, 1, S
+	cmpule	SY, R, AT
+	subq	R, SY, t4
+
+	cmovne	AT, t3, Q
+	cmovne	AT, t4, R
+	srl	SY, 1, SY
+	bne	S, 2b
+
+	br	$q_low_ret
+
+	.align	4
+$fix_sign_in:
+	/* If we got here, then X|Y is negative.  Need to adjust everything
+	   such that we're doing unsigned division in the fixup loop.  */
+	/* T5 records the changes we had to make:
+		bit 0:	set if X was negated.  Note that the sign of the
+			remainder follows the sign of the dividend (X).
+		bit 2:	set if Y was negated.
+	*/
+	xor	X, Y, t1
+	cmplt	X, 0, t5
+	negq	X, t0
+	cmovne	t5, t0, X
+
+	cmplt	Y, 0, AT
+	negq	Y, t0
+	s4addq	AT, t5, t5
+	cmovne	AT, t0, Y
+
+	bge	t1, $fix_sign_in_ret1
+	cvttq/c	$f0, $f0
+	_FTOIT	$f0, Q, 8
+	.align	3
+	negq	Q, Q
+	br	$fix_sign_in_ret2
+
+	.align	4
+$fix_sign_out:
+	/* Now we get to undo what we did above.  */
+	/* ??? Is this really faster than just increasing the size of
+	   the stack frame and storing X and Y in memory?  */
+	and	t5, 4, AT
+	negq	Y, t4
+	cmovne	AT, t4, Y
+
+	negq	X, t4
+	cmovlbs	t5, t4, X
+	negq	RV, t4
+	cmovlbs	t5, t4, RV
+
+	br	$fix_sign_out_ret
+
+	cfi_endproc
+	.size	__remq, .-__remq
+
+	DO_DIVBYZERO
diff --git a/REORG.TODO/sysdeps/alpha/remqu.S b/REORG.TODO/sysdeps/alpha/remqu.S
new file mode 100644
index 0000000000..732a350ea9
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/remqu.S
@@ -0,0 +1,278 @@
+/* Copyright (C) 2004-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "div_libc.h"
+
+
+/* 64-bit unsigned long remainder.  These are not normal C functions.  Argument
+   registers are t10 and t11, the result goes in t12.  Only t12 and AT may be
+   clobbered.
+
+   Theory of operation here is that we can use the FPU divider for virtually
+   all operands that we see: all dividend values between 0 and 2**53-1
+   can be computed directly.  Note that divisor values need not be checked
+   against that range because the rounded fp value will be close enough such
+   that the quotient is < 1, which will properly be truncated to zero when we
+   convert back to integer.
+
+   When the dividend is outside the range for which we can compute exact
+   results, we use the fp quotient as an estimate from which we begin refining
+   an exact integral value.  This reduces the number of iterations in the
+   shift-and-subtract loop significantly.
+
+   The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE
+   for cvttq/c even without /sui being set.  It will not, however, properly
+   raise the exception, so we don't have to worry about FPCR_INED being clear
+   and so dying by SIGFPE.  */
+
+	.text
+	.align	4
+	.globl	__remqu
+	.type	__remqu, @funcnoplt
+	.usepv	__remqu, no
+
+	cfi_startproc
+	cfi_return_column (RA)
+__remqu:
+	lda	sp, -FRAME(sp)
+	cfi_def_cfa_offset (FRAME)
+	CALL_MCOUNT
+
+	/* Get the fp divide insn issued as quickly as possible.  After
+	   that's done, we have at least 22 cycles until its results are
+	   ready -- all the time in the world to figure out how we're
+	   going to use the results.  */
+	subq	Y, 1, AT
+	stt	$f0, 0(sp)
+	and	Y, AT, AT
+
+	stt	$f1, 8(sp)
+	excb
+	stt	$f3, 48(sp)
+	beq	AT, $powerof2
+	cfi_rel_offset ($f0, 0)
+	cfi_rel_offset ($f1, 8)
+	cfi_rel_offset ($f3, 48)
+
+	_ITOFT2	X, $f0, 16, Y, $f1, 24
+	mf_fpcr	$f3
+	cvtqt	$f0, $f0
+	cvtqt	$f1, $f1
+
+	blt	X, $x_is_neg
+	divt/c	$f0, $f1, $f0
+
+	/* Check to see if Y was mis-converted as signed value.  */
+	ldt	$f1, 8(sp)
+	blt	Y, $y_is_neg
+
+	/* Check to see if X fit in the double as an exact value.  */
+	srl	X, 53, AT
+	bne	AT, $x_big
+
+	/* If we get here, we're expecting exact results from the division.
+	   Do nothing else besides convert, compute remainder, clean up.  */
+	cvttq/c	$f0, $f0
+	excb
+	mt_fpcr	$f3
+	_FTOIT	$f0, AT, 16
+
+	mulq	AT, Y, AT
+	ldt	$f0, 0(sp)
+	ldt	$f3, 48(sp)
+	lda	sp, FRAME(sp)
+	cfi_remember_state
+	cfi_restore ($f0)
+	cfi_restore ($f1)
+	cfi_restore ($f3)
+	cfi_def_cfa_offset (0)
+
+	.align	4
+	subq	X, AT, RV
+	ret	$31, (RA), 1
+
+	.align	4
+	cfi_restore_state
+$x_is_neg:
+	/* If we get here, X is so big that bit 63 is set, which made the
+	   conversion come out negative.  Fix it up lest we not even get
+	   a good estimate.  */
+	ldah	AT, 0x5f80		/* 2**64 as float.  */
+	stt	$f2, 24(sp)
+	cfi_rel_offset ($f2, 24)
+	_ITOFS	AT, $f2, 16
+
+	addt	$f0, $f2, $f0
+	divt/c	$f0, $f1, $f0
+
+	/* Ok, we've now the divide issued.  Continue with other checks.  */
+	.align	4
+	ldt	$f1, 8(sp)
+	unop
+	ldt	$f2, 24(sp)
+	blt	Y, $y_is_neg
+	cfi_restore ($f1)
+	cfi_restore ($f2)
+	cfi_remember_state	/* for y_is_neg */
+
+	.align	4
+$x_big:
+	/* If we get here, X is large enough that we don't expect exact
+	   results, and neither X nor Y got mis-translated for the fp
+	   division.  Our task is to take the fp result, figure out how
+	   far it's off from the correct result and compute a fixup.  */
+	stq	t0, 16(sp)
+	stq	t1, 24(sp)
+	stq	t2, 32(sp)
+	stq	t3, 40(sp)
+	cfi_rel_offset (t0, 16)
+	cfi_rel_offset (t1, 24)
+	cfi_rel_offset (t2, 32)
+	cfi_rel_offset (t3, 40)
+
+#define Q	t0		/* quotient */
+#define R	RV		/* remainder */
+#define SY	t1		/* scaled Y */
+#define S	t2		/* scalar */
+#define QY	t3		/* Q*Y */
+
+	cvttq/c	$f0, $f0
+	_FTOIT	$f0, Q, 8
+	mulq	Q, Y, QY
+
+	.align	4
+	stq	t4, 8(sp)
+	excb
+	ldt	$f0, 0(sp)
+	mt_fpcr	$f3
+	cfi_rel_offset (t4, 8)
+	cfi_restore ($f0)
+
+	subq	QY, X, R
+	mov	Y, SY
+	mov	1, S
+	bgt	R, $q_high
+
+$q_high_ret:
+	subq	X, QY, R
+	mov	Y, SY
+	mov	1, S
+	bgt	R, $q_low
+
+$q_low_ret:
+	ldq	t4, 8(sp)
+	ldq	t0, 16(sp)
+	ldq	t1, 24(sp)
+	ldq	t2, 32(sp)
+
+	ldq	t3, 40(sp)
+	ldt	$f3, 48(sp)
+	lda	sp, FRAME(sp)
+	cfi_remember_state
+	cfi_restore (t0)
+	cfi_restore (t1)
+	cfi_restore (t2)
+	cfi_restore (t3)
+	cfi_restore (t4)
+	cfi_restore ($f3)
+	cfi_def_cfa_offset (0)
+	ret	$31, (RA), 1
+
+	.align	4
+	cfi_restore_state
+	/* The quotient that we computed was too large.  We need to reduce
+	   it by S such that Y*S >= R.  Obviously the closer we get to the
+	   correct value the better, but overshooting high is ok, as we'll
+	   fix that up later.  */
+0:
+	addq	SY, SY, SY
+	addq	S, S, S
+$q_high:
+	cmpult	SY, R, AT
+	bne	AT, 0b
+
+	subq	Q, S, Q
+	unop
+	subq	QY, SY, QY
+	br	$q_high_ret
+
+	.align	4
+	/* The quotient that we computed was too small.  Divide Y by the
+	   current remainder (R) and add that to the existing quotient (Q).
+	   The expectation, of course, is that R is much smaller than X.  */
+	/* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
+	   already have a copy of Y in SY and the value 1 in S.  */
+0:
+	addq	SY, SY, SY
+	addq	S, S, S
+$q_low:
+	cmpult	SY, R, AT
+	bne	AT, 0b
+
+	/* Shift-down and subtract loop.  Each iteration compares our scaled
+	   Y (SY) with the remainder (R); if SY <= R then X is divisible by
+	   Y's scalar (S) so add it to the quotient (Q).  */
+2:	addq	Q, S, t3
+	srl	S, 1, S
+	cmpule	SY, R, AT
+	subq	R, SY, t4
+
+	cmovne	AT, t3, Q
+	cmovne	AT, t4, R
+	srl	SY, 1, SY
+	bne	S, 2b
+
+	br	$q_low_ret
+
+	.align	4
+	cfi_restore_state
+$y_is_neg:
+	/* If we get here, Y is so big that bit 63 is set.  The results
+	   from the divide will be completely wrong.  Fortunately, the
+	   quotient must be either 0 or 1, so the remainder must be X
+	   or X-Y, so just compute it directly.  */
+	cmpule	Y, X, AT
+	subq	X, Y, RV
+	/* Every path to this label has already captured the FPCR in $f3
+	   and issued the divide, so put both back before returning; only
+	   t12 and AT may be clobbered.  */
+	excb
+	mt_fpcr	$f3
+	ldt	$f0, 0(sp)
+	ldt	$f3, 48(sp)
+	cmoveq	AT, X, RV
+
+	lda	sp, FRAME(sp)
+	cfi_restore ($f0)
+	cfi_restore ($f3)
+	cfi_def_cfa_offset (0)
+	ret	$31, (RA), 1
+
+	.align	4
+	cfi_def_cfa_offset (FRAME)
+$powerof2:
+	subq	Y, 1, AT
+	beq	Y, DIVBYZERO
+	and	X, AT, RV
+	lda	sp, FRAME(sp)
+	cfi_def_cfa_offset (0)
+	ret	$31, (RA), 1
+
+	cfi_endproc
+	.size	__remqu, .-__remqu
+
+	DO_DIVBYZERO
diff --git a/REORG.TODO/sysdeps/alpha/rshift.S b/REORG.TODO/sysdeps/alpha/rshift.S
new file mode 100644
index 0000000000..65db71c2c9
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/rshift.S
@@ -0,0 +1,105 @@
+ # Alpha 21064 __mpn_rshift --
+
+ # Copyright (C) 1994-2017 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library.  If not, see <http://www.gnu.org/licenses/>.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	r16
+ # s1_ptr	r17
+ # size		r18
+ # cnt		r19
+
+ # This code runs at 4.8 cycles/limb on the 21064.
With infinite unrolling, + # it would take 4 cycles/limb. It should be possible to get down to 3 + # cycles/limb since both ldq and stq can be paired with the other used + # instructions. But there are many restrictions in the 21064 pipeline that + # make it hard, if not impossible, to get down to 3 cycles/limb: + + # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay. + # 2. Only aligned instruction pairs can be paired. + # 3. The store buffer or silo might not be able to deal with the bandwidth. + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_rshift + .ent __mpn_rshift +__mpn_rshift: + .frame $30,0,$26,0 + + ldq $4,0($17) # load first limb + addq $17,8,$17 + subq $31,$19,$7 + subq $18,1,$18 + and $18,4-1,$20 # number of limbs in first loop + sll $4,$7,$0 # compute function result + + beq $20,.L0 + subq $18,$20,$18 + + .align 3 +.Loop0: + ldq $3,0($17) + addq $16,8,$16 + addq $17,8,$17 + subq $20,1,$20 + srl $4,$19,$5 + sll $3,$7,$6 + bis $3,$3,$4 + bis $5,$6,$8 + stq $8,-8($16) + bne $20,.Loop0 + +.L0: beq $18,.Lend + + .align 3 +.Loop: ldq $3,0($17) + addq $16,32,$16 + subq $18,4,$18 + srl $4,$19,$5 + sll $3,$7,$6 + + ldq $4,8($17) + srl $3,$19,$1 + bis $5,$6,$8 + stq $8,-32($16) + sll $4,$7,$2 + + ldq $3,16($17) + srl $4,$19,$5 + bis $1,$2,$8 + stq $8,-24($16) + sll $3,$7,$6 + + ldq $4,24($17) + srl $3,$19,$1 + bis $5,$6,$8 + stq $8,-16($16) + sll $4,$7,$2 + + addq $17,32,$17 + bis $1,$2,$8 + stq $8,-8($16) + + bgt $18,.Loop + +.Lend: srl $4,$19,$8 + stq $8,0($16) + ret $31,($26),1 + .end __mpn_rshift diff --git a/REORG.TODO/sysdeps/alpha/setjmp.S b/REORG.TODO/sysdeps/alpha/setjmp.S new file mode 100644 index 0000000000..a6758646f1 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/setjmp.S @@ -0,0 +1,120 @@ +/* Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __ASSEMBLY__ + +#include <sysdep.h> +#include <jmpbuf-offsets.h> + + .ent __sigsetjmp + .global __sigsetjmp +__sigsetjmp: + ldgp gp, 0(pv) + +$sigsetjmp_local: +#ifndef PIC +#define FRAME 16 + subq sp, FRAME, sp + .frame sp, FRAME, ra, 0 + stq ra, 0(sp) + .mask 0x04000000, -FRAME +#else +#define FRAME 0 + .frame sp, FRAME, ra, 0 +#endif +#ifdef PROF + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at +#endif + .prologue 1 + + stq s0, JB_S0*8(a0) + stq s1, JB_S1*8(a0) + stq s2, JB_S2*8(a0) + stq s3, JB_S3*8(a0) + stq s4, JB_S4*8(a0) + stq s5, JB_S5*8(a0) +#ifdef PTR_MANGLE + PTR_MANGLE(t1, ra, t0) + stq t1, JB_PC*8(a0) +#else + stq ra, JB_PC*8(a0) +#endif +#if defined(PTR_MANGLE) && FRAME == 0 + PTR_MANGLE2(t1, sp, t0) +#else + addq sp, FRAME, t1 +# ifdef PTR_MANGLE + PTR_MANGLE2(t1, t1, t0) +# endif +#endif + stq t1, JB_SP*8(a0) +#ifdef PTR_MANGLE + PTR_MANGLE2(t1, fp, t0) + stq t1, JB_FP*8(a0) +#else + stq fp, JB_FP*8(a0) +#endif + stt $f2, JB_F2*8(a0) + stt $f3, JB_F3*8(a0) + stt $f4, JB_F4*8(a0) + stt $f5, JB_F5*8(a0) + stt $f6, JB_F6*8(a0) + stt $f7, JB_F7*8(a0) + stt $f8, JB_F8*8(a0) + stt $f9, JB_F9*8(a0) + +#ifndef PIC + /* Call to C to (potentially) save our signal mask. 
*/ + jsr ra, __sigjmp_save + ldq ra, 0(sp) + addq sp, 16, sp + ret +#elif IS_IN (rtld) + /* In ld.so we never save the signal mask. */ + mov 0, v0 + ret +#else + /* Tailcall to save the signal mask. */ + br $31, __sigjmp_save !samegp +#endif + +END(__sigsetjmp) +hidden_def (__sigsetjmp) + +/* Put these traditional entry points in the same file so that we can + elide much of the nonsense in trying to jmp to the real function. */ + +ENTRY(_setjmp) + ldgp gp, 0(pv) + .prologue 1 + mov 0, a1 + br $sigsetjmp_local +END(_setjmp) +libc_hidden_def (_setjmp) + +ENTRY(setjmp) + ldgp gp, 0(pv) + .prologue 1 + mov 1, a1 + br $sigsetjmp_local +END(setjmp) + +weak_extern(_setjmp) +weak_extern(setjmp) diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/Makefile b/REORG.TODO/sysdeps/alpha/soft-fp/Makefile new file mode 100644 index 0000000000..83baa7c49d --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/Makefile @@ -0,0 +1,11 @@ +# Software floating-point emulation. + +ifeq ($(subdir),soft-fp) +sysdep_routines += ots_add ots_sub ots_mul ots_div ots_cmp ots_cmpe \ + ots_cvtxq ots_cvtqx ots_cvtqux ots_cvttx ots_cvtxt ots_nintxq \ + fraiseexcpt +endif + +ifeq ($(subdir),math) +CPPFLAGS += -I../soft-fp +endif diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/Versions b/REORG.TODO/sysdeps/alpha/soft-fp/Versions new file mode 100644 index 0000000000..3901287115 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/Versions @@ -0,0 +1,8 @@ +libc { + GLIBC_2.3.4 { + _OtsAddX; _OtsSubX; _OtsMulX; _OtsDivX; + _OtsEqlX; _OtsNeqX; _OtsLssX; _OtsLeqX; _OtsGtrX; _OtsGeqX; + _OtsCvtQX; _OtsCvtQUX; _OtsCvtXQ; _OtsNintXQ; + _OtsConvertFloatTX; _OtsConvertFloatXT; + } +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/e_sqrtl.c b/REORG.TODO/sysdeps/alpha/soft-fp/e_sqrtl.c new file mode 100644 index 0000000000..8b30bcbc52 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/e_sqrtl.c @@ -0,0 +1,49 @@ +/* long double square root in software floating-point emulation. 
+ Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdlib.h> +#include <soft-fp.h> +#include <quad.h> +#include <shlib-compat.h> + +long double +__ieee754_sqrtl (const long double a) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(C); + long double c; + long _round = 4; /* dynamic rounding */ + + FP_INIT_ROUNDMODE; + FP_UNPACK_Q(A, a); + FP_SQRT_Q(C, A); + FP_PACK_Q(c, C); + FP_HANDLE_EXCEPTIONS; + return c; +} + +/* ??? We forgot to add this symbol in 2.15. Getting this into 2.18 isn't as + straight-forward as just adding the alias, since a generic Versions file + includes the 2.15 version and the linker uses the first one it sees. 
*/ +#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +versioned_symbol (libm, __ieee754_sqrtl, __sqrtl_finite, GLIBC_2_18); +#else +strong_alias(__ieee754_sqrtl, __sqrtl_finite) +#endif diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/local-soft-fp.h b/REORG.TODO/sysdeps/alpha/soft-fp/local-soft-fp.h new file mode 100644 index 0000000000..d562e0829e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/local-soft-fp.h @@ -0,0 +1,55 @@ +#include <stdlib.h> +#include <soft-fp.h> +#include <quad.h> + +/* Helpers for the Ots functions which receive long double arguments + in two integer registers, and return values in $16+$17. */ + +#define AXP_UNPACK_RAW_Q(X, val) \ + do { \ + union _FP_UNION_Q _flo; \ + _flo.longs.a = val##l; \ + _flo.longs.b = val##h; \ + FP_UNPACK_RAW_QP(X, &_flo); \ + } while (0) + +#define AXP_UNPACK_SEMIRAW_Q(X, val) \ + do { \ + union _FP_UNION_Q _flo; \ + _flo.longs.a = val##l; \ + _flo.longs.b = val##h; \ + FP_UNPACK_SEMIRAW_QP(X, &_flo); \ + } while (0) + +#define AXP_UNPACK_Q(X, val) \ + do { \ + AXP_UNPACK_RAW_Q(X, val); \ + _FP_UNPACK_CANONICAL(Q, 2, X); \ + } while (0) + +#define AXP_PACK_RAW_Q(val, X) FP_PACK_RAW_QP(&val##_flo, X) + +#define AXP_PACK_SEMIRAW_Q(val, X) \ + do { \ + _FP_PACK_SEMIRAW(Q, 2, X); \ + AXP_PACK_RAW_Q(val, X); \ + } while (0) + +#define AXP_PACK_Q(val, X) \ + do { \ + _FP_PACK_CANONICAL(Q, 2, X); \ + AXP_PACK_RAW_Q(val, X); \ + } while (0) + +#define AXP_DECL_RETURN_Q(X) union _FP_UNION_Q X##_flo + +/* ??? We don't have a real way to tell the compiler that we're wanting + to return values in $16+$17. Instead use a volatile asm to make sure + that the values are live, and just hope that nothing kills the values + in between here and the end of the function. 
*/ +#define AXP_RETURN_Q(X) \ + do { \ + register long r16 __asm__("16") = X##_flo.longs.a; \ + register long r17 __asm__("17") = X##_flo.longs.b; \ + asm volatile ("" : : "r"(r16), "r"(r17)); \ + } while (0) diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_add.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_add.c new file mode 100644 index 0000000000..7291a730e7 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_add.c @@ -0,0 +1,38 @@ +/* Software floating-point emulation: addition. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "local-soft-fp.h" + +void +_OtsAddX(long al, long ah, long bl, long bh, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_INIT_ROUNDMODE; + AXP_UNPACK_SEMIRAW_Q(A, a); + AXP_UNPACK_SEMIRAW_Q(B, b); + FP_ADD_Q(C, A, B); + AXP_PACK_SEMIRAW_Q(c, C); + FP_HANDLE_EXCEPTIONS; + + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cmp.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cmp.c new file mode 100644 index 0000000000..84498f8059 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cmp.c @@ -0,0 +1,63 @@ +/* Software floating-point emulation: comparison. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +static long +internal_equality (long al, long ah, long bl, long bh, long neq) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); + long r; + + AXP_UNPACK_RAW_Q(A, a); + AXP_UNPACK_RAW_Q(B, b); + + if ((A_e == _FP_EXPMAX_Q && !_FP_FRAC_ZEROP_2(A)) + || (B_e == _FP_EXPMAX_Q && !_FP_FRAC_ZEROP_2(B))) + { + /* EQ and NE signal invalid operation only if either operand is SNaN. 
*/ + if (FP_ISSIGNAN_Q(A) || FP_ISSIGNAN_Q(B)) + { + FP_SET_EXCEPTION(FP_EX_INVALID); + FP_HANDLE_EXCEPTIONS; + } + return -1; + } + + r = (A_e == B_e + && _FP_FRAC_EQ_2 (A, B) + && (A_s == B_s || (!A_e && _FP_FRAC_ZEROP_2(A)))); + r ^= neq; + + return r; +} + +long +_OtsEqlX (long al, long ah, long bl, long bh) +{ + return internal_equality (al, ah, bl, bh, 0); +} + +long +_OtsNeqX (long al, long ah, long bl, long bh) +{ + return internal_equality (al, ah, bl, bh, 1); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cmpe.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cmpe.c new file mode 100644 index 0000000000..d1e950d991 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cmpe.c @@ -0,0 +1,77 @@ +/* Software floating-point emulation: comparison. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "local-soft-fp.h" + +static long +internal_compare (long al, long ah, long bl, long bh) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); + long r; + + AXP_UNPACK_RAW_Q(A, a); + AXP_UNPACK_RAW_Q(B, b); + FP_CMP_Q (r, A, B, 2, 2); + + FP_HANDLE_EXCEPTIONS; + + return r; +} + +long +_OtsLssX (long al, long ah, long bl, long bh) +{ + long r = internal_compare (al, ah, bl, bh); + if (r == 2) + return -1; + else + return r < 0; +} + +long +_OtsLeqX (long al, long ah, long bl, long bh) +{ + long r = internal_compare (al, ah, bl, bh); + if (r == 2) + return -1; + else + return r <= 0; +} + +long +_OtsGtrX (long al, long ah, long bl, long bh) +{ + long r = internal_compare (al, ah, bl, bh); + if (r == 2) + return -1; + else + return r > 0; +} + +long +_OtsGeqX (long al, long ah, long bl, long bh) +{ + long r = internal_compare (al, ah, bl, bh); + if (r == 2) + return -1; + else + return r >= 0; +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtqux.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtqux.c new file mode 100644 index 0000000000..bb6fac00a3 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtqux.c @@ -0,0 +1,39 @@ +/* Software floating-point emulation: unsigned integer to float conversion. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +/* Should never actually be used, since we've more bits of precision + than the incoming long, but needed for linkage. */ +#undef FP_ROUNDMODE +#define FP_ROUNDMODE FP_RND_ZERO + +void +_OtsCvtQUX (unsigned long a) +{ + FP_DECL_EX; + FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_FROM_INT_Q(C, a, 64, unsigned long); + AXP_PACK_RAW_Q(c, C); + + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtqx.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtqx.c new file mode 100644 index 0000000000..e1d8a7a4ae --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtqx.c @@ -0,0 +1,38 @@ +/* Software floating-point emulation: signed integer to float conversion. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +/* Should never actually be used, since we've more bits of precision + than the incoming long, but needed for linkage.
*/ +#undef FP_ROUNDMODE +#define FP_ROUNDMODE FP_RND_ZERO + +void +_OtsCvtQX (long a) +{ + FP_DECL_EX; + FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_FROM_INT_Q(C, a, 64, unsigned long); + AXP_PACK_RAW_Q(c, C); + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvttx.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvttx.c new file mode 100644 index 0000000000..00e13f3396 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvttx.c @@ -0,0 +1,47 @@ +/* Software floating-point emulation: floating point extension. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" +#include "double.h" + +/* Should never actually be used, since we're extending, but needed + for linkage. 
*/ +#undef FP_ROUNDMODE +#define FP_ROUNDMODE FP_RND_ZERO + +void +_OtsConvertFloatTX(double a) +{ + FP_DECL_EX; + FP_DECL_D(A); + FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_UNPACK_RAW_D(A, a); +#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q + FP_EXTEND(Q,D,4,2,C,A); +#else + FP_EXTEND(Q,D,2,1,C,A); +#endif + AXP_PACK_RAW_Q(c, C); + FP_HANDLE_EXCEPTIONS; + + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtxq.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtxq.c new file mode 100644 index 0000000000..eda0074ef0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtxq.c @@ -0,0 +1,41 @@ +/* Software floating-point emulation: float to integer conversion. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +long +_OtsCvtXQ (long al, long ah, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); + unsigned long r; + long s; + + /* If bit 3 is set, then integer overflow detection is requested. */ + s = _round & 8 ? 
1 : -1; + _round = _round & 3; + + FP_INIT_ROUNDMODE; + AXP_UNPACK_RAW_Q(A, a); + FP_TO_INT_Q(r, A, 64, s); + FP_HANDLE_EXCEPTIONS; + + return r; +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtxt.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtxt.c new file mode 100644 index 0000000000..59be37e394 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtxt.c @@ -0,0 +1,43 @@ +/* Software floating-point emulation: floating point truncation. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "local-soft-fp.h" +#include "double.h" + +double +_OtsConvertFloatXT (long al, long ah, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); + FP_DECL_D(R); + double r; + + FP_INIT_ROUNDMODE; + AXP_UNPACK_SEMIRAW_Q(A, a); +#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q + FP_TRUNC(D,Q,2,4,R,A); +#else + FP_TRUNC(D,Q,1,2,R,A); +#endif + FP_PACK_SEMIRAW_D(r, R); + FP_HANDLE_EXCEPTIONS; + + return r; +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_div.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_div.c new file mode 100644 index 0000000000..1ce38ced05 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_div.c @@ -0,0 +1,38 @@ +/* Software floating-point emulation: division. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "local-soft-fp.h" + +void +_OtsDivX(long al, long ah, long bl, long bh, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_INIT_ROUNDMODE; + AXP_UNPACK_Q(A, a); + AXP_UNPACK_Q(B, b); + FP_DIV_Q(C, A, B); + AXP_PACK_Q(c, C); + FP_HANDLE_EXCEPTIONS; + + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_mul.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_mul.c new file mode 100644 index 0000000000..937c24246d --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_mul.c @@ -0,0 +1,38 @@ +/* Software floating-point emulation: multiplication. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "local-soft-fp.h" + +void +_OtsMulX(long al, long ah, long bl, long bh, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_INIT_ROUNDMODE; + AXP_UNPACK_Q(A, a); + AXP_UNPACK_Q(B, b); + FP_MUL_Q(C, A, B); + AXP_PACK_Q(c, C); + FP_HANDLE_EXCEPTIONS; + + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_nintxq.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_nintxq.c new file mode 100644 index 0000000000..ce3be4fd62 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_nintxq.c @@ -0,0 +1,51 @@ +/* Software floating-point emulation: convert to fortran nearest. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +long +_OtsNintXQ (long al, long ah, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + unsigned long r; + long s; + + /* If bit 3 is set, then integer overflow detection is requested. */ + s = _round & 8 ? 
1 : -1; + _round = _round & 3; + + FP_INIT_ROUNDMODE; + AXP_UNPACK_SEMIRAW_Q(A, a); + + /* Build 0.5 * sign(A): biased exponent _FP_EXPBIAS_Q - 1 (2**-1), zero fraction. */ + B_e = _FP_EXPBIAS_Q - 1; + __FP_FRAC_SET_2 (B, 0, 0); + B_s = A_s; + + FP_ADD_Q(C, A, B); + _FP_FRAC_SRL_2(C, _FP_WORKBITS); + _FP_FRAC_HIGH_RAW_Q(C) &= ~(_FP_W_TYPE)_FP_IMPLBIT_Q; + FP_TO_INT_Q(r, C, 64, s); + if (s > 0 && (_fex &= FP_EX_INVALID)) + FP_HANDLE_EXCEPTIONS; + + return r; +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_sub.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_sub.c new file mode 100644 index 0000000000..69893f1ea8 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_sub.c @@ -0,0 +1,38 @@ +/* Software floating-point emulation: subtraction. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>.
*/ + +#include "local-soft-fp.h" + +void +_OtsSubX(long al, long ah, long bl, long bh, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_INIT_ROUNDMODE; + AXP_UNPACK_SEMIRAW_Q(A, a); + AXP_UNPACK_SEMIRAW_Q(B, b); + FP_SUB_Q(C, A, B); + AXP_PACK_SEMIRAW_Q(c, C); + FP_HANDLE_EXCEPTIONS; + + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/sfp-machine.h b/REORG.TODO/sysdeps/alpha/soft-fp/sfp-machine.h new file mode 100644 index 0000000000..7935b540db --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/sfp-machine.h @@ -0,0 +1,99 @@ +/* Machine-dependent software floating-point definitions. + Alpha userland IEEE 128-bit version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com), + Jakub Jelinek (jj@ultra.linux.cz) and + David S. Miller (davem@redhat.com). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> + +#define _FP_W_TYPE_SIZE 64 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1) +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 +#define _FP_QNANNEGATEDP 0 + +/* Alpha Architecture Handbook, 4.7.10.4 says that we should prefer any + type of NaN in Fb, then Fa; so take the fraction from Y along with the sign. */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + R##_c = FP_CLS_NAN; \ + } while (0) + +/* Rounding mode settings. */ +#define FP_RND_NEAREST FE_TONEAREST +#define FP_RND_ZERO FE_TOWARDZERO +#define FP_RND_PINF FE_UPWARD +#define FP_RND_MINF FE_DOWNWARD + +/* Obtain the current rounding mode. It's given as an argument to + all the Ots functions, with 4 meaning "dynamic". */ +#define FP_ROUNDMODE _round + +/* Exception flags. */ +#define FP_EX_INVALID FE_INVALID +#define FP_EX_OVERFLOW FE_OVERFLOW +#define FP_EX_UNDERFLOW FE_UNDERFLOW +#define FP_EX_DIVZERO FE_DIVBYZERO +#define FP_EX_INEXACT FE_INEXACT + +#define _FP_TININESS_AFTER_ROUNDING 1 + +#define FP_INIT_ROUNDMODE \ +do { \ + if (__builtin_expect (_round == 4, 0)) \ + { \ + unsigned long t; \ + __asm__ __volatile__("excb; mf_fpcr %0" : "=f"(t)); \ + _round = (t >> FPCR_ROUND_SHIFT) & 3; \ + } \ +} while (0) + +/* We copy the libm function into libc for soft-fp.
*/ +extern int __feraiseexcept (int __excepts) attribute_hidden; + +#define FP_HANDLE_EXCEPTIONS \ +do { \ + if (__builtin_expect (_fex, 0)) \ + __feraiseexcept (_fex); \ +} while (0) + +#define FP_TRAPPING_EXCEPTIONS \ + ((__ieee_get_fp_control () & SWCR_ENABLE_MASK) << SWCR_ENABLE_SHIFT) diff --git a/REORG.TODO/sysdeps/alpha/sotruss-lib.c b/REORG.TODO/sysdeps/alpha/sotruss-lib.c new file mode 100644 index 0000000000..01ded0131a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/sotruss-lib.c @@ -0,0 +1,50 @@ +/* Override generic sotruss-lib.c to define actual functions for Alpha. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define HAVE_ARCH_PLTENTER +#define HAVE_ARCH_PLTEXIT + +#include <elf/sotruss-lib.c> + +ElfW(Addr) +la_alpha_gnu_pltenter (ElfW(Sym) *sym __attribute__ ((unused)), + unsigned int ndx __attribute__ ((unused)), + uintptr_t *refcook, uintptr_t *defcook, + La_alpha_regs *regs, unsigned int *flags, + const char *symname, long int *framesizep) +{ + print_enter (refcook, defcook, symname, + regs->lr_r16, regs->lr_r17, regs->lr_r18, *flags); + + /* No need to copy anything, we will not need the parameters in any case. 
*/ + *framesizep = 0; + + return sym->st_value; +} + +unsigned int +la_alpha_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, + const struct La_alpha_regs *inregs, + struct La_alpha_retval *outregs, const char *symname) +{ + print_exit (refcook, defcook, symname, outregs->lrv_r0); + + return 0; +} diff --git a/REORG.TODO/sysdeps/alpha/stackinfo.h b/REORG.TODO/sysdeps/alpha/stackinfo.h new file mode 100644 index 0000000000..0796dc8e4c --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/stackinfo.h @@ -0,0 +1,33 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains a bit of information about the stack allocation + of the processor. */ + +#ifndef _STACKINFO_H +#define _STACKINFO_H 1 + +#include <elf.h> + +/* On Alpha the stack grows down. */ +#define _STACK_GROWS_DOWN 1 + +/* Default to an executable stack. PF_X can be overridden if PT_GNU_STACK is + * present, but it is presumed absent. */ +#define DEFAULT_STACK_PERMS (PF_R|PF_W|PF_X) + +#endif /* stackinfo.h */ diff --git a/REORG.TODO/sysdeps/alpha/start.S b/REORG.TODO/sysdeps/alpha/start.S new file mode 100644 index 0000000000..b149b1fcac --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/start.S @@ -0,0 +1,85 @@ +/* Startup code for Alpha/ELF. 
+ Copyright (C) 1993-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu> + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .text + .align 3 + .globl _start + .ent _start, 0 + .type _start,@function +_start: + .frame $15, 0, $15 + br gp, 1f +1: ldgp gp, 0(gp) + subq sp, 16, sp + mov 0, $15 + .prologue 0 + + /* Load address of the user's main function. 
*/ + lda a0, main + + ldl a1, 16(sp) /* get argc */ + lda a2, 24(sp) /* get argv */ + + /* Load address of our own entry points to .fini and .init. */ + lda a3, __libc_csu_init + lda a4, __libc_csu_fini + + /* Store address of the shared library termination function. */ + mov v0, a5 + + /* Provide the highest stack address to the user code. */ + stq sp, 0(sp) + + /* Call the user's main function, and exit with its value. + But let the libc call main. */ + jsr ra, __libc_start_main + + /* Die very horribly if exit returns. Call_pal hlt is callable from + kernel mode only; this will result in an illegal instruction trap. */ + call_pal 0 + .end _start + +/* For ECOFF backwards compatibility. */ +weak_alias (_start, __start) + +/* Define a symbol for the first piece of initialized data. */ + .data + .globl __data_start +__data_start: + .weak data_start + data_start = __data_start diff --git a/REORG.TODO/sysdeps/alpha/stpcpy.S b/REORG.TODO/sysdeps/alpha/stpcpy.S new file mode 100644 index 0000000000..62fd0d9e36 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/stpcpy.S @@ -0,0 +1,55 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy a null-terminated string from SRC to DST. 
Return a pointer + to the null-terminator in the destination. */ + +#include <sysdep.h> + + .text + +ENTRY(__stpcpy) + ldgp gp, 0(pv) +#ifdef PROF + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at +#endif + .prologue 1 + + jsr t9, __stxcpy # do the work of the copy + + and t8, 0xf0, t2 # binary search for byte offset of the + and t8, 0xcc, t1 # last byte written. + and t8, 0xaa, t0 + andnot a0, 7, a0 + cmovne t2, 4, t2 + cmovne t1, 2, t1 + cmovne t0, 1, t0 + addq a0, t2, v0 + addq t0, t1, t0 + addq v0, t0, v0 + + ret + + END(__stpcpy) + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) diff --git a/REORG.TODO/sysdeps/alpha/stpncpy.S b/REORG.TODO/sysdeps/alpha/stpncpy.S new file mode 100644 index 0000000000..62f6a8f2fd --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/stpncpy.S @@ -0,0 +1,106 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@tamu.edu) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy no more than COUNT bytes of the null-terminated string from + SRC to DST. If SRC does not cover all of COUNT, the balance is + zeroed. Return the address of the terminating null in DEST, if + any, else DEST + COUNT. 
*/ + +#include <sysdep.h> + + .set noat + .set noreorder + + .text + +ENTRY(__stpncpy) + ldgp gp, 0(pv) +#ifdef PROF + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + .prologue 1 + + beq a2, $zerocount + jsr t9, __stxncpy # do the work of the copy + + and t8, 0xf0, t3 # binary search for byte offset of the + and t8, 0xcc, t2 # last byte written. + and t8, 0xaa, t1 + andnot a0, 7, v0 + cmovne t3, 4, t3 + cmovne t2, 2, t2 + cmovne t1, 1, t1 + addq v0, t3, v0 + addq t1, t2, t1 + addq v0, t1, v0 + + bne a2, $multiword # do we have full words left? + + .align 3 + zapnot t0, t8, t4 # e0 : was last byte a null? + subq t8, 1, t2 # .. e1 : + addq v0, 1, t5 # e0 : + subq t10, 1, t3 # .. e1 : + or t2, t8, t2 # e0 : clear the bits between the last + or t3, t10, t3 # .. e1 : written byte and the last byte in + andnot t3, t2, t3 # e0 : COUNT + cmovne t4, t5, v0 # .. e1 : if last written wasnt null, inc v0 + zap t0, t3, t0 # e0 : + stq_u t0, 0(a0) # e1 : + ret # .. e1 : + + .align 3 +$multiword: + subq t8, 1, t7 # e0 : clear the final bits in the prev + or t7, t8, t7 # e1 : word + zapnot t0, t7, t0 # e0 : + subq a2, 1, a2 # .. e1 : + stq_u t0, 0(a0) # e0 : + addq a0, 8, a0 # .. e1 : + + beq a2, 1f # e1 : + blbc a2, 0f # e1 : + + stq_u zero, 0(a0) # e0 : zero one word + subq a2, 1, a2 # .. e1 : + addq a0, 8, a0 # e0 : + beq a2, 1f # .. e1 : + +0: stq_u zero, 0(a0) # e0 : zero two words + subq a2, 2, a2 # .. e1 : + stq_u zero, 8(a0) # e0 : + addq a0, 16, a0 # .. e1 : + bne a2, 0b # e1 : + unop + +1: ldq_u t0, 0(a0) # e0 : clear the leading bits in the final + subq t10, 1, t7 # .. e1 : word + or t7, t10, t7 # e0 : + zap t0, t7, t0 # e1 (stall) + stq_u t0, 0(a0) # e0 : + ret # .. 
e1 : + +$zerocount: + mov a0, v0 + ret + + END(__stpncpy) + +libc_hidden_def (__stpncpy) +weak_alias (__stpncpy, stpncpy) diff --git a/REORG.TODO/sysdeps/alpha/strcat.S b/REORG.TODO/sysdeps/alpha/strcat.S new file mode 100644 index 0000000000..bffd3e8ee0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strcat.S @@ -0,0 +1,71 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Append a null-terminated string from SRC to DST. */ + +#include <sysdep.h> + + .text + +ENTRY(strcat) + ldgp gp, 0(pv) +#ifdef PROF + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at +#endif + .prologue 1 + + mov a0, v0 # set up return value + + /* Find the end of the string. 
*/ + + ldq_u t0, 0(a0) # load first quadword (a0 may be misaligned) + lda t1, -1(zero) + insqh t1, a0, t1 + andnot a0, 7, a0 + or t1, t0, t0 + cmpbge zero, t0, t1 # t1 <- bitmask: bit i == 1 <==> i-th byte == 0 + bne t1, $found + +$loop: ldq t0, 8(a0) + addq a0, 8, a0 # addr += 8 + cmpbge zero, t0, t1 + beq t1, $loop + +$found: negq t1, t2 # clear all but least set bit + and t1, t2, t1 + + and t1, 0xf0, t2 # binary search for that set bit + and t1, 0xcc, t3 + and t1, 0xaa, t4 + cmovne t2, 4, t2 + cmovne t3, 2, t3 + cmovne t4, 1, t4 + addq t2, t3, t2 + addq a0, t4, a0 + addq a0, t2, a0 + + /* Now do the append. */ + + mov ra, t9 + jmp $31, __stxcpy + + END(strcat) +libc_hidden_builtin_def (strcat) diff --git a/REORG.TODO/sysdeps/alpha/strchr.S b/REORG.TODO/sysdeps/alpha/strchr.S new file mode 100644 index 0000000000..e3cf75f39f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strchr.S @@ -0,0 +1,94 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@tamu.edu) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Return the address of a given character within a null-terminated + string, or null if it is not found. + + This is generally scheduled for the EV5 (got to look out for my own + interests :-), but with EV4 needs in mind. 
There *should* be no more + stalls for the EV4 than there are for the EV5. +*/ + +#include <sysdep.h> + + .set noreorder + .set noat + +ENTRY(strchr) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + zapnot a1, 1, a1 # e0 : zero extend the search character + ldq_u t0, 0(a0) # .. e1 : load first quadword + sll a1, 8, t5 # e0 : replicate the search character + andnot a0, 7, v0 # .. e1 : align our loop pointer + or t5, a1, a1 # e0 : + lda t4, -1 # .. e1 : build garbage mask + sll a1, 16, t5 # e0 : + cmpbge zero, t0, t2 # .. e1 : bits set iff byte == zero + mskqh t4, a0, t4 # e0 : + or t5, a1, a1 # .. e1 : + sll a1, 32, t5 # e0 : + cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage + or t5, a1, a1 # e0 : + xor t0, a1, t1 # .. e1 : make bytes == c zero + cmpbge zero, t1, t3 # e0 : bits set iff byte == c + or t2, t3, t0 # e1 : bits set iff char match or zero match + andnot t0, t4, t0 # e0 : clear garbage bits + bne t0, $found # .. e1 (zdb) + +$loop: ldq t0, 8(v0) # e0 : + addq v0, 8, v0 # .. e1 : + nop # e0 : + xor t0, a1, t1 # .. e1 (ev5 data stall) + cmpbge zero, t0, t2 # e0 : bits set iff byte == 0 + cmpbge zero, t1, t3 # .. e1 : bits set iff byte == c + or t2, t3, t0 # e0 : + beq t0, $loop # .. e1 (zdb) + +$found: negq t0, t1 # e0 : clear all but least set bit + and t0, t1, t0 # e1 (stall) + + and t0, t3, t1 # e0 : bit set iff byte was the char + beq t1, $retnull # .. e1 (zdb) + + and t0, 0xf0, t2 # e0 : binary search for that set bit + and t0, 0xcc, t3 # .. e1 : + and t0, 0xaa, t4 # e0 : + cmovne t2, 4, t2 # .. e1 : + cmovne t3, 2, t3 # e0 : + cmovne t4, 1, t4 # .. e1 : + addq t2, t3, t2 # e0 : + addq v0, t4, v0 # .. e1 : + addq v0, t2, v0 # e0 : + ret # .. e1 : + +$retnull: + mov zero, v0 # e0 : + ret # .. 
e1 : + + END(strchr) + +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/alpha/strcmp.S b/REORG.TODO/sysdeps/alpha/strcmp.S new file mode 100644 index 0000000000..ae211bb7ae --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strcmp.S @@ -0,0 +1,194 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Bytewise compare two null-terminated strings: s1 in a0, s2 in a1. + Returns 0 in v0 when they are equal, otherwise 1 or -1. */ + +#include <sysdep.h> + + .set noat + .set noreorder + + .text + +ENTRY(strcmp) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + ldq_u t0, 0(a0) # e0 : give cache time to catch up + xor a0, a1, t2 # .. e1 : are s1 and s2 co-aligned? + ldq_u t1, 0(a1) # e0 : + and t2, 7, t2 # .. e1 : + lda t3, -1 # e0 : + bne t2, $unaligned # .. e1 : + + /* On entry to this basic block: + t0 == the first word of s1. + t1 == the first word of s2. + t3 == -1. */ + +$aligned: + mskqh t3, a0, t3 # e0 : + nop # .. e1 : + ornot t1, t3, t1 # e0 : + ornot t0, t3, t0 # .. e1 : + cmpbge zero, t1, t7 # e0 : bits set iff null found + bne t7, $eos # e1 (zdb) + + /* Aligned compare main loop. + On entry to this basic block: + t0 == an s1 word. 
+ t1 == an s2 word not containing a null. */ + +$a_loop: + xor t0, t1, t2 # e0 : + bne t2, $wordcmp # .. e1 (zdb) + ldq_u t1, 8(a1) # e0 : + ldq_u t0, 8(a0) # .. e1 : + addq a1, 8, a1 # e0 : + addq a0, 8, a0 # .. e1 : + cmpbge zero, t1, t7 # e0 : + beq t7, $a_loop # .. e1 (zdb) + br $eos # e1 : + + /* The two strings are not co-aligned. Align s1 and cope. */ + +$unaligned: + and a0, 7, t4 # e0 : find s1 misalignment + and a1, 7, t5 # .. e1 : find s2 misalignment + subq a1, t4, a1 # e0 : + + /* If s2 misalignment is larger than s1 misalignment, we need + extra startup checks to avoid SEGV. */ + + cmplt t4, t5, t8 # .. e1 : t8 = (s1 misalign < s2 misalign) + beq t8, $u_head # e1 : no: skip the extra checks + + mskqh t3, t5, t3 # e0 : + ornot t1, t3, t3 # e0 : + cmpbge zero, t3, t7 # e1 : is there a zero? + beq t7, $u_head # e1 : + + /* We've found a zero in the first partial word of s2. Align + our current s1 and s2 words and compare what we've got. */ + + extql t1, t5, t1 # e0 : + extql t0, a0, t0 # e0 : + cmpbge zero, t1, t7 # .. e1 : find that zero again + br $eos # e1 : and finish up + + .align 3 +$u_head: + /* We know just enough now to be able to assemble the first + full word of s2. We can still find a zero at the end of it. + + On entry to this basic block: + t0 == first word of s1 + t1 == first partial word of s2. */ + + ldq_u t2, 8(a1) # e0 : load second partial s2 word + lda t3, -1 # .. e1 : create leading garbage mask + extql t1, a1, t1 # e0 : create first s2 word + mskqh t3, a0, t3 # e0 : + extqh t2, a1, t4 # e0 : + ornot t0, t3, t0 # .. e1 : kill s1 garbage + or t1, t4, t1 # e0 : s2 word now complete + cmpbge zero, t0, t7 # .. e1 : find zero in first s1 word + ornot t1, t3, t1 # e0 : kill s2 garbage + lda t3, -1 # .. e1 : + mskql t3, a1, t3 # e0 : mask for s2[1] bits we have seen + bne t7, $eos # .. e1 : + xor t0, t1, t4 # e0 : compare aligned words + bne t4, $wordcmp # .. e1 (zdb) + or t2, t3, t3 # e0 : + cmpbge zero, t3, t7 # e1 : + bne t7, $u_final # e1 : + + /* Unaligned compare main loop. 
In order to avoid reading too much, + the loop is structured to detect zeros in aligned words from s2. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t2 == the unshifted low-bits from the next s2 word. */ + + .align 3 +$u_loop: + extql t2, a1, t3 # e0 : + ldq_u t2, 16(a1) # .. e1 : load next s2 high bits + ldq_u t0, 8(a0) # e0 : load next s1 word + addq a1, 8, a1 # .. e1 : + addq a0, 8, a0 # e0 : + nop # .. e1 : + extqh t2, a1, t1 # e0 : + cmpbge zero, t0, t7 # .. e1 : find zero in current s1 word + or t1, t3, t1 # e0 : + bne t7, $eos # .. e1 : + xor t0, t1, t4 # e0 : compare the words + bne t4, $wordcmp # .. e1 (zdb) + cmpbge zero, t2, t4 # e0 : find zero in next low bits + beq t4, $u_loop # .. e1 (zdb) + + /* We've found a zero in the low bits of the last s2 word. Get + the next s1 word and align them. */ +$u_final: + ldq_u t0, 8(a0) # e1 : + extql t2, a1, t1 # .. e0 : + cmpbge zero, t1, t7 # e0 : + + /* We've found a zero somewhere in a word we just read. + On entry to this basic block: + t0 == s1 word + t1 == s2 word + t7 == cmpbge mask containing the zero. */ + + .align 3 +$eos: + negq t7, t6 # e0 : create bytemask of valid data + and t6, t7, t8 # e1 : + subq t8, 1, t6 # e0 : + or t6, t8, t7 # e1 : + zapnot t0, t7, t0 # e0 : kill the garbage + zapnot t1, t7, t1 # .. e1 : + xor t0, t1, v0 # e0 : and compare + beq v0, $done # .. e1 : + + /* Here we have two differing co-aligned words in t0 & t1. + Bytewise compare them and return (t0 > t1 ? 1 : -1). */ +$wordcmp: + cmpbge t0, t1, t2 # e0 : comparison yields bit mask of ge + cmpbge t1, t0, t3 # .. e1 : + xor t2, t3, t0 # e0 : bits set iff t0/t1 bytes differ + negq t0, t1 # e1 : clear all but least bit + and t0, t1, t0 # e0 : + lda v0, -1 # .. e1 : + and t0, t2, t1 # e0 : was bit set in t0 > t1? 
+ cmovne t1, 1, v0 # .. e1 (zdb) + +$done: + ret # e1 : + + END(strcmp) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/alpha/strcpy.S b/REORG.TODO/sysdeps/alpha/strcpy.S new file mode 100644 index 0000000000..4726630aab --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strcpy.S @@ -0,0 +1,41 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy a null-terminated string from SRC to DST. Return DST + (v0 is set from a0 before the tail jump to __stxcpy). */ + +#include <sysdep.h> + + .text + +ENTRY(strcpy) + ldgp gp, 0(pv) +#ifdef PROF + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at +#endif + .prologue 1 + + mov a0, v0 # set up return value + mov ra, t9 # hand our return address to __stxcpy in t9 + jmp $31, __stxcpy # do the copy + + END(strcpy) +libc_hidden_builtin_def (strcpy) diff --git a/REORG.TODO/sysdeps/alpha/strlen.S b/REORG.TODO/sysdeps/alpha/strlen.S new file mode 100644 index 0000000000..72aef91e0e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strlen.S @@ -0,0 +1,76 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by David Mosberger (davidm@cs.arizona.edu). + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Finds length of a 0-terminated string. Optimized for the Alpha + architecture: + + - memory accessed as aligned quadwords only + - uses cmpbge to compare 8 bytes in parallel + - does binary search to find 0 byte in last quadword (HAKMEM + needed 12 instructions to do this instead of the 8 instructions + that the binary search needs). 
+*/ + +#include <sysdep.h> + + .set noreorder + .set noat + +ENTRY(strlen) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + ldq_u t0, 0(a0) # load first quadword (a0 may be misaligned) + lda t1, -1(zero) + insqh t1, a0, t1 + andnot a0, 7, v0 + or t1, t0, t0 + nop # dual issue the next two on ev5 + cmpbge zero, t0, t1 # t1 <- bitmask: bit i == 1 <==> i-th byte == 0 + bne t1, $found + +$loop: ldq t0, 8(v0) + addq v0, 8, v0 # addr += 8 + cmpbge zero, t0, t1 + beq t1, $loop + +$found: negq t1, t2 # clear all but least set bit + and t1, t2, t1 + + and t1, 0xf0, t2 # binary search for that set bit + and t1, 0xcc, t3 + and t1, 0xaa, t4 + cmovne t2, 4, t2 + cmovne t3, 2, t3 + cmovne t4, 1, t4 + addq t2, t3, t2 + addq v0, t4, v0 + addq v0, t2, v0 + nop # dual issue next two on ev4 and ev5 + + subq v0, a0, v0 + ret + + END(strlen) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/alpha/strncat.S b/REORG.TODO/sysdeps/alpha/strncat.S new file mode 100644 index 0000000000..61c4445da3 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strncat.S @@ -0,0 +1,94 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +/* Append no more than COUNT characters from the null-terminated string SRC + to the null-terminated string DST. Always null-terminate the new DST. */ + +#include <sysdep.h> + + .text + +ENTRY(strncat) + ldgp gp, 0(pv) +#ifdef PROF + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at +#endif + .prologue 1 + + mov a0, v0 # set up return value + beq a2, $zerocount + + /* Find the end of the string. */ + + ldq_u t0, 0(a0) # load first quadword (a0 may be misaligned) + lda t1, -1(zero) + insqh t1, a0, t1 + andnot a0, 7, a0 + or t1, t0, t0 + cmpbge zero, t0, t1 # t1 <- bitmask: bit i == 1 <==> i-th byte == 0 + bne t1, $found + +$loop: ldq t0, 8(a0) + addq a0, 8, a0 # addr += 8 + cmpbge zero, t0, t1 + beq t1, $loop + +$found: negq t1, t2 # clear all but least set bit + and t1, t2, t1 + + and t1, 0xf0, t2 # binary search for that set bit + and t1, 0xcc, t3 + and t1, 0xaa, t4 + cmovne t2, 4, t2 + cmovne t3, 2, t3 + cmovne t4, 1, t4 + addq t2, t3, t2 + addq a0, t4, a0 + addq a0, t2, a0 + + /* Now do the append. */ + + jsr t9, __stxncpy + + /* Worry about the null termination. */ + + zapnot t0, t8, t1 # was last byte a null? + bne t1, 0f + ret + +0: and t10, 0x80, t1 + bne t1, 1f + + /* Here there are bytes left in the current word. Clear one. */ + addq t10, t10, t10 # end-of-count bit <<= 1 + zap t0, t10, t0 + stq_u t0, 0(a0) + ret + +1: /* Here we must read the next DST word and clear the first byte. */ + ldq_u t0, 8(a0) + zap t0, 1, t0 + stq_u t0, 8(a0) + +$zerocount: + ret + + END(strncat) diff --git a/REORG.TODO/sysdeps/alpha/strncmp.S b/REORG.TODO/sysdeps/alpha/strncmp.S new file mode 100644 index 0000000000..e5a6e0a832 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strncmp.S @@ -0,0 +1,277 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Bytewise compare two null-terminated strings of length no longer than N. */ + +#include <sysdep.h> + + .set noat + .set noreorder + +/* EV6 only predicts one branch per octaword. We'll use these to push + subsequent branches back to the next bundle. This will generally add + a fetch+decode cycle to older machines, so skip in that case. */ +#ifdef __alpha_fix__ +# define ev6_unop unop +#else +# define ev6_unop +#endif + + .text + +ENTRY(strncmp) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + xor a0, a1, t2 # are s1 and s2 co-aligned? + beq a2, $zerolength + ldq_u t0, 0(a0) # load asap to give cache time to catch up + ldq_u t1, 0(a1) + lda t3, -1 + and t2, 7, t2 + srl t3, 1, t6 + and a0, 7, t4 # find s1 misalignment + and a1, 7, t5 # find s2 misalignment + cmovlt a2, t6, a2 # bound neg count to LONG_MAX + addq a1, a2, a3 # s2+count + addq a2, t4, a2 # bias count by s1 misalignment + and a2, 7, t10 # ofs of last byte in s1 last word + srl a2, 3, a2 # remaining full words in s1 count + bne t2, $unaligned + + /* On entry to this basic block: + t0 == the first word of s1. + t1 == the first word of s2. + t3 == -1. 
*/ +$aligned: + mskqh t3, a1, t8 # mask off leading garbage + ornot t1, t8, t1 + ornot t0, t8, t0 + cmpbge zero, t1, t7 # bits set iff null found + beq a2, $eoc # check end of count + bne t7, $eos + beq t10, $ant_loop + + /* Aligned compare main loop. + On entry to this basic block: + t0 == an s1 word. + t1 == an s2 word not containing a null. */ + + .align 4 +$a_loop: + xor t0, t1, t2 # e0 : + bne t2, $wordcmp # .. e1 (zdb) + ldq_u t1, 8(a1) # e0 : + ldq_u t0, 8(a0) # .. e1 : + + subq a2, 1, a2 # e0 : + addq a1, 8, a1 # .. e1 : + addq a0, 8, a0 # e0 : + beq a2, $eoc # .. e1 : + + cmpbge zero, t1, t7 # e0 : + beq t7, $a_loop # .. e1 : + + br $eos + + /* Alternate aligned compare loop, for when there's no trailing + bytes on the count. We have to avoid reading too much data. */ + .align 4 +$ant_loop: + xor t0, t1, t2 # e0 : + ev6_unop + ev6_unop + bne t2, $wordcmp # .. e1 (zdb) + + subq a2, 1, a2 # e0 : + beq a2, $zerolength # .. e1 : + ldq_u t1, 8(a1) # e0 : + ldq_u t0, 8(a0) # .. e1 : + + addq a1, 8, a1 # e0 : + addq a0, 8, a0 # .. e1 : + cmpbge zero, t1, t7 # e0 : + beq t7, $ant_loop # .. e1 : + + br $eos + + /* The two strings are not co-aligned. Align s1 and cope. */ + /* On entry to this basic block: + t0 == the first word of s1. + t1 == the first word of s2. + t3 == -1. + t4 == misalignment of s1. + t5 == misalignment of s2. + t10 == misalignment of s1 end. */ + .align 4 +$unaligned: + /* If s2 misalignment is larger than s1 misalignment, we need + extra startup checks to avoid SEGV. */ + subq a1, t4, a1 # adjust s2 for s1 misalignment + cmpult t4, t5, t9 # t9 = (s1 misalign < s2 misalign) + subq a3, 1, a3 # last byte of s2 + bic a1, 7, t8 + mskqh t3, t5, t7 # mask garbage in s2 + subq a3, t8, a3 + ornot t1, t7, t7 + srl a3, 3, a3 # remaining full words in s2 count + beq t9, $u_head # no: skip the extra checks + + /* Failing that, we need to look for both eos and eoc within the + first word of s2. If we find either, we can continue by + pretending that the next word of s2 is all zeros. 
*/ + lda t2, 0 # next = zero + cmpeq a3, 0, t8 # eoc in the first word of s2? + cmpbge zero, t7, t7 # eos in the first word of s2? + or t7, t8, t8 + bne t8, $u_head_nl + + /* We know just enough now to be able to assemble the first + full word of s2. We can still find a zero at the end of it. + + On entry to this basic block: + t0 == first word of s1 + t1 == first partial word of s2. + t3 == -1. + t10 == ofs of last byte in s1 last word. + t11 == ofs of last byte in s2 last word. */ +$u_head: + ldq_u t2, 8(a1) # load second partial s2 word + subq a3, 1, a3 +$u_head_nl: + extql t1, a1, t1 # create first s2 word + mskqh t3, a0, t8 + extqh t2, a1, t4 + ornot t0, t8, t0 # kill s1 garbage + or t1, t4, t1 # s2 word now complete + cmpbge zero, t0, t7 # find eos in first s1 word + ornot t1, t8, t1 # kill s2 garbage + beq a2, $eoc + subq a2, 1, a2 + bne t7, $eos + mskql t3, a1, t8 # mask out s2[1] bits we have seen + xor t0, t1, t4 # compare aligned words + or t2, t8, t8 + bne t4, $wordcmp + cmpbge zero, t8, t7 # eos in high bits of s2[1]? + cmpeq a3, 0, t8 # eoc in s2[1]? + or t7, t8, t7 + bne t7, $u_final + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned words from s2. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t2 == the unshifted low-bits from the next s2 word. + t10 == ofs of last byte in s1 last word. + t11 == ofs of last byte in s2 last word. */ + .align 4 +$u_loop: + extql t2, a1, t3 # e0 : + ldq_u t2, 16(a1) # .. e1 : load next s2 high bits + ldq_u t0, 8(a0) # e0 : load next s1 word + addq a1, 8, a1 # .. e1 : + + addq a0, 8, a0 # e0 : + subq a3, 1, a3 # .. e1 : + extqh t2, a1, t1 # e0 : + cmpbge zero, t0, t7 # .. e1 : eos in current s1 word + + or t1, t3, t1 # e0 : + beq a2, $eoc # .. 
e1 : eoc in current s1 word + subq a2, 1, a2 # e0 : + cmpbge zero, t2, t4 # .. e1 : eos in s2[1] + + xor t0, t1, t3 # e0 : compare the words + ev6_unop + ev6_unop + bne t7, $eos # .. e1 : + + cmpeq a3, 0, t5 # e0 : eoc in s2[1] + ev6_unop + ev6_unop + bne t3, $wordcmp # .. e1 : + + or t4, t5, t4 # e0 : eos or eoc in s2[1]. + beq t4, $u_loop # .. e1 (zdb) + + /* We've found a zero in the low bits of the last s2 word. Get + the next s1 word and align them. */ + .align 3 +$u_final: + ldq_u t0, 8(a0) + extql t2, a1, t1 + cmpbge zero, t1, t7 + bne a2, $eos + + /* We've hit end of count. Zero everything after the count + and compare whats left. */ + .align 3 +$eoc: + mskql t0, t10, t0 + mskql t1, t10, t1 + cmpbge zero, t1, t7 + + /* We've found a zero somewhere in a word we just read. + On entry to this basic block: + t0 == s1 word + t1 == s2 word + t7 == cmpbge mask containing the zero. */ + .align 3 +$eos: + negq t7, t6 # create bytemask of valid data + and t6, t7, t8 + subq t8, 1, t6 + or t6, t8, t7 + zapnot t0, t7, t0 # kill the garbage + zapnot t1, t7, t1 + xor t0, t1, v0 # ... and compare + beq v0, $done + + /* Here we have two differing co-aligned words in t0 & t1. + Bytewise compare them and return (t0 > t1 ? 1 : -1). */ + .align 3 +$wordcmp: + cmpbge t0, t1, t2 # comparison yields bit mask of ge + cmpbge t1, t0, t3 + xor t2, t3, t0 # bits set iff t0/t1 bytes differ + negq t0, t1 # clear all but least bit + and t0, t1, t0 + lda v0, -1 + and t0, t2, t1 # was bit set in t0 > t1? + cmovne t1, 1, v0 +$done: + ret + + .align 3 +$zerolength: + clr v0 + ret + + END(strncmp) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/alpha/strncpy.S b/REORG.TODO/sysdeps/alpha/strncpy.S new file mode 100644 index 0000000000..96f3973ff7 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strncpy.S @@ -0,0 +1,87 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + This file is part of the GNU C Library. 
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library. If not, see
+   <http://www.gnu.org/licenses/>. */
+
+/* Copy no more than COUNT bytes of the null-terminated string from
+   SRC to DST. If SRC does not cover all of COUNT, the balance is
+   zeroed. The original DST pointer (a0 on entry) is returned in v0. */
+
+#include <sysdep.h>
+
+	.set noat
+	.set noreorder
+
+	.text
+
+ENTRY(strncpy)
+	ldgp	gp, 0(pv)
+#ifdef PROF
+	lda	AT, _mcount
+	jsr	AT, (AT), _mcount
+#endif
+	.prologue 1
+
+	mov	a0, v0	# set return value now
+	beq	a2, $zerocount	# COUNT == 0: nothing to copy
+	jsr	t9, __stxncpy	# do the work of the copy
+
+	bne	a2, $multiword	# do we have full words left?
+
+	.align 3
+	subq	t8, 1, t2	# e0 : guess not
+	subq	t10, 1, t3	# .. e1 :
+	or	t2, t8, t2	# e0 : clear the bits between the last
+	or	t3, t10, t3	# .. e1 : written byte and the last byte in
+	andnot	t3, t2, t3	# e0 : COUNT
+	zap	t0, t3, t0	# e1 : t3 selects exactly those in-between bytes
+	stq_u	t0, 0(a0)	# e0 : store the last word back with them zeroed
+	ret	# .. e1 :
+
+$multiword:
+	subq	t8, 1, t7	# e0 : clear the final bits in the prev
+	or	t7, t8, t7	# e1 : word
+	zapnot	t0, t7, t0	# e0 : keep only bytes up to the last one written
+	subq	a2, 1, a2	# .. e1 :
+	stq_u	t0, 0(a0)	# e0 :
+	addq	a0, 8, a0	# .. e1 :
+
+	beq	a2, 1f	# e1 : no whole words to zero
+	blbc	a2, 0f	# e1 : even number of words: straight to the two-word loop
+
+	stq_u	zero, 0(a0)	# e0 : zero one word
+	subq	a2, 1, a2	# .. e1 :
+	addq	a0, 8, a0	# e0 :
+	beq	a2, 1f	# .. e1 :
+
+0:	stq_u	zero, 0(a0)	# e0 : zero two words
+	subq	a2, 2, a2	# .. e1 :
+	stq_u	zero, 8(a0)	# e0 :
+	addq	a0, 16, a0	# .. e1 :
+	bne	a2, 0b	# e1 :
+	unop
+
+1:	ldq_u	t0, 0(a0)	# e0 : clear the leading bits in the final
+	subq	t10, 1, t7	# .. e1 : word
+	or	t7, t10, t7	# e0 : t7 = bytes up to and including the last COUNT byte
+	zap	t0, t7, t0	# e1 (stall)
+	stq_u	t0, 0(a0)	# e0 :
+
+$zerocount:
+	ret	# .. e1 :
+
+	END(strncpy)
+libc_hidden_builtin_def (strncpy)
diff --git a/REORG.TODO/sysdeps/alpha/strrchr.S b/REORG.TODO/sysdeps/alpha/strrchr.S
new file mode 100644
index 0000000000..6b169ab86c
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/strrchr.S
@@ -0,0 +1,110 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library. If not, see
+   <http://www.gnu.org/licenses/>. */
+
+/* Return the address of the last occurrence of a given character
+   within a null-terminated string, or null if it is not found. As used below: a0 = string, a1 = character, result in v0.
+
+   This is generally scheduled for the EV5 (got to look out for my own
+   interests :-), but with EV4 needs in mind. There are, in fact, fewer
+   stalls on the EV4 than there are on the EV5.
+*/
+
+#include <sysdep.h>
+
+	.set noreorder
+	.set noat
+
+ENTRY(strrchr)
+#ifdef PROF
+	ldgp	gp, 0(pv)
+	lda	AT, _mcount
+	jsr	AT, (AT), _mcount
+	.prologue 1
+#else
+	.prologue 0
+#endif
+
+	and	a1, 0xff, a1	# e0 : zero extend our test character
+	mov	zero, t6	# .. e1 : t6 is last match aligned addr
+	sll	a1, 8, t5	# e0 : replicate our test character
+	mov	zero, t7	# .. e1 : t7 is last match byte compare mask
+	or	t5, a1, a1	# e0 : character now in the low 2 bytes
+	ldq_u	t0, 0(a0)	# .. e1 : load first quadword
+	sll	a1, 16, t5	# e0 :
+	andnot	a0, 7, v0	# .. e1 : align source addr
+	or	t5, a1, a1	# e0 : character now in the low 4 bytes
+	lda	t4, -1	# .. e1 : build garbage mask
+	sll	a1, 32, t5	# e0 :
+	cmpbge	zero, t0, t1	# .. e1 : bits set iff byte == zero
+	mskqh	t4, a0, t4	# e0 : zero the bytes that precede the string
+	or	t5, a1, a1	# .. e1 : character replication complete
+	xor	t0, a1, t2	# e0 : make bytes == c zero
+	cmpbge	zero, t4, t4	# .. e1 : bits set iff byte is garbage
+	cmpbge	zero, t2, t3	# e0 : bits set iff byte == c
+	andnot	t1, t4, t1	# .. e1 : clear garbage from null test
+	andnot	t3, t4, t3	# e0 : clear garbage from char test
+	bne	t1, $eos	# .. e1 : did we already hit the terminator?
+
+	/* Character search main loop */
+$loop:
+	ldq	t0, 8(v0)	# e0 : load next quadword
+	cmovne	t3, v0, t6	# .. e1 : save previous comparisons match
+	cmovne	t3, t3, t7	# e0 : ... together with its byte mask
+	addq	v0, 8, v0	# .. e1 : v0 = address of the quadword just loaded
+	xor	t0, a1, t2	# e0 : make bytes == c zero
+	cmpbge	zero, t0, t1	# .. e1 : bits set iff byte == zero
+	cmpbge	zero, t2, t3	# e0 : bits set iff byte == c
+	beq	t1, $loop	# .. e1 : if we haven't seen a null, loop
+
+	/* Mask out character matches after terminator */
+$eos:
+	negq	t1, t4	# e0 : isolate first null byte match
+	and	t1, t4, t4	# e1 :
+	subq	t4, 1, t5	# e0 : build a mask of the bytes up to...
+	or	t4, t5, t4	# e1 : ... and including the null
+
+	and	t3, t4, t3	# e0 : mask out char matches after null
+	cmovne	t3, t3, t7	# .. e1 : save it, if match found
+	cmovne	t3, v0, t6	# e0 : ... along with the address of this quadword
+
+	/* Locate the address of the last matched character */
+
+	/* Retain the early exit for the ev4 -- the ev5 mispredict penalty
+	   is 5 cycles -- the same as just falling through. */
+	beq	t7, $retnull	# .. e1 : no match was ever recorded
+
+	and	t7, 0xf0, t2	# e0 : binary search for the high bit set
+	cmovne	t2, t2, t7	# .. e1 (zdb)
+	cmovne	t2, 4, t2	# e0 : t2 = 4 if the last match is in bytes 4-7
+	and	t7, 0xcc, t1	# .. e1 :
+	cmovne	t1, t1, t7	# e0 :
+	cmovne	t1, 2, t1	# .. e1 : t1 = 2 if it is in the upper pair of what remains
+	and	t7, 0xaa, t0	# e0 :
+	cmovne	t0, 1, t0	# .. e1 (zdb)
+	addq	t2, t1, t1	# e0 :
+	addq	t6, t0, v0	# .. e1 : add our aligned base ptr to the mix
+	addq	v0, t1, v0	# e0 : v0 = base + byte index of the last match
+	ret	# .. e1 :
+
+$retnull:
+	mov	zero, v0	# e0 : character not found: return null
+	ret	# .. e1 :
+
+	END(strrchr)
+
+weak_alias (strrchr, rindex)
+libc_hidden_builtin_def (strrchr)
diff --git a/REORG.TODO/sysdeps/alpha/stxcpy.S b/REORG.TODO/sysdeps/alpha/stxcpy.S
new file mode 100644
index 0000000000..a3efd9ce48
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/stxcpy.S
@@ -0,0 +1,294 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+   Contributed by Richard Henderson (rth@tamu.edu)
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library. If not, see
+   <http://www.gnu.org/licenses/>. */
+
+/* Copy a null-terminated string from SRC to DST.
+
+   This is an internal routine used by strcpy, stpcpy, and strcat.
+   As such, it uses special linkage conventions to make implementation
+   of these public functions more efficient.
+
+   On input:
+	t9 = return address
+	a0 = DST
+	a1 = SRC
+
+   On output:
+	t8 = bitmask (with one bit set) indicating the last byte written
+	a0 = unaligned address of the last *word* written
+
+   Furthermore, v0, a3-a5, t11, and t12 are untouched.
+*/
+
+/* This is generally scheduled for the EV5, but should still be pretty
+   good for the EV4 too.
*/
+
+#include <sysdep.h>
+
+	.set noat
+	.set noreorder
+
+	.text
+	.type	__stxcpy, @function
+	.globl	__stxcpy
+	.usepv	__stxcpy, no
+
+	cfi_startproc
+	cfi_return_column (t9)
+
+	/* On entry to this basic block:
+	   t0 == the first destination word for masking back in
+	   t1 == the first source word. */
+	.align 3
+stxcpy_aligned:
+	/* Create the 1st output word and detect 0's in the 1st input word. */
+	lda	t2, -1	# e1 : build a mask against false zero
+	mskqh	t2, a1, t2	# e0 : detection in the src word
+	mskqh	t1, a1, t3	# e0 : t3 = source bytes from the string start upwards
+	ornot	t1, t2, t2	# .. e1 : bytes before the string start forced to 0xff
+	mskql	t0, a1, t0	# e0 : assemble the first output word
+	cmpbge	zero, t2, t7	# .. e1 : bits set iff null found
+	or	t0, t3, t1	# e0 : old dest bytes below, source bytes above
+	bne	t7, $a_eos	# .. e1 :
+
+	/* On entry to this basic block:
+	   t0 == the first destination word for masking back in
+	   t1 == a source word not containing a null. */
+$a_loop:
+	stq_u	t1, 0(a0)	# e0 : store a full word
+	addq	a0, 8, a0	# .. e1 :
+	ldq_u	t1, 0(a1)	# e0 : next source word
+	addq	a1, 8, a1	# .. e1 :
+	cmpbge	zero, t1, t7	# e0 (stall)
+	beq	t7, $a_loop	# .. e1 (zdb)
+
+	/* Take care of the final (partial) word store.
+	   On entry to this basic block we have:
+	   t1 == the source word containing the null
+	   t7 == the cmpbge mask that found it. */
+$a_eos:
+	negq	t7, t6	# e0 : find low bit set
+	and	t7, t6, t8	# e1 (stall)
+
+	/* For the sake of the cache, don't read a destination word
+	   if we're not going to need it. */
+	and	t8, 0x80, t6	# e0 : null in byte 7 means the whole word is stored
+	bne	t6, 1f	# .. e1 (zdb)
+
+	/* We're doing a partial word store and so need to combine
+	   our source and original destination words. */
+	ldq_u	t0, 0(a0)	# e0 :
+	subq	t8, 1, t6	# .. e1 : t6 = mask of the bytes below the null
+	zapnot	t1, t6, t1	# e0 : clear src bytes >= null
+	or	t8, t6, t7	# .. e1 : t7 = bytes up to and including the null
+	zap	t0, t7, t0	# e0 : clear dst bytes <= null
+	or	t0, t1, t1	# e1 :
+
+1:	stq_u	t1, 0(a0)	# e0 :
+	ret	(t9)	# .. e1 :
+
+	.align 3
+__stxcpy:
+	/* Are source and destination co-aligned? */
+	xor	a0, a1, t0	# e0 :
+	unop	# :
+	and	t0, 7, t0	# e0 : nonzero iff the low three address bits differ
+	bne	t0, $unaligned	# .. e1 :
+
+	/* We are co-aligned; take care of a partial first word. */
+	ldq_u	t1, 0(a1)	# e0 : load first src word
+	and	a0, 7, t0	# .. e1 : take care not to load a word ...
+	addq	a1, 8, a1	# e0 :
+	beq	t0, stxcpy_aligned	# .. e1 : ... if we won't need it
+	ldq_u	t0, 0(a0)	# e0 :
+	br	stxcpy_aligned	# .. e1 :
+
+
+/* The source and destination are not co-aligned. Align the destination
+   and cope. We have to be very careful about not reading too much and
+   causing a SEGV. */
+
+	.align 3
+$u_head:
+	/* We know just enough now to be able to assemble the first
+	   full source word. We can still find a zero at the end of it
+	   that prevents us from outputting the whole thing.
+
+	   On entry to this basic block:
+	   t0 == the first dest word, for masking back in, if needed else 0
+	   t1 == the low bits of the first source word
+	   t6 == bytemask that is -1 in dest word bytes */
+
+	ldq_u	t2, 8(a1)	# e0 : second source word
+	addq	a1, 8, a1	# .. e1 :
+
+	extql	t1, a1, t1	# e0 : low part of the first output word
+	extqh	t2, a1, t4	# e0 : high part, from the second source word
+	mskql	t0, a0, t0	# e0 : keep the dest bytes below DST
+	or	t1, t4, t1	# .. e1 :
+	mskqh	t1, a0, t1	# e0 : drop the source bytes below DST
+	or	t0, t1, t1	# e1 :
+
+	or	t1, t6, t6	# e0 : make the preserved dest bytes non-zero for the test
+	cmpbge	zero, t6, t7	# .. e1 :
+	lda	t6, -1	# e0 : for masking just below
+	bne	t7, $u_final	# .. e1 :
+
+	mskql	t6, a1, t6	# e0 : mask out the bits we have
+	or	t6, t2, t2	# e1 : already extracted before
+	cmpbge	zero, t2, t7	# e0 : testing eos
+	bne	t7, $u_late_head_exit	# .. e1 (zdb)
+
+	/* Finally, we've got all the stupid leading edge cases taken care
+	   of and we can set up to enter the main loop. */
+
+	stq_u	t1, 0(a0)	# e0 : store first output word
+	addq	a0, 8, a0	# .. e1 :
+	extql	t2, a1, t0	# e0 : position ho-bits of lo word
+	ldq_u	t2, 8(a1)	# .. e1 : read next high-order source word
+	addq	a1, 8, a1	# e0 :
+	cmpbge	zero, t2, t7	# .. e1 :
+	nop	# e0 :
+	bne	t7, $u_eos	# .. e1 :
+
+	/* Unaligned copy main loop. In order to avoid reading too much,
+	   the loop is structured to detect zeros in aligned source words.
+	   This has, unfortunately, effectively pulled half of a loop
+	   iteration out into the head and half into the tail, but it does
+	   prevent nastiness from accumulating in the very thing we want
+	   to run as fast as possible.
+
+	   On entry to this basic block:
+	   t0 == the shifted high-order bits from the previous source word
+	   t2 == the unshifted current source word
+
+	   We further know that t2 does not contain a null terminator. */
+
+	.align 3
+$u_loop:
+	extqh	t2, a1, t1	# e0 : extract high bits for current word
+	addq	a1, 8, a1	# .. e1 :
+	extql	t2, a1, t3	# e0 : extract low bits for next time
+	addq	a0, 8, a0	# .. e1 :
+	or	t0, t1, t1	# e0 : current dst word now complete
+	ldq_u	t2, 0(a1)	# .. e1 : load high word for next time
+	stq_u	t1, -8(a0)	# e0 : save the current word
+	mov	t3, t0	# .. e1 :
+	cmpbge	zero, t2, t7	# e0 : test new word for eos
+	beq	t7, $u_loop	# .. e1 :
+
+	/* We've found a zero somewhere in the source word we just read.
+	   If it resides in the lower half, we have one (probably partial)
+	   word to write out, and if it resides in the upper half, we
+	   have one full and one partial word left to write out.
+
+	   On entry to this basic block:
+	   t0 == the shifted high-order bits from the previous source word
+	   t2 == the unshifted current source word. */
+$u_eos:
+	extqh	t2, a1, t1	# e0 :
+	or	t0, t1, t1	# e1 : first (partial) source word complete
+
+	cmpbge	zero, t1, t7	# e0 : is the null in this first bit?
+	bne	t7, $u_final	# .. e1 (zdb)
+
+$u_late_head_exit:
+	stq_u	t1, 0(a0)	# e0 : the null was in the high-order bits
+	addq	a0, 8, a0	# .. e1 :
+	extql	t2, a1, t1	# e0 : remaining source bytes, shifted into place
+	cmpbge	zero, t1, t7	# .. e1 :
+
+	/* Take care of a final (probably partial) result word.
+	   On entry to this basic block:
+	   t1 == assembled source word
+	   t7 == cmpbge mask that found the null. */
+$u_final:
+	negq	t7, t6	# e0 : isolate low bit set
+	and	t6, t7, t8	# e1 :
+
+	and	t8, 0x80, t6	# e0 : avoid dest word load if we can
+	bne	t6, 1f	# .. e1 (zdb)
+
+	ldq_u	t0, 0(a0)	# e0 :
+	subq	t8, 1, t6	# .. e1 :
+	or	t6, t8, t7	# e0 :
+	zapnot	t1, t6, t1	# .. e1 : kill source bytes >= null
+	zap	t0, t7, t0	# e0 : kill dest bytes <= null
+	or	t0, t1, t1	# e1 :
+
+1:	stq_u	t1, 0(a0)	# e0 :
+	ret	(t9)	# .. e1 :
+
+	/* Unaligned copy entry point. */
+	.align 3
+$unaligned:
+
+	ldq_u	t1, 0(a1)	# e0 : load first source word
+
+	and	a0, 7, t4	# .. e1 : find dest misalignment
+	and	a1, 7, t5	# e0 : find src misalignment
+
+	/* Conditionally load the first destination word and a bytemask
+	   with 0xff indicating that the destination byte is sacrosanct. */
+
+	mov	zero, t0	# .. e1 :
+	mov	zero, t6	# e0 :
+	beq	t4, 1f	# .. e1 : dest already aligned: nothing to preserve
+	ldq_u	t0, 0(a0)	# e0 :
+	lda	t6, -1	# .. e1 :
+	mskql	t6, a0, t6	# e0 :
+1:
+	subq	a1, t4, a1	# .. e1 : sub dest misalignment from src addr
+
+	/* If source misalignment is larger than dest misalignment, we need
+	   extra startup checks to avoid SEGV. */
+
+	cmplt	t4, t5, t8	# e0 : t8 = (dest misalignment < src misalignment)
+	beq	t8, $u_head	# .. e1 (zdb)
+
+	lda	t2, -1	# e1 : mask out leading garbage in source
+	mskqh	t2, t5, t2	# e0 :
+	nop	# e0 :
+	ornot	t1, t2, t3	# .. e1 :
+	cmpbge	zero, t3, t7	# e0 : is there a zero?
+	beq	t7, $u_head	# .. e1 (zdb)
+
+	/* At this point we've found a zero in the first partial word of
+	   the source. We need to isolate the valid source data and mask
+	   it into the original destination data. (Incidentally, we know
+	   that we'll need at least one byte of that original dest word.) */
+
+	ldq_u	t0, 0(a0)	# e0 :
+
+	negq	t7, t6	# .. e1 : build bitmask of bytes <= zero
+	and	t6, t7, t8	# e0 :
+	and	a1, 7, t5	# .. e1 :
+	subq	t8, 1, t6	# e0 :
+	or	t6, t8, t7	# e1 :
+	srl	t8, t5, t8	# e0 : adjust final null return value
+
+	zapnot	t2, t7, t2	# .. e1 : prepare source word; mirror changes
+	and	t1, t2, t1	# e1 : to source validity mask
+	extql	t2, a1, t2	# .. e0 :
+	extql	t1, a1, t1	# e0 :
+
+	andnot	t0, t2, t0	# .. e1 : zero place for source to reside
+	or	t0, t1, t1	# e1 : and put it there
+	stq_u	t1, 0(a0)	# .. e0 :
+	ret	(t9)
+
+	cfi_endproc
diff --git a/REORG.TODO/sysdeps/alpha/stxncpy.S b/REORG.TODO/sysdeps/alpha/stxncpy.S
new file mode 100644
index 0000000000..718a37ad0a
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/stxncpy.S
@@ -0,0 +1,352 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+   Contributed by Richard Henderson (rth@tamu.edu)
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library. If not, see
+   <http://www.gnu.org/licenses/>. */
+
+/* Copy no more than COUNT bytes of the null-terminated string from
+   SRC to DST.
+
+   This is an internal routine used by strncpy, stpncpy, and strncat.
+   As such, it uses special linkage conventions to make implementation
+   of these public functions more efficient.
+
+   On input:
+	t9 = return address
+	a0 = DST
+	a1 = SRC
+	a2 = COUNT
+
+   Furthermore, COUNT may not be zero.
+
+   On output:
+	t0 = last word written
+	t8 = bitmask (with one bit set) indicating the last byte written
+	t10 = bitmask (with one bit set) indicating the byte position of
+	      the end of the range specified by COUNT
+	a0 = unaligned address of the last *word* written
+	a2 = the number of full words left in COUNT
+
+   Furthermore, v0, a3-a5, t11, and t12 are untouched.
+*/
+
+
+/* This is generally scheduled for the EV5, but should still be pretty
+   good for the EV4 too.
*/
+
+#include <sysdep.h>
+
+	.set noat
+	.set noreorder
+
+	.text
+	.type	__stxncpy, @function
+	.globl	__stxncpy
+	.usepv	__stxncpy, no
+
+	cfi_startproc
+	cfi_return_column (t9)
+
+	/* On entry to this basic block:
+	   t0 == the first destination word for masking back in
+	   t1 == the first source word. */
+	.align 3
+stxncpy_aligned:
+	/* Create the 1st output word and detect 0's in the 1st input word. */
+	lda	t2, -1	# e1 : build a mask against false zero
+	mskqh	t2, a1, t2	# e0 : detection in the src word
+	mskqh	t1, a1, t3	# e0 : t3 = source bytes from the string start upwards
+	ornot	t1, t2, t2	# .. e1 : bytes before the string start forced to 0xff
+	mskql	t0, a1, t0	# e0 : assemble the first output word
+	cmpbge	zero, t2, t7	# .. e1 : bits set iff null found
+	or	t0, t3, t0	# e0 : old dest bytes below, source bytes above
+	beq	a2, $a_eoc	# .. e1 : COUNT ends inside this first word
+	bne	t7, $a_eos	# .. e1 :
+
+	/* On entry to this basic block:
+	   t0 == a source word not containing a null. */
+$a_loop:
+	stq_u	t0, 0(a0)	# e0 : store a full word
+	addq	a0, 8, a0	# .. e1 :
+	ldq_u	t0, 0(a1)	# e0 : next source word
+	addq	a1, 8, a1	# .. e1 :
+	subq	a2, 1, a2	# e0 : one fewer full word left in COUNT
+	cmpbge	zero, t0, t7	# .. e1 (stall)
+	beq	a2, $a_eoc	# e1 : COUNT ends inside the word just loaded
+	beq	t7, $a_loop	# e1 :
+
+	/* Take care of the final (partial) word store. At this point
+	   the end-of-count bit is set in t7 iff it applies.
+
+	   On entry to this basic block we have:
+	   t0 == the source word containing the null
+	   t7 == the cmpbge mask that found it. */
+$a_eos:
+	negq	t7, t8	# e0 : find low bit set
+	and	t7, t8, t8	# e1 (stall)
+
+	/* For the sake of the cache, don't read a destination word
+	   if we're not going to need it. */
+	and	t8, 0x80, t6	# e0 : last byte is byte 7: the whole word is stored
+	bne	t6, 1f	# .. e1 (zdb)
+
+	/* We're doing a partial word store and so need to combine
+	   our source and original destination words. */
+	ldq_u	t1, 0(a0)	# e0 :
+	subq	t8, 1, t6	# .. e1 :
+	or	t8, t6, t7	# e0 : t7 = bytes up to and including the last one
+	unop	#
+	zapnot	t0, t7, t0	# e0 : clear src bytes > null
+	zap	t1, t7, t1	# .. e1 : clear dst bytes <= null
+	or	t0, t1, t0	# e1 :
+
+1:	stq_u	t0, 0(a0)	# e0 :
+	ret	(t9)	# e1 :
+
+	/* Add the end-of-count bit to the eos detection bitmask. */
+$a_eoc:
+	or	t10, t7, t7
+	br	$a_eos
+
+	.align 3
+__stxncpy:
+	/* Are source and destination co-aligned? */
+	lda	t2, -1
+	xor	a0, a1, t1
+	srl	t2, 1, t2	# t2 = LONG_MAX
+	and	a0, 7, t0	# find dest misalignment
+	cmovlt	a2, t2, a2	# bound neg count to LONG_MAX
+	and	t1, 7, t1	# nonzero iff the low three address bits differ
+	addq	a2, t0, a2	# bias count by dest misalignment
+	subq	a2, 1, a2	# biased index of the last byte covered by COUNT
+	and	a2, 7, t2	# its offset within the final word
+	srl	a2, 3, a2	# a2 = loop counter = (count - 1)/8
+	addq	zero, 1, t10
+	sll	t10, t2, t10	# t10 = bitmask of last count byte
+	bne	t1, $unaligned
+
+	/* We are co-aligned; take care of a partial first word. */
+
+	ldq_u	t1, 0(a1)	# e0 : load first src word
+	addq	a1, 8, a1	# .. e1 :
+
+	beq	t0, stxncpy_aligned	# avoid loading dest word if not needed
+	ldq_u	t0, 0(a0)	# e0 :
+	br	stxncpy_aligned	# .. e1 :
+
+
+/* The source and destination are not co-aligned. Align the destination
+   and cope. We have to be very careful about not reading too much and
+   causing a SEGV. */
+
+	.align 3
+$u_head:
+	/* We know just enough now to be able to assemble the first
+	   full source word. We can still find a zero at the end of it
+	   that prevents us from outputting the whole thing.
+
+	   On entry to this basic block:
+	   t0 == the first dest word, unmasked
+	   t1 == the shifted low bits of the first source word
+	   t6 == bytemask that is -1 in dest word bytes */
+
+	ldq_u	t2, 8(a1)	# e0 : load second src word
+	addq	a1, 8, a1	# .. e1 :
+	mskql	t0, a0, t0	# e0 : mask trailing garbage in dst
+	extqh	t2, a1, t4	# e0 : high part, from the second source word
+	or	t1, t4, t1	# e1 : first aligned src word complete
+	mskqh	t1, a0, t1	# e0 : mask leading garbage in src
+	or	t0, t1, t0	# e0 : first output word complete
+	or	t0, t6, t6	# e1 : mask original data for zero test
+	cmpbge	zero, t6, t7	# e0 :
+	beq	a2, $u_eocfin	# .. e1 : COUNT ends inside this first word
+	lda	t6, -1	# e0 :
+	bne	t7, $u_final	# .. e1 :
+
+	mskql	t6, a1, t6	# e0 : mask out bits already seen
+	nop	# .. e1 :
+	stq_u	t0, 0(a0)	# e0 : store first output word
+	or	t6, t2, t2	# .. e1 :
+	cmpbge	zero, t2, t7	# e0 : find nulls in second partial
+	addq	a0, 8, a0	# .. e1 :
+	subq	a2, 1, a2	# e0 :
+	bne	t7, $u_late_head_exit	# .. e1 :
+
+	/* Finally, we've got all the stupid leading edge cases taken care
+	   of and we can set up to enter the main loop. */
+
+	extql	t2, a1, t1	# e0 : position hi-bits of lo word
+	beq	a2, $u_eoc	# .. e1 :
+	ldq_u	t2, 8(a1)	# e0 : read next high-order source word
+	addq	a1, 8, a1	# .. e1 :
+	extqh	t2, a1, t0	# e0 : position lo-bits of hi word
+	cmpbge	zero, t2, t7	# .. e1 : test new word for eos
+	nop	# e0 :
+	bne	t7, $u_eos	# .. e1 :
+
+	/* Unaligned copy main loop. In order to avoid reading too much,
+	   the loop is structured to detect zeros in aligned source words.
+	   This has, unfortunately, effectively pulled half of a loop
+	   iteration out into the head and half into the tail, but it does
+	   prevent nastiness from accumulating in the very thing we want
+	   to run as fast as possible.
+
+	   On entry to this basic block:
+	   t0 == the shifted low-order bits from the current source word
+	   t1 == the shifted high-order bits from the previous source word
+	   t2 == the unshifted current source word
+
+	   We further know that t2 does not contain a null terminator. */
+
+	.align 3
+$u_loop:
+	or	t0, t1, t0	# e0 : current dst word now complete
+	subq	a2, 1, a2	# .. e1 : decrement word count
+	stq_u	t0, 0(a0)	# e0 : save the current word
+	addq	a0, 8, a0	# .. e1 :
+	extql	t2, a1, t1	# e0 : extract high bits for next time
+	beq	a2, $u_eoc	# .. e1 : COUNT runs out before the next load
+	ldq_u	t2, 8(a1)	# e0 : load high word for next time
+	addq	a1, 8, a1	# .. e1 :
+	nop	# e0 :
+	cmpbge	zero, t2, t7	# .. e1 : test new word for eos
+	extqh	t2, a1, t0	# e0 : extract low bits for current word
+	beq	t7, $u_loop	# .. e1 :
+
+	/* We've found a zero somewhere in the source word we just read.
+	   If it resides in the lower half, we have one (probably partial)
+	   word to write out, and if it resides in the upper half, we
+	   have one full and one partial word left to write out.
+
+	   On entry to this basic block:
+	   t0 == the shifted low-order bits from the current source word
+	   t1 == the shifted high-order bits from the previous source word
+	   t2 == the unshifted current source word. */
+$u_eos:
+	or	t0, t1, t0	# e0 : first (partial) source word complete
+	cmpbge	zero, t0, t7	# e0 : is the null in this first bit?
+	bne	t7, $u_final	# .. e1 (zdb)
+
+	stq_u	t0, 0(a0)	# e0 : the null was in the high-order bits
+	addq	a0, 8, a0	# .. e1 :
+	subq	a2, 1, a2	# e0 :
+
+$u_late_head_exit:
+	extql	t2, a1, t0	# e0 : remaining source bytes, shifted into place
+	cmpbge	zero, t0, t7	# e0 :
+	or	t7, t10, t6	# e1 : same mask with the end-of-count bit added
+	cmoveq	a2, t6, t7	# e0 : use it only when no full words of COUNT remain
+
+	/* Take care of a final (probably partial) result word.
+	   On entry to this basic block:
+	   t0 == assembled source word
+	   t7 == cmpbge mask that found the null. */
+$u_final:
+	negq	t7, t6	# e0 : isolate low bit set
+	and	t6, t7, t8	# e1 :
+
+	and	t8, 0x80, t6	# e0 : avoid dest word load if we can
+	bne	t6, 1f	# .. e1 (zdb)
+
+	ldq_u	t1, 0(a0)	# e0 :
+	subq	t8, 1, t6	# .. e1 :
+	or	t6, t8, t7	# e0 :
+	zapnot	t0, t7, t0	# .. e1 : kill source bytes > null
+	zap	t1, t7, t1	# e0 : kill dest bytes <= null
+	or	t0, t1, t0	# e1 :
+
+1:	stq_u	t0, 0(a0)	# e0 :
+	ret	(t9)	# .. e1 :
+
+	/* Got to end-of-count before end of string.
+	   On entry to this basic block:
+	   t1 == the shifted high-order bits from the previous source word */
+$u_eoc:
+	and	a1, 7, t6	# e1 :
+	sll	t10, t6, t6	# e0 :
+	and	t6, 0xff, t6	# e0 :
+	bne	t6, 1f	# e1 : avoid src word load if we can
+
+	ldq_u	t2, 8(a1)	# e0 : load final src word
+	nop	# .. e1 :
+	extqh	t2, a1, t0	# e0 : extract high bits for last word
+	or	t1, t0, t1	# e1 :
+
+1:	cmpbge	zero, t1, t7
+	mov	t1, t0
+
+$u_eocfin:	# end-of-count, final word
+	or	t10, t7, t7
+	br	$u_final
+
+	/* Unaligned copy entry point. */
+	.align 3
+$unaligned:
+
+	ldq_u	t1, 0(a1)	# e0 : load first source word
+
+	and	a0, 7, t4	# .. e1 : find dest misalignment
+	and	a1, 7, t5	# e0 : find src misalignment
+
+	/* Conditionally load the first destination word and a bytemask
+	   with 0xff indicating that the destination byte is sacrosanct. */
+
+	mov	zero, t0	# .. e1 :
+	mov	zero, t6	# e0 :
+	beq	t4, 1f	# .. e1 : dest already aligned: nothing to preserve
+	ldq_u	t0, 0(a0)	# e0 :
+	lda	t6, -1	# .. e1 :
+	mskql	t6, a0, t6	# e0 :
+1:
+	subq	a1, t4, a1	# .. e1 : sub dest misalignment from src addr
+
+	/* If source misalignment is larger than dest misalignment, we need
+	   extra startup checks to avoid SEGV. */
+
+	cmplt	t4, t5, t8	# e1 : t8 = (dest misalignment < src misalignment)
+	extql	t1, a1, t1	# .. e0 : shift src into place
+	lda	t2, -1	# e0 : for creating masks later
+	beq	t8, $u_head	# e1 :
+
+	mskqh	t2, t5, t2	# e0 : begin src byte validity mask
+	cmpbge	zero, t1, t7	# .. e1 : is there a zero?
+	extql	t2, a1, t2	# e0 :
+	or	t7, t10, t5	# .. e1 : test for end-of-count too
+	cmpbge	zero, t2, t3	# e0 :
+	cmoveq	a2, t5, t7	# .. e1 :
+	andnot	t7, t3, t7	# e0 :
+	beq	t7, $u_head	# .. e1 (zdb)
+
+	/* At this point we've found a zero in the first partial word of
+	   the source. We need to isolate the valid source data and mask
+	   it into the original destination data. (Incidentally, we know
+	   that we'll need at least one byte of that original dest word.) */
+
+	ldq_u	t0, 0(a0)	# e0 :
+	negq	t7, t6	# .. e1 : build bitmask of bytes <= zero
+	mskqh	t1, t4, t1	# e0 :
+	and	t6, t7, t8	# .. e1 :
+	subq	t8, 1, t6	# e0 :
+	or	t6, t8, t7	# e1 :
+
+	zapnot	t2, t7, t2	# e0 : prepare source word; mirror changes
+	zapnot	t1, t7, t1	# .. e1 : to source validity mask
+
+	andnot	t0, t2, t0	# e0 : zero place for source to reside
+	or	t0, t1, t0	# e1 : and put it there
+	stq_u	t0, 0(a0)	# e0 :
+	ret	(t9)	# .. e1 :
+
+	cfi_endproc
diff --git a/REORG.TODO/sysdeps/alpha/sub_n.S b/REORG.TODO/sysdeps/alpha/sub_n.S
new file mode 100644
index 0000000000..bc529e490e
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/sub_n.S
@@ -0,0 +1,118 @@
+ # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995-2017 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>.
+
+
+ # REGISTER USE
+ # $16	res_ptr, destination of the difference limbs
+ # $17	s1_ptr, limbs to subtract from
+ # $18	s2_ptr, limbs being subtracted
+ # $19	size in limbs; $0 carries the borrow and is the value at return
+
+	.set	noreorder
+	.set	noat
+.text
+	.align	3
+	.globl	__mpn_sub_n
+	.ent	__mpn_sub_n
+__mpn_sub_n:
+	.frame	$30,0,$26,0
+ # Limb 0 of each operand is fetched up front; every later step loads one ahead.
+	ldq	$3,0($17)		# $3 = current s1 limb
+	ldq	$4,0($18)		# $4 = current s2 limb
+
+	subq	$19,1,$19		# the last limb is always left for .Ltail
+	and	$19,3,$2		# $2 = (size - 1) mod 4, limbs done one by one
+	clr	$0			# no borrow yet
+	beq	$2,.Lbulk		# none to peel off, try the 4-limb loop
+
+	subq	$19,$2,$19		# $19 = limbs remaining for the 4-limb loop
+
+.Lhead:	subq	$2,1,$2
+	ldq	$5,8($17)		# fetch the next s1 limb early
+	addq	$4,$0,$4		# s2 limb plus incoming borrow
+	ldq	$6,8($18)		# fetch the next s2 limb early
+	cmpult	$4,$0,$1		# $1 = 1 when that addition wrapped
+	subq	$3,$4,$4		# difference limb
+	cmpult	$3,$4,$0		# $0 = 1 when the subtraction borrowed
+	stq	$4,0($16)
+	bis	$0,$1,$0		# outgoing borrow = either event
+
+	addq	$17,8,$17
+	addq	$18,8,$18
+	bis	$5,$5,$3		# prefetched limbs become the current pair
+	bis	$6,$6,$4
+	addq	$16,8,$16
+	bne	$2,.Lhead
+
+.Lbulk:	beq	$19,.Ltail
+
+	.align	3
+.Lquad:	subq	$19,4,$19
+ # Four limbs per pass; the live operand pair alternates $3/$4 and $5/$6.
+	ldq	$5,8($17)
+	addq	$4,$0,$4
+	ldq	$6,8($18)
+	cmpult	$4,$0,$1
+	subq	$3,$4,$4
+	cmpult	$3,$4,$0
+	stq	$4,0($16)		# limb 0 of this pass
+	bis	$0,$1,$0
+
+	ldq	$3,16($17)
+	addq	$6,$0,$6
+	ldq	$4,16($18)
+	cmpult	$6,$0,$1
+	subq	$5,$6,$6
+	cmpult	$5,$6,$0
+	stq	$6,8($16)		# limb 1
+	bis	$0,$1,$0
+
+	ldq	$5,24($17)
+	addq	$4,$0,$4
+	ldq	$6,24($18)
+	cmpult	$4,$0,$1
+	subq	$3,$4,$4
+	cmpult	$3,$4,$0
+	stq	$4,16($16)		# limb 2
+	bis	$0,$1,$0
+
+	ldq	$3,32($17)
+	addq	$6,$0,$6
+	ldq	$4,32($18)
+	cmpult	$6,$0,$1
+	subq	$5,$6,$6
+	cmpult	$5,$6,$0
+	stq	$6,24($16)		# limb 3
+	bis	$0,$1,$0
+
+	addq	$17,32,$17
+	addq	$18,32,$18
+	addq	$16,32,$16
+	bne	$19,.Lquad
+
+.Ltail:	addq	$4,$0,$4		# final limb, already loaded in $3/$4
+	cmpult	$4,$0,$1
+	subq	$3,$4,$4
+	cmpult	$3,$4,$0
+	stq	$4,0($16)
+	bis	$0,$1,$0		# $0 = borrow out of the top limb
+	ret	$31,($26),1
+
+	.end	__mpn_sub_n
diff --git a/REORG.TODO/sysdeps/alpha/submul_1.S b/REORG.TODO/sysdeps/alpha/submul_1.S
new file mode 100644
index 0000000000..020866733a
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/submul_1.S
@@ -0,0 +1,90 @@
+ # Alpha 21064 __mpn_submul_1 -- Multiply a limb vector with a limb and
+ # subtract the result from a second limb vector.
+
+ # Copyright (C) 1992-2017 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>.
+
+
+ # REGISTER USE
+ # $16	res_ptr, limbs that are read, reduced and written back
+ # $17	s1_ptr, limbs that get multiplied
+ # $18	size in limbs
+ # $19	s2_limb, the single multiplier limb
+
+ # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+
+	.set	noreorder
+	.set	noat
+.text
+	.align	3
+	.globl	__mpn_submul_1
+	.ent	__mpn_submul_1 2
+__mpn_submul_1:
+	.frame	$30,0,$26
+
+	ldq	$2,0($17)		# first s1 limb
+	addq	$17,8,$17		# advance s1_ptr
+	subq	$18,1,$18		# one limb consumed
+	mulq	$2,$19,$3		# low half of the product
+	ldq	$5,0($16)		# current result limb
+	umulh	$2,$19,$0		# high half of the product
+	beq	$18,.Lsingle		# size was 1
+	ldq	$2,0($17)		# second s1 limb
+	addq	$17,8,$17		# advance s1_ptr
+	subq	$18,1,$18		# one limb consumed
+	subq	$5,$3,$3		# result limb minus low product
+	cmpult	$5,$3,$4		# $4 = borrow from that subtraction
+	stq	$3,0($16)
+	addq	$16,8,$16		# advance res_ptr
+	beq	$18,.Llast		# size was 2
+
+	.align	3
+.Lnext:	mulq	$2,$19,$3		# low half of the product
+	ldq	$5,0($16)		# current result limb
+	addq	$4,$0,$0		# $0 = pending high half plus borrow
+	subq	$18,1,$18		# one limb consumed
+	umulh	$2,$19,$4		# $4 = high half of this product
+	ldq	$2,0($17)		# next s1 limb
+	addq	$17,8,$17		# advance s1_ptr
+	addq	$3,$0,$3		# amount to subtract from this limb
+	cmpult	$3,$0,$0		# $0 = carry out of that addition
+	subq	$5,$3,$3
+	cmpult	$5,$3,$5		# $5 = borrow from the subtraction
+	stq	$3,0($16)
+	addq	$16,8,$16		# advance res_ptr
+	addq	$5,$0,$0		# $0 = carry plus borrow
+	bne	$18,.Lnext
+
+.Llast:	mulq	$2,$19,$3		# low half of the final product
+	ldq	$5,0($16)		# final result limb
+	addq	$4,$0,$0		# $0 = pending high half plus borrow
+	umulh	$2,$19,$4		# $4 = high half of the final product
+	addq	$3,$0,$3		# amount to subtract from this limb
+	cmpult	$3,$0,$0		# $0 = carry out of that addition
+	subq	$5,$3,$3
+	cmpult	$5,$3,$5		# $5 = borrow from the subtraction
+	stq	$3,0($16)
+	addq	$5,$0,$0		# $0 = carry plus borrow
+	addq	$4,$0,$0		# returned limb = high half plus those
+	ret	$31,($26),1
+.Lsingle:	subq	$5,$3,$3
+	cmpult	$5,$3,$5		# $5 = borrow from the subtraction
+	stq	$3,0($16)
+	addq	$0,$5,$0		# returned limb = high half plus borrow
+	ret	$31,($26),1
+
+	.end	__mpn_submul_1
diff --git a/REORG.TODO/sysdeps/alpha/tininess.h b/REORG.TODO/sysdeps/alpha/tininess.h
new file mode 100644
index 0000000000..1db37790f8
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/tininess.h
@@ -0,0 +1 @@
+#define TININESS_AFTER_ROUNDING 1
diff --git a/REORG.TODO/sysdeps/alpha/tls-macros.h b/REORG.TODO/sysdeps/alpha/tls-macros.h
new file mode 100644
index 0000000000..00489c289f
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/tls-macros.h
@@ -0,0 +1,25 @@
+/* Macros to support TLS testing in times of missing compiler support. */
+
+extern void *__tls_get_addr (void *);
+
+# define TLS_GD(x) \
+  ({ register void *__gp asm ("$29"); void *__result; \
+     asm ("lda %0, " #x "($gp) !tlsgd" : "=r" (__result) : "r"(__gp)); \
+     __tls_get_addr (__result); })
+
+# define TLS_LD(x) \
+  ({ register void *__gp asm ("$29"); void *__result; \
+     asm ("lda %0, " #x "($gp) !tlsldm" : "=r" (__result) : "r"(__gp)); \
+     __result = __tls_get_addr (__result); \
+     asm ("lda %0, " #x "(%0) !dtprel" : "+r" (__result)); \
+     __result; })
+
+# define TLS_IE(x) \
+  ({ register void *__gp asm ("$29"); long ofs; \
+     asm ("ldq %0, " #x "($gp) !gottprel" : "=r"(ofs) : "r"(__gp)); \
+     __builtin_thread_pointer () + ofs; })
+
+# define TLS_LE(x) \
+  ({ void *__result = __builtin_thread_pointer (); \
+     asm ("lda %0, " #x "(%0) !tprel" : "+r" (__result)); \
+     __result; })
diff --git a/REORG.TODO/sysdeps/alpha/tst-audit.h b/REORG.TODO/sysdeps/alpha/tst-audit.h
new file mode 100644
index 0000000000..042a8c7718
--- /dev/null
+++ b/REORG.TODO/sysdeps/alpha/tst-audit.h
@@ -0,0 +1,24 @@
+/* Definitions for testing
PLT entry/exit auditing. Alpha version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define pltenter la_alpha_gnu_pltenter +#define pltexit la_alpha_gnu_pltexit +#define La_regs La_alpha_regs +#define La_retval La_alpha_retval +#define int_retval lrv_r0 diff --git a/REORG.TODO/sysdeps/alpha/udiv_qrnnd.S b/REORG.TODO/sysdeps/alpha/udiv_qrnnd.S new file mode 100644 index 0000000000..899b445641 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/udiv_qrnnd.S @@ -0,0 +1,159 @@ + # Alpha 21064 __udiv_qrnnd + + # Copyright (C) 1992-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. 
If not, see <http://www.gnu.org/licenses/>.
+
+#include <sysdep.h>
+
+	.set	noreorder
+	.set	noat
+
+	.text
+/* $0 = (n1:n0) / d, remainder stored at 0(rem_ptr); one quotient bit per step.  */
+LEAF(__udiv_qrnnd, 0)
+#ifdef PROF
+	ldgp	gp, 0(pv)
+	lda	AT, _mcount
+	jsr	AT, (AT), _mcount
+	.prologue 1
+#else
+	.prologue 0
+#endif
+
+#define cnt	$2	/* outer loop counter */
+#define tmp	$3	/* scratch */
+#define rem_ptr	$16	/* where the remainder is stored */
+#define n1	$17	/* high dividend word, ends up as the remainder */
+#define n0	$18	/* low dividend word, ends up as the quotient */
+#define d	$19	/* divisor */
+#define qb	$20	/* quotient bit of the current step */
+
+	ldiq	cnt,16			# 16 passes of 4 steps = 64 quotient bits
+	blt	d,$large		# top bit of d set, take the halved path
+
+$small_loop:	cmplt	n0,0,tmp	# tmp = bit about to leave n0
+	addq	n1,n1,n1		# shift n1:n0 left by one
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	d,n1,qb			# qb = 1 when d fits into n1
+	subq	n1,d,tmp
+	cmovne	qb,tmp,n1		# keep n1 - d only in that case
+	bis	n0,qb,n0		# record the quotient bit
+	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	d,n1,qb
+	subq	n1,d,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	d,n1,qb
+	subq	n1,d,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	d,n1,qb
+	subq	n1,d,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	subq	cnt,1,cnt
+	bgt	cnt,$small_loop
+	stq	n1,0(rem_ptr)
+	mov	n0,$0
+	ret	$31,($26),1
+
+$large:
+	and	n0,1,$4			# $4 = low dividend bit, put back later
+
+	srl	n0,1,n0			# shift n1:n0 right by one
+	sll	n1,63,tmp
+	bis	tmp,n0,n0
+	srl	n1,1,n1
+
+	and	d,1,$6			# $6 = 1 when d is odd
+	srl	d,1,$5
+	addq	$5,$6,$5		# $5 = d/2 rounded up, the working divisor
+
+$large_loop:	cmplt	n0,0,tmp	# same step as above, dividing by $5
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	$5,n1,qb
+	subq	n1,$5,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	$5,n1,qb
+	subq	n1,$5,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	$5,n1,qb
+	subq	n1,$5,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	$5,n1,qb
+	subq	n1,$5,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	subq	cnt,1,cnt
+	bgt	cnt,$large_loop
+
+	addq	n1,n1,n1		# the remainder was halved too: double it
+	addq	$4,n1,n1		# and put the saved low bit back
+	bne	$6,$odd_fix		# an odd d needs a correction
+	stq	n1,0(rem_ptr)
+	mov	n0,$0
+	ret	$31,($26),1
+
+$odd_fix:
+	/* n0 and n1 are the quotient and remainder for divisor d + 1.  */
+	addq	n1,n0,n1
+
+	cmpult	n1,n0,tmp		# tmp = carry out of that addition
+	subq	n1,d,AT
+	addq	n0,tmp,n0		# on carry the quotient grows by one
+	cmovne	tmp,AT,n1		# and the remainder drops by d
+
+	cmpult	n1,d,tmp		# tmp = 1 when the remainder is below d
+	addq	n0,1,AT
+	cmoveq	tmp,AT,n0		# otherwise one more d fits
+	subq	n1,d,AT
+	cmoveq	tmp,AT,n1
+
+	stq	n1,0(rem_ptr)
+	mov	n0,$0
+	ret	$31,($26),1
+
+	.end	__udiv_qrnnd
|