diff options
author | Zack Weinberg <zackw@panix.com> | 2017-06-08 15:39:03 -0400 |
---|---|---|
committer | Zack Weinberg <zackw@panix.com> | 2017-06-08 15:39:03 -0400 |
commit | 5046dbb4a7eba5eccfd258f92f4735c9ffc8d069 (patch) | |
tree | 4470480d904b65cf14ca524f96f79eca818c3eaf /REORG.TODO/sysdeps/alpha | |
parent | 199fc19d3aaaf57944ef036e15904febe877fc93 (diff) | |
download | glibc-5046dbb4a7eba5eccfd258f92f4735c9ffc8d069.tar.gz glibc-5046dbb4a7eba5eccfd258f92f4735c9ffc8d069.tar.xz glibc-5046dbb4a7eba5eccfd258f92f4735c9ffc8d069.zip |
Prepare for radical source tree reorganization. zack/build-layout-experiment
All top-level files and directories are moved into a temporary storage directory, REORG.TODO, except for files that will certainly still exist in their current form at top level when we're done (COPYING, COPYING.LIB, LICENSES, NEWS, README), all old ChangeLog files (which are moved to the new directory OldChangeLogs, instead), and the generated file INSTALL (which is just deleted; in the new order, there will be no generated files checked into version control).
Diffstat (limited to 'REORG.TODO/sysdeps/alpha')
210 files changed, 18135 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/alpha/Implies b/REORG.TODO/sysdeps/alpha/Implies new file mode 100644 index 0000000000..d03783b127 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/Implies @@ -0,0 +1,7 @@ +wordsize-64 +# Alpha uses IEEE 754 single, double and quad precision floating point. +ieee754/ldbl-128 +ieee754/dbl-64/wordsize-64 +ieee754/dbl-64 +ieee754/flt-32 +alpha/soft-fp diff --git a/REORG.TODO/sysdeps/alpha/Makefile b/REORG.TODO/sysdeps/alpha/Makefile new file mode 100644 index 0000000000..98da3b57e6 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/Makefile @@ -0,0 +1,62 @@ +# Copyright (C) 1993-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# Contributed by Brendan Kehoe (brendan@zen.org). + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library. If not, see +# <http://www.gnu.org/licenses/>. + +ifeq ($(subdir),db2) +CPPFLAGS += -DHAVE_SPINLOCKS=1 -DHAVE_ASSEM_ALPHA=1 +endif + +ifeq ($(subdir),debug) +# Consider making this GCC's default... +CFLAGS-backtrace.c = -fasynchronous-unwind-tables +endif + +ifeq ($(subdir),gmon) +sysdep_routines += _mcount +endif + +ifeq ($(subdir),gnulib) +sysdep_routines += divl divlu divq divqu reml remlu remq remqu +endif + +ifeq ($(subdir),string) +sysdep_routines += stxcpy stxncpy +endif + +ifeq ($(subdir),elf) +# The ld.so startup code cannot use literals until it self-relocates. +CFLAGS-rtld.c = -mbuild-constants +endif + +ifeq ($(subdir),math) +# The fma routines rely on inexact being raised for correct results. +CFLAGS-s_fma.c = -mieee-with-inexact +CFLAGS-s_fmaf.c = -mieee-with-inexact +# This test tries to check for inexact being raised by arithmetic. +CFLAGS-test-misc.c += -mieee-with-inexact +# Avoid "conflicting types for built-in function" warnings +CFLAGS-s_isnan.c += -fno-builtin-isnanf +endif + +# Build everything with full IEEE math support, and with dynamic rounding; +# there are a number of math routines that are defined to work with the +# "current" rounding mode, and it's easiest to set this with all of them. +sysdep-CFLAGS += -mieee -mfp-rounding-mode=d + +# libc.so requires about 16k for the small data area, which is well +# below the 64k maximum. +pic-ccflag = -fpic diff --git a/REORG.TODO/sysdeps/alpha/Subdirs b/REORG.TODO/sysdeps/alpha/Subdirs new file mode 100644 index 0000000000..87eadf3024 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/Subdirs @@ -0,0 +1 @@ +soft-fp diff --git a/REORG.TODO/sysdeps/alpha/Versions b/REORG.TODO/sysdeps/alpha/Versions new file mode 100644 index 0000000000..ae8fde7b23 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/Versions @@ -0,0 +1,17 @@ +libc { + GLIBC_2.0 { + # functions with special/multiple interfaces + __divqu; __remqu; __divqs; __remqs; __divlu; __remlu; __divls; + __remls; __divl; __reml; __divq; __remq; __divqu; __remqu; + } +} +libm { + GLIBC_2.0 { + # used in inline functions. + __atan2; + } + GLIBC_2.18 { + # forgotten when the symbols were added to glibc 2.15 for other targets + __sqrt_finite; __sqrtf_finite; __sqrtl_finite; + } +} diff --git a/REORG.TODO/sysdeps/alpha/__longjmp.S b/REORG.TODO/sysdeps/alpha/__longjmp.S new file mode 100644 index 0000000000..ae8de82669 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/__longjmp.S @@ -0,0 +1,63 @@ +/* Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __ASSEMBLY__ + +#include <sysdep.h> +#include <jmpbuf-offsets.h> + + +ENTRY(__longjmp) +#ifdef PROF + ldgp gp, 0(pv) + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at + .prologue 1 +#else + .prologue 0 +#endif + + mov a1, v0 + ldq s0, JB_S0*8(a0) + ldq s1, JB_S1*8(a0) + ldq s2, JB_S2*8(a0) + ldq s3, JB_S3*8(a0) + ldq s4, JB_S4*8(a0) + ldq s5, JB_S5*8(a0) + ldq ra, JB_PC*8(a0) + ldq fp, JB_FP*8(a0) + ldq t0, JB_SP*8(a0) + ldt $f2, JB_F2*8(a0) + ldt $f3, JB_F3*8(a0) + ldt $f4, JB_F4*8(a0) + ldt $f5, JB_F5*8(a0) + ldt $f6, JB_F6*8(a0) + ldt $f7, JB_F7*8(a0) + ldt $f8, JB_F8*8(a0) + ldt $f9, JB_F9*8(a0) +#ifdef PTR_DEMANGLE + PTR_DEMANGLE(ra, t1) + PTR_DEMANGLE2(t0, t1) + PTR_DEMANGLE2(fp, t1) +#endif + cmoveq v0, 1, v0 + mov t0, sp + ret + +END(__longjmp) diff --git a/REORG.TODO/sysdeps/alpha/_mcount.S b/REORG.TODO/sysdeps/alpha/_mcount.S new file mode 100644 index 0000000000..8a35f3903b --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/_mcount.S @@ -0,0 +1,105 @@ +/* Machine-specific calling sequence for `mcount' profiling function. alpha + Copyright (C) 1995-2017 Free Software Foundation, Inc. + Contributed by David Mosberger (davidm@cs.arizona.edu). + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Assembly stub to invoke _mcount(). Compiler generated code calls + this stub after executing a function's prologue and without saving any + registers. It is therefore necessary to preserve a0..a5 as they may + contain function arguments. To work correctly with frame- less + functions, it is also necessary to preserve ra. Finally, division + routines are invoked with a special calling convention and the + compiler treats those calls as if they were instructions. In + particular, it doesn't save any of the temporary registers (caller + saved registers). It is therefore necessary to preserve all + caller-saved registers as well. + + Upon entering _mcount, register $at holds the return address and ra + holds the return address of the function's caller (selfpc and frompc, + respectively in gmon.c language...). */ + +#include <sysdep.h> + + .set noat + .set noreorder + +LEAF(_mcount, 0xb0) + subq sp, 0xb0, sp + .prologue 0 + stq a0, 0x00(sp) + mov ra, a0 # a0 = caller-pc + stq a1, 0x08(sp) + mov $at, a1 # a1 = self-pc + stq $at, 0x10(sp) + + stq a2, 0x18(sp) + stq a3, 0x20(sp) + stq a4, 0x28(sp) + stq a5, 0x30(sp) + stq ra, 0x38(sp) + stq gp, 0x40(sp) + + br gp, 1f +1: ldgp gp, 0(gp) + + stq t0, 0x48(sp) + stq t1, 0x50(sp) + stq t2, 0x58(sp) + stq t3, 0x60(sp) + stq t4, 0x68(sp) + stq t5, 0x70(sp) + stq t6, 0x78(sp) + + stq t7, 0x80(sp) + stq t8, 0x88(sp) + stq t9, 0x90(sp) + stq t10, 0x98(sp) + stq t11, 0xa0(sp) + stq v0, 0xa8(sp) + + jsr ra, __mcount + + ldq a0, 0x00(sp) + ldq a1, 0x08(sp) + ldq $at, 0x10(sp) # restore self-pc + ldq a2, 0x18(sp) + ldq a3, 0x20(sp) + ldq a4, 0x28(sp) + ldq a5, 0x30(sp) + ldq ra, 0x38(sp) + ldq gp, 0x40(sp) + mov $at, pv # make pv point to return address + ldq t0, 0x48(sp) # this is important under OSF/1 to + ldq t1, 0x50(sp) # ensure that the code that we return + ldq t2, 0x58(sp) # can correctly compute its gp + ldq t3, 0x60(sp) + ldq t4, 0x68(sp) + ldq t5, 0x70(sp) + ldq t6, 0x78(sp) + ldq t7, 0x80(sp) + ldq t8, 0x88(sp) + ldq t9, 0x90(sp) + ldq t10, 0x98(sp) + ldq t11, 0xa0(sp) + ldq v0, 0xa8(sp) + + addq sp, 0xb0, sp + ret zero,($at),1 + + END(_mcount) + +weak_alias (_mcount, mcount) diff --git a/REORG.TODO/sysdeps/alpha/add_n.S b/REORG.TODO/sysdeps/alpha/add_n.S new file mode 100644 index 0000000000..85f8e270cb --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/add_n.S @@ -0,0 +1,118 @@ + # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and + # store sum in a third limb vector. + + # Copyright (C) 1995-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr $16 + # s1_ptr $17 + # s2_ptr $18 + # size $19 + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_add_n + .ent __mpn_add_n +__mpn_add_n: + .frame $30,0,$26,0 + + ldq $3,0($17) + ldq $4,0($18) + + subq $19,1,$19 + and $19,4-1,$2 # number of limbs in first loop + bis $31,$31,$0 + beq $2,.L0 # if multiple of 4 limbs, skip first loop + + subq $19,$2,$19 + +.Loop0: subq $2,1,$2 + ldq $5,8($17) + addq $4,$0,$4 + ldq $6,8($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 + + addq $17,8,$17 + addq $18,8,$18 + bis $5,$5,$3 + bis $6,$6,$4 + addq $16,8,$16 + bne $2,.Loop0 + +.L0: beq $19,.Lend + + .align 3 +.Loop: subq $19,4,$19 + + ldq $5,8($17) + addq $4,$0,$4 + ldq $6,8($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 + + ldq $3,16($17) + addq $6,$0,$6 + ldq $4,16($18) + cmpult $6,$0,$1 + addq $5,$6,$6 + cmpult $6,$5,$0 + stq $6,8($16) + or $0,$1,$0 + + ldq $5,24($17) + addq $4,$0,$4 + ldq $6,24($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,16($16) + or $0,$1,$0 + + ldq $3,32($17) + addq $6,$0,$6 + ldq $4,32($18) + cmpult $6,$0,$1 + addq $5,$6,$6 + cmpult $6,$5,$0 + stq $6,24($16) + or $0,$1,$0 + + addq $17,32,$17 + addq $18,32,$18 + addq $16,32,$16 + bne $19,.Loop + +.Lend: addq $4,$0,$4 + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 + ret $31,($26),1 + + .end __mpn_add_n diff --git a/REORG.TODO/sysdeps/alpha/addmul_1.S b/REORG.TODO/sysdeps/alpha/addmul_1.S new file mode 100644 index 0000000000..11bd135e83 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/addmul_1.S @@ -0,0 +1,90 @@ + # Alpha 21064 __mpn_addmul_1 -- Multiply a limb vector with a limb and add + # the result to a second limb vector. + + # Copyright (C) 1992-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr r16 + # s1_ptr r17 + # size r18 + # s2_limb r19 + + # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5. + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_addmul_1 + .ent __mpn_addmul_1 2 +__mpn_addmul_1: + .frame $30,0,$26 + + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + subq $18,1,$18 # size-- + mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + umulh $2,$19,$0 # $0 = prod_high + beq $18,.Lend1 # jump if size was == 1 + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + subq $18,1,$18 # size-- + addq $5,$3,$3 + cmpult $3,$5,$4 + stq $3,0($16) + addq $16,8,$16 # res_ptr++ + beq $18,.Lend2 # jump if size was == 2 + + .align 3 +.Loop: mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + subq $18,1,$18 # size-- + umulh $2,$19,$4 # $4 = cy_limb + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + addq $3,$0,$3 # $3 = cy_limb + prod_low + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + addq $5,$3,$3 + cmpult $3,$5,$5 + stq $3,0($16) + addq $16,8,$16 # res_ptr++ + addq $5,$0,$0 # combine carries + bne $18,.Loop + +.Lend2: mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + umulh $2,$19,$4 # $4 = cy_limb + addq $3,$0,$3 # $3 = cy_limb + prod_low + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + addq $5,$3,$3 + cmpult $3,$5,$5 + stq $3,0($16) + addq $5,$0,$0 # combine carries + addq $4,$0,$0 # cy_limb = prod_high + cy + ret $31,($26),1 +.Lend1: addq $5,$3,$3 + cmpult $3,$5,$5 + stq $3,0($16) + addq $0,$5,$0 + ret $31,($26),1 + + .end __mpn_addmul_1 diff --git a/REORG.TODO/sysdeps/alpha/alphaev5/add_n.S b/REORG.TODO/sysdeps/alpha/alphaev5/add_n.S new file mode 100644 index 0000000000..d7db8f4672 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev5/add_n.S @@ -0,0 +1,146 @@ + # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and + # store sum in a third limb vector. + + # Copyright (C) 1995-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr $16 + # s1_ptr $17 + # s2_ptr $18 + # size $19 + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_add_n + .ent __mpn_add_n +__mpn_add_n: + .frame $30,0,$26,0 + + or $31,$31,$25 # clear cy + subq $19,4,$19 # decr loop cnt + blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop + # Start software pipeline for 1st loop + ldq $0,0($18) + ldq $1,8($18) + ldq $4,0($17) + ldq $5,8($17) + addq $17,32,$17 # update s1_ptr + ldq $2,16($18) + addq $0,$4,$20 # 1st main add + ldq $3,24($18) + subq $19,4,$19 # decr loop cnt + ldq $6,-16($17) + cmpult $20,$0,$25 # compute cy from last add + ldq $7,-8($17) + addq $1,$25,$28 # cy add + addq $18,32,$18 # update s2_ptr + addq $5,$28,$21 # 2nd main add + cmpult $28,$25,$8 # compute cy from last add + blt $19,.Lend1 # if less than 4 limbs remain, jump + # 1st loop handles groups of 4 limbs in a software pipeline + .align 4 +.Loop: cmpult $21,$28,$25 # compute cy from last add + ldq $0,0($18) + or $8,$25,$25 # combine cy from the two adds + ldq $1,8($18) + addq $2,$25,$28 # cy add + ldq $4,0($17) + addq $28,$6,$22 # 3rd main add + ldq $5,8($17) + cmpult $28,$25,$8 # compute cy from last add + cmpult $22,$28,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + stq $21,8($16) + addq $3,$25,$28 # cy add + addq $28,$7,$23 # 4th main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $23,$28,$25 # compute cy from last add + addq $17,32,$17 # update s1_ptr + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + addq $0,$25,$28 # cy add + ldq $2,16($18) + addq $4,$28,$20 # 1st main add + ldq $3,24($18) + cmpult $28,$25,$8 # compute cy from last add + ldq $6,-16($17) + cmpult $20,$28,$25 # compute cy from last add + ldq $7,-8($17) + or $8,$25,$25 # combine cy from the two adds + subq $19,4,$19 # decr loop cnt + stq $22,-16($16) + addq $1,$25,$28 # cy add + stq $23,-8($16) + addq $5,$28,$21 # 2nd main add + addq $18,32,$18 # update s2_ptr + cmpult $28,$25,$8 # compute cy from last add + bge $19,.Loop + # Finish software pipeline for 1st loop +.Lend1: cmpult $21,$28,$25 # compute cy from last add + or $8,$25,$25 # combine cy from the two adds + addq $2,$25,$28 # cy add + addq $28,$6,$22 # 3rd main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $22,$28,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + stq $21,8($16) + addq $3,$25,$28 # cy add + addq $28,$7,$23 # 4th main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $23,$28,$25 # compute cy from last add + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + stq $22,-16($16) + stq $23,-8($16) +.Lend2: addq $19,4,$19 # restore loop cnt + beq $19,.Lret + # Start software pipeline for 2nd loop + ldq $0,0($18) + ldq $4,0($17) + subq $19,1,$19 + beq $19,.Lend0 + # 2nd loop handles remaining 1-3 limbs + .align 4 +.Loop0: addq $0,$25,$28 # cy add + ldq $0,8($18) + addq $4,$28,$20 # main add + ldq $4,8($17) + addq $18,8,$18 + cmpult $28,$25,$8 # compute cy from last add + addq $17,8,$17 + stq $20,0($16) + cmpult $20,$28,$25 # compute cy from last add + subq $19,1,$19 # decr loop cnt + or $8,$25,$25 # combine cy from the two adds + addq $16,8,$16 + bne $19,.Loop0 +.Lend0: addq $0,$25,$28 # cy add + addq $4,$28,$20 # main add + cmpult $28,$25,$8 # compute cy from last add + cmpult $20,$28,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + +.Lret: or $25,$31,$0 # return cy + ret $31,($26),1 + .end __mpn_add_n diff --git a/REORG.TODO/sysdeps/alpha/alphaev5/lshift.S b/REORG.TODO/sysdeps/alpha/alphaev5/lshift.S new file mode 100644 index 0000000000..24ff8e2fc3 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev5/lshift.S @@ -0,0 +1,172 @@ + # Alpha EV5 __mpn_lshift -- + + # Copyright (C) 1994-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr r16 + # s1_ptr r17 + # size r18 + # cnt r19 + + # This code runs at 3.25 cycles/limb on the EV5. + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_lshift + .ent __mpn_lshift +__mpn_lshift: + .frame $30,0,$26,0 + + s8addq $18,$17,$17 # make r17 point at end of s1 + ldq $4,-8($17) # load first limb + subq $31,$19,$20 + s8addq $18,$16,$16 # make r16 point at end of RES + subq $18,1,$18 + and $18,4-1,$28 # number of limbs in first loop + srl $4,$20,$0 # compute function result + + beq $28,.L0 + subq $18,$28,$18 + + .align 3 +.Loop0: ldq $3,-16($17) + subq $16,8,$16 + sll $4,$19,$5 + subq $17,8,$17 + subq $28,1,$28 + srl $3,$20,$6 + or $3,$3,$4 + or $5,$6,$8 + stq $8,0($16) + bne $28,.Loop0 + +.L0: sll $4,$19,$24 + beq $18,.Lend + # warm up phase 1 + ldq $1,-16($17) + subq $18,4,$18 + ldq $2,-24($17) + ldq $3,-32($17) + ldq $4,-40($17) + beq $18,.Lend1 + # warm up phase 2 + srl $1,$20,$7 + sll $1,$19,$21 + srl $2,$20,$8 + ldq $1,-48($17) + sll $2,$19,$22 + ldq $2,-56($17) + srl $3,$20,$5 + or $7,$24,$7 + sll $3,$19,$23 + or $8,$21,$8 + srl $4,$20,$6 + ldq $3,-64($17) + sll $4,$19,$24 + ldq $4,-72($17) + subq $18,4,$18 + beq $18,.Lend2 + .align 4 + # main loop +.Loop: stq $7,-8($16) + or $5,$22,$5 + stq $8,-16($16) + or $6,$23,$6 + + srl $1,$20,$7 + subq $18,4,$18 + sll $1,$19,$21 + unop # ldq $31,-96($17) + + srl $2,$20,$8 + ldq $1,-80($17) + sll $2,$19,$22 + ldq $2,-88($17) + + stq $5,-24($16) + or $7,$24,$7 + stq $6,-32($16) + or $8,$21,$8 + + srl $3,$20,$5 + unop # ldq $31,-96($17) + sll $3,$19,$23 + subq $16,32,$16 + + srl $4,$20,$6 + ldq $3,-96($17) + sll $4,$19,$24 + ldq $4,-104($17) + + subq $17,32,$17 + bne $18,.Loop + # cool down phase 2/1 +.Lend2: stq $7,-8($16) + or $5,$22,$5 + stq $8,-16($16) + or $6,$23,$6 + srl $1,$20,$7 + sll $1,$19,$21 + srl $2,$20,$8 + sll $2,$19,$22 + stq $5,-24($16) + or $7,$24,$7 + stq $6,-32($16) + or $8,$21,$8 + srl $3,$20,$5 + sll $3,$19,$23 + srl $4,$20,$6 + sll $4,$19,$24 + # cool down phase 2/2 + stq $7,-40($16) + or $5,$22,$5 + stq $8,-48($16) + or $6,$23,$6 + stq $5,-56($16) + stq $6,-64($16) + # cool down phase 2/3 + stq $24,-72($16) + ret $31,($26),1 + + # cool down phase 1/1 +.Lend1: srl $1,$20,$7 + sll $1,$19,$21 + srl $2,$20,$8 + sll $2,$19,$22 + srl $3,$20,$5 + or $7,$24,$7 + sll $3,$19,$23 + or $8,$21,$8 + srl $4,$20,$6 + sll $4,$19,$24 + # cool down phase 1/2 + stq $7,-8($16) + or $5,$22,$5 + stq $8,-16($16) + or $6,$23,$6 + stq $5,-24($16) + stq $6,-32($16) + stq $24,-40($16) + ret $31,($26),1 + +.Lend: stq $24,-8($16) + ret $31,($26),1 + .end __mpn_lshift diff --git a/REORG.TODO/sysdeps/alpha/alphaev5/rshift.S b/REORG.TODO/sysdeps/alpha/alphaev5/rshift.S new file mode 100644 index 0000000000..0a44c77d0a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev5/rshift.S @@ -0,0 +1,170 @@ + # Alpha EV5 __mpn_rshift -- + + # Copyright (C) 1994-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr r16 + # s1_ptr r17 + # size r18 + # cnt r19 + + # This code runs at 3.25 cycles/limb on the EV5. + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_rshift + .ent __mpn_rshift +__mpn_rshift: + .frame $30,0,$26,0 + + ldq $4,0($17) # load first limb + subq $31,$19,$20 + subq $18,1,$18 + and $18,4-1,$28 # number of limbs in first loop + sll $4,$20,$0 # compute function result + + beq $28,.L0 + subq $18,$28,$18 + + .align 3 +.Loop0: ldq $3,8($17) + addq $16,8,$16 + srl $4,$19,$5 + addq $17,8,$17 + subq $28,1,$28 + sll $3,$20,$6 + or $3,$3,$4 + or $5,$6,$8 + stq $8,-8($16) + bne $28,.Loop0 + +.L0: srl $4,$19,$24 + beq $18,.Lend + # warm up phase 1 + ldq $1,8($17) + subq $18,4,$18 + ldq $2,16($17) + ldq $3,24($17) + ldq $4,32($17) + beq $18,.Lend1 + # warm up phase 2 + sll $1,$20,$7 + srl $1,$19,$21 + sll $2,$20,$8 + ldq $1,40($17) + srl $2,$19,$22 + ldq $2,48($17) + sll $3,$20,$5 + or $7,$24,$7 + srl $3,$19,$23 + or $8,$21,$8 + sll $4,$20,$6 + ldq $3,56($17) + srl $4,$19,$24 + ldq $4,64($17) + subq $18,4,$18 + beq $18,.Lend2 + .align 4 + # main loop +.Loop: stq $7,0($16) + or $5,$22,$5 + stq $8,8($16) + or $6,$23,$6 + + sll $1,$20,$7 + subq $18,4,$18 + srl $1,$19,$21 + unop # ldq $31,-96($17) + + sll $2,$20,$8 + ldq $1,72($17) + srl $2,$19,$22 + ldq $2,80($17) + + stq $5,16($16) + or $7,$24,$7 + stq $6,24($16) + or $8,$21,$8 + + sll $3,$20,$5 + unop # ldq $31,-96($17) + srl $3,$19,$23 + addq $16,32,$16 + + sll $4,$20,$6 + ldq $3,88($17) + srl $4,$19,$24 + ldq $4,96($17) + + addq $17,32,$17 + bne $18,.Loop + # cool down phase 2/1 +.Lend2: stq $7,0($16) + or $5,$22,$5 + stq $8,8($16) + or $6,$23,$6 + sll $1,$20,$7 + srl $1,$19,$21 + sll $2,$20,$8 + srl $2,$19,$22 + stq $5,16($16) + or $7,$24,$7 + stq $6,24($16) + or $8,$21,$8 + sll $3,$20,$5 + srl $3,$19,$23 + sll $4,$20,$6 + srl $4,$19,$24 + # cool down phase 2/2 + stq $7,32($16) + or $5,$22,$5 + stq $8,40($16) + or $6,$23,$6 + stq $5,48($16) + stq $6,56($16) + # cool down phase 2/3 + stq $24,64($16) + ret $31,($26),1 + + # cool down phase 1/1 +.Lend1: sll $1,$20,$7 + srl $1,$19,$21 + sll $2,$20,$8 + srl $2,$19,$22 + sll $3,$20,$5 + or $7,$24,$7 + srl $3,$19,$23 + or $8,$21,$8 + sll $4,$20,$6 + srl $4,$19,$24 + # cool down phase 1/2 + stq $7,0($16) + or $5,$22,$5 + stq $8,8($16) + or $6,$23,$6 + stq $5,16($16) + stq $6,24($16) + stq $24,32($16) + ret $31,($26),1 + +.Lend: stq $24,0($16) + ret $31,($26),1 + .end __mpn_rshift diff --git a/REORG.TODO/sysdeps/alpha/alphaev5/sub_n.S b/REORG.TODO/sysdeps/alpha/alphaev5/sub_n.S new file mode 100644 index 0000000000..032b0c616b --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev5/sub_n.S @@ -0,0 +1,147 @@ + # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and + # store difference in a third limb vector. + + # Copyright (C) 1995-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr $16 + # s1_ptr $17 + # s2_ptr $18 + # size $19 + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_sub_n + .ent __mpn_sub_n +__mpn_sub_n: + .frame $30,0,$26,0 + + or $31,$31,$25 # clear cy + subq $19,4,$19 # decr loop cnt + blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop + # Start software pipeline for 1st loop + ldq $0,0($18) + ldq $1,8($18) + ldq $4,0($17) + ldq $5,8($17) + addq $17,32,$17 # update s1_ptr + ldq $2,16($18) + subq $4,$0,$20 # 1st main sub + ldq $3,24($18) + subq $19,4,$19 # decr loop cnt + ldq $6,-16($17) + cmpult $4,$20,$25 # compute cy from last sub + ldq $7,-8($17) + addq $1,$25,$28 # cy add + addq $18,32,$18 # update s2_ptr + subq $5,$28,$21 # 2nd main sub + cmpult $28,$25,$8 # compute cy from last add + blt $19,.Lend1 # if less than 4 limbs remain, jump + # 1st loop handles groups of 4 limbs in a software pipeline + .align 4 +.Loop: cmpult $5,$21,$25 # compute cy from last add + ldq $0,0($18) + or $8,$25,$25 # combine cy from the two adds + ldq $1,8($18) + addq $2,$25,$28 # cy add + ldq $4,0($17) + subq $6,$28,$22 # 3rd main sub + ldq $5,8($17) + cmpult $28,$25,$8 # compute cy from last add + cmpult $6,$22,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + stq $21,8($16) + addq $3,$25,$28 # cy add + subq $7,$28,$23 # 4th main sub + cmpult $28,$25,$8 # compute cy from last add + cmpult $7,$23,$25 # compute cy from last add + addq $17,32,$17 # update s1_ptr + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + addq $0,$25,$28 # cy add + ldq $2,16($18) + subq $4,$28,$20 # 1st main sub + ldq $3,24($18) + cmpult $28,$25,$8 # compute cy from last add + ldq $6,-16($17) + cmpult $4,$20,$25 # compute cy from last add + ldq $7,-8($17) + or $8,$25,$25 # combine cy from the two adds + subq $19,4,$19 # decr loop cnt + stq $22,-16($16) + addq $1,$25,$28 # cy add + stq $23,-8($16) + subq $5,$28,$21 # 2nd main sub + addq $18,32,$18 # update s2_ptr + cmpult $28,$25,$8 # compute cy from last add + bge $19,.Loop + # Finish software pipeline for 1st loop +.Lend1: cmpult $5,$21,$25 # compute cy from last add + or $8,$25,$25 # combine cy from the two adds + addq $2,$25,$28 # cy add + subq $6,$28,$22 # 3rd main sub + cmpult $28,$25,$8 # compute cy from last add + cmpult $6,$22,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + stq $21,8($16) + addq $3,$25,$28 # cy add + subq $7,$28,$23 # 4th main sub + cmpult $28,$25,$8 # compute cy from last add + cmpult $7,$23,$25 # compute cy from last add + or $8,$25,$25 # combine cy from the two adds + addq $16,32,$16 # update res_ptr + stq $22,-16($16) + stq $23,-8($16) +.Lend2: addq $19,4,$19 # restore loop cnt + beq $19,.Lret + # Start software pipeline for 2nd loop + ldq $0,0($18) + ldq $4,0($17) + subq $19,1,$19 + beq $19,.Lend0 + # 2nd loop handles remaining 1-3 limbs + .align 4 +.Loop0: addq $0,$25,$28 # cy add + ldq $0,8($18) + subq $4,$28,$20 # main sub + ldq $1,8($17) + addq $18,8,$18 + cmpult $28,$25,$8 # compute cy from last add + addq $17,8,$17 + stq $20,0($16) + cmpult $4,$20,$25 # compute cy from last add + subq $19,1,$19 # decr loop cnt + or $8,$25,$25 # combine cy from the two adds + addq $16,8,$16 + or $1,$31,$4 + bne $19,.Loop0 +.Lend0: addq $0,$25,$28 # cy add + subq $4,$28,$20 # main sub + cmpult $28,$25,$8 # compute cy from last add + cmpult $4,$20,$25 # compute cy from last add + stq $20,0($16) + or $8,$25,$25 # combine cy from the two adds + +.Lret: or $25,$31,$0 # return cy + ret $31,($26),1 + .end __mpn_sub_n diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/Implies b/REORG.TODO/sysdeps/alpha/alphaev6/Implies new file mode 100644 index 0000000000..0e7fc170ba --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev6/Implies @@ -0,0 +1 @@ +alpha/alphaev5 diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/addmul_1.S b/REORG.TODO/sysdeps/alpha/alphaev6/addmul_1.S new file mode 100644 index 0000000000..1072ea763f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev6/addmul_1.S @@ -0,0 +1,477 @@ + # Alpha ev6 mpn_addmul_1 -- Multiply a limb vector with a limb and add + # the result to a second limb vector. + # + # Copyright (C) 2000-2017 Free Software Foundation, Inc. + # + # This file is part of the GNU MP Library. + # + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation; either version 2.1 of the License, or (at + # your option) any later version. + # + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + # INPUT PARAMETERS + # res_ptr $16 + # s1_ptr $17 + # size $18 + # s2_limb $19 + # + # This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and + # exactly 3.625 cycles/limb on EV6... + # + # This code was written in close cooperation with ev6 pipeline expert + # Steve Root (root@toober.hlo.dec.com). Any errors are tege's fault, though. + # + # Register usages for unrolled loop: + # 0-3 mul's + # 4-7 acc's + # 8-15 mul results + # 20,21 carry's + # 22,23 save for stores + # + # Sustains 8 mul-adds in 29 cycles in the unrolled inner loop. + # + # The stores can issue a cycle late so we have paired no-op's to 'catch' + # them, so that further disturbance to the schedule is damped. + # + # We couldn't pair the loads, because the entangled schedule of the + # carry's has to happen on one side {0} of the machine. Note, the total + # use of U0, and the total use of L0 (after attending to the stores). + # which is part of the reason why.... + # + # This is a great schedule for the d_cache, a poor schedule for the + # b_cache. The lockup on U0 means that any stall can't be recovered + # from. Consider a ldq in L1. say that load gets stalled because it + # collides with a fill from the b_Cache. On the next cycle, this load + # gets priority. If first looks at L0, and goes there. The instruction + # we intended for L0 gets to look at L1, which is NOT where we want + # it. It either stalls 1, because it can't go in L0, or goes there, and + # causes a further instruction to stall. + # + # So for b_cache, we're likely going to want to put one or more cycles + # back into the code! And, of course, put in prefetches. For the + # accumulator, lds, intent to modify. For the multiplier, you might + # want ldq, evict next, if you're not wanting to use it again soon. Use + # 256 ahead of present pointer value. At a place where we have an mt + # followed by a bookkeeping, put the bookkeeping in upper, and the + # prefetch into lower. + # + # Note, the usage of physical registers per cycle is smoothed off, as + # much as possible. + # + # Note, the ldq's and stq's are at the end of the quadpacks. note, we'd + # like not to have a ldq or stq to preceded a conditional branch in a + # quadpack. The conditional branch moves the retire pointer one cycle + # later. + # + # Optimization notes: + # Callee-saves regs: $9 $10 $11 $12 $13 $14 $15 $26 ?$27? + # Reserved regs: $29 $30 $31 + # Free caller-saves regs in unrolled code: $24 $25 $28 + # We should swap some of the callee-saves regs for some of the free + # caller-saves regs, saving some overhead cycles. + # Most importantly, we should write fast code for the 0-7 case. + # The code we use there are for the 21164, and runs at 7 cycles/limb + # on the 21264. Should not be hard, if we write specialized code for + # 1-7 limbs (the one for 0 limbs should be straightforward). We then just + # need a jump table indexed by the low 3 bits of the count argument. + + .set noreorder + .set noat + .text + + .globl __mpn_addmul_1 + .ent __mpn_addmul_1 +__mpn_addmul_1: + .frame $30,0,$26,0 + .prologue 0 + + cmpult $18, 8, $1 + beq $1, $Large + + ldq $2, 0($17) # $2 = s1_limb + addq $17, 8, $17 # s1_ptr++ + subq $18, 1, $18 # size-- + mulq $2, $19, $3 # $3 = prod_low + ldq $5, 0($16) # $5 = *res_ptr + umulh $2, $19, $0 # $0 = prod_high + beq $18, $Lend0b # jump if size was == 1 + ldq $2, 0($17) # $2 = s1_limb + addq $17, 8, $17 # s1_ptr++ + subq $18, 1, $18 # size-- + addq $5, $3, $3 + cmpult $3, $5, $4 + stq $3, 0($16) + addq $16, 8, $16 # res_ptr++ + beq $18, $Lend0a # jump if size was == 2 + + .align 3 +$Loop0: mulq $2, $19, $3 # $3 = prod_low + ldq $5, 0($16) # $5 = *res_ptr + addq $4, $0, $0 # cy_limb = cy_limb + 'cy' + subq $18, 1, $18 # size-- + umulh $2, $19, $4 # $4 = cy_limb + ldq $2, 0($17) # $2 = s1_limb + addq $17, 8, $17 # s1_ptr++ + addq $3, $0, $3 # $3 = cy_limb + prod_low + cmpult $3, $0, $0 # $0 = carry from (cy_limb + prod_low) + addq $5, $3, $3 + cmpult $3, $5, $5 + stq $3, 0($16) + addq $16, 8, $16 # res_ptr++ + addq $5, $0, $0 # combine carries + bne $18, $Loop0 +$Lend0a: + mulq $2, $19, $3 # $3 = prod_low + ldq $5, 0($16) # $5 = *res_ptr + addq $4, $0, $0 # cy_limb = cy_limb + 'cy' + umulh $2, $19, $4 # $4 = cy_limb + addq $3, $0, $3 # $3 = cy_limb + prod_low + cmpult $3, $0, $0 # $0 = carry from (cy_limb + prod_low) + addq $5, $3, $3 + cmpult $3, $5, $5 + stq $3, 0($16) + addq $5, $0, $0 # combine carries + addq $4, $0, $0 # cy_limb = prod_high + cy + ret $31, ($26), 1 +$Lend0b: + addq $5, $3, $3 + cmpult $3, $5, $5 + stq $3, 0($16) + addq $0, $5, $0 + ret $31, ($26), 1 + +$Large: + lda $30, -240($30) + stq $9, 8($30) + stq $10, 16($30) + stq $11, 24($30) + stq $12, 32($30) + stq $13, 40($30) + stq $14, 48($30) + stq $15, 56($30) + + and $18, 7, $20 # count for the first loop, 0-7 + srl $18, 3, $18 # count for unrolled loop + bis $31, $31, $0 + beq $20, $Lunroll + ldq $2, 0($17) # $2 = s1_limb + addq $17, 8, $17 # s1_ptr++ + subq $20, 1, $20 # size-- + mulq $2, $19, $3 # $3 = prod_low + ldq $5, 0($16) # $5 = *res_ptr + umulh $2, $19, $0 # $0 = prod_high + beq $20, $Lend1b # jump if size was == 1 + ldq $2, 0($17) # $2 = s1_limb + addq $17, 8, $17 # s1_ptr++ + subq $20, 1, $20 # size-- + addq $5, $3, $3 + cmpult $3, $5, $4 + stq $3, 0($16) + addq $16, 8, $16 # res_ptr++ + beq $20, $Lend1a # jump if size was == 2 + + .align 3 +$Loop1: mulq $2, $19, $3 # $3 = prod_low + ldq $5, 0($16) # $5 = *res_ptr + addq $4, $0, $0 # cy_limb = cy_limb + 'cy' + subq $20, 1, $20 # size-- + umulh $2, $19, $4 # $4 = cy_limb + ldq $2, 0($17) # $2 = s1_limb + addq $17, 8, $17 # s1_ptr++ + addq $3, $0, $3 # $3 = cy_limb + prod_low + cmpult $3, $0, $0 # $0 = carry from (cy_limb + prod_low) + addq $5, $3, $3 + cmpult $3, $5, $5 + stq $3, 0($16) + addq $16, 8, $16 # res_ptr++ + addq $5, $0, $0 # combine carries + bne $20, $Loop1 + +$Lend1a: + mulq $2, $19, $3 # $3 = prod_low + ldq $5, 0($16) # $5 = *res_ptr + addq $4, $0, $0 # cy_limb = cy_limb + 'cy' + umulh $2, $19, $4 # $4 = cy_limb + addq $3, $0, $3 # $3 = cy_limb + prod_low + cmpult $3, $0, $0 # $0 = carry from (cy_limb + prod_low) + addq $5, $3, $3 + cmpult $3, $5, $5 + stq $3, 0($16) + addq $16, 8, $16 # res_ptr++ + addq $5, $0, $0 # combine carries + addq $4, $0, $0 # cy_limb = prod_high + cy + br $31, $Lunroll +$Lend1b: + addq $5, $3, $3 + cmpult $3, $5, $5 + stq $3, 0($16) + addq $16, 8, $16 # res_ptr++ + addq $0, $5, $0 + +$Lunroll: + lda $17, -16($17) # L1 bookkeeping + lda $16, -16($16) # L1 bookkeeping + bis $0, $31, $12 + + # ____ UNROLLED LOOP SOFTWARE PIPELINE STARTUP ____ + + ldq $2, 16($17) # L1 + ldq $3, 24($17) # L1 + lda $18, -1($18) # L1 bookkeeping + ldq $6, 16($16) # L1 + ldq $7, 24($16) # L1 + ldq $0, 32($17) # L1 + mulq $19, $2, $13 # U1 + ldq $1, 40($17) # L1 + umulh $19, $2, $14 # U1 + mulq $19, $3, $15 # U1 + lda $17, 64($17) # L1 bookkeeping + ldq $4, 32($16) # L1 + ldq $5, 40($16) # L1 + umulh $19, $3, $8 # U1 + ldq $2, -16($17) # L1 + mulq $19, $0, $9 # U1 + ldq $3, -8($17) # L1 + umulh $19, $0, $10 # U1 + addq $6, $13, $6 # L0 lo + acc + mulq $19, $1, $11 # U1 + cmpult $6, $13, $20 # L0 lo add => carry + lda $16, 64($16) # L1 bookkeeping + addq $6, $12, $22 # U0 hi add => answer + cmpult $22, $12, $21 # L0 hi add => carry + addq $14, $20, $14 # U0 hi mul + carry + ldq $6, -16($16) # L1 + addq $7, $15, $23 # L0 lo + acc + addq $14, $21, $14 # U0 hi mul + carry + ldq $7, -8($16) # L1 + umulh $19, $1, $12 # U1 + cmpult $23, $15, $20 # L0 lo add => carry + addq $23, $14, $23 # U0 hi add => answer + ldq $0, 0($17) # L1 + mulq $19, $2, $13 # U1 + cmpult $23, $14, $21 # L0 hi add => carry + addq $8, $20, $8 # U0 hi mul + carry + ldq $1, 8($17) # L1 + umulh $19, $2, $14 # U1 + addq $4, $9, $4 # L0 lo + acc + stq $22, -48($16) # L0 + stq $23, -40($16) # L1 + mulq $19, $3, $15 # U1 + addq $8, $21, $8 # U0 hi mul + carry + cmpult $4, $9, $20 # L0 lo add => carry + addq $4, $8, $22 # U0 hi add => answer + ble $18, $Lend # U1 bookkeeping + + # ____ MAIN UNROLLED LOOP ____ + .align 4 +$Loop: + bis $31, $31, $31 # U1 mt + cmpult $22, $8, $21 # L0 hi add => carry + addq $10, $20, $10 # U0 hi mul + carry + ldq $4, 0($16) # L1 + + bis $31, $31, $31 # U1 mt + addq $5, $11, $23 # L0 lo + acc + addq $10, $21, $10 # L0 hi mul + carry + ldq $5, 8($16) # L1 + + umulh $19, $3, $8 # U1 + cmpult $23, $11, $20 # L0 lo add => carry + addq $23, $10, $23 # U0 hi add => answer + ldq $2, 16($17) # L1 + + mulq $19, $0, $9 # U1 + cmpult $23, $10, $21 # L0 hi add => carry + addq $12, $20, $12 # U0 hi mul + carry + ldq $3, 24($17) # L1 + + umulh $19, $0, $10 # U1 + addq $6, $13, $6 # L0 lo + acc + stq $22, -32($16) # L0 + stq $23, -24($16) # L1 + + bis $31, $31, $31 # L0 st slosh + mulq $19, $1, $11 # U1 + bis $31, $31, $31 # L1 st slosh + addq $12, $21, $12 # U0 hi mul + carry + + cmpult $6, $13, $20 # L0 lo add => carry + bis $31, $31, $31 # U1 mt + lda $18, -1($18) # L1 bookkeeping + addq $6, $12, $22 # U0 hi add => answer + + bis $31, $31, $31 # U1 mt + cmpult $22, $12, $21 # L0 hi add => carry + addq $14, $20, $14 # U0 hi mul + carry + ldq $6, 16($16) # L1 + + bis $31, $31, $31 # U1 mt + addq $7, $15, $23 # L0 lo + acc + addq $14, $21, $14 # U0 hi mul + carry + ldq $7, 24($16) # L1 + + umulh $19, $1, $12 # U1 + cmpult $23, $15, $20 # L0 lo add => carry + addq $23, $14, $23 # U0 hi add => answer + ldq $0, 32($17) # L1 + + mulq $19, $2, $13 # U1 + cmpult $23, $14, $21 # L0 hi add => carry + addq $8, $20, $8 # U0 hi mul + carry + ldq $1, 40($17) # L1 + + umulh $19, $2, $14 # U1 + addq $4, $9, $4 # U0 lo + acc + stq $22, -16($16) # L0 + stq $23, -8($16) # L1 + + bis $31, $31, $31 # L0 st slosh + mulq $19, $3, $15 # U1 + bis $31, $31, $31 # L1 st slosh + addq $8, $21, $8 # L0 hi mul + carry + + cmpult $4, $9, $20 # L0 lo add => carry + bis $31, $31, $31 # U1 mt + lda $17, 64($17) # L1 bookkeeping + addq $4, $8, $22 # U0 hi add => answer + + bis $31, $31, $31 # U1 mt + cmpult $22, $8, $21 # L0 hi add => carry + addq $10, $20, $10 # U0 hi mul + carry + ldq $4, 32($16) # L1 + + bis $31, $31, $31 # U1 mt + addq $5, $11, $23 # L0 lo + acc + addq $10, $21, $10 # L0 hi mul + carry + ldq $5, 40($16) # L1 + + umulh $19, $3, $8 # U1 + cmpult $23, $11, $20 # L0 lo add => carry + addq $23, $10, $23 # U0 hi add => answer + ldq $2, -16($17) # L1 + + mulq $19, $0, $9 # U1 + cmpult $23, $10, $21 # L0 hi add => carry + addq $12, $20, $12 # U0 hi mul + carry + ldq $3, -8($17) # L1 + + umulh $19, $0, $10 # U1 + addq $6, $13, $6 # L0 lo + acc + stq $22, 0($16) # L0 + stq $23, 8($16) # L1 + + bis $31, $31, $31 # L0 st slosh + mulq $19, $1, $11 # U1 + bis $31, $31, $31 # L1 st slosh + addq $12, $21, $12 # U0 hi mul + carry + + cmpult $6, $13, $20 # L0 lo add => carry + bis $31, $31, $31 # U1 mt + lda $16, 64($16) # L1 bookkeeping + addq $6, $12, $22 # U0 hi add => answer + + bis $31, $31, $31 # U1 mt + cmpult $22, $12, $21 # L0 hi add => carry + addq $14, $20, $14 # U0 hi mul + carry + ldq $6, -16($16) # L1 + + bis $31, $31, $31 # U1 mt + addq $7, $15, $23 # L0 lo + acc + addq $14, $21, $14 # U0 hi mul + carry + ldq $7, -8($16) # L1 + + umulh $19, $1, $12 # U1 + cmpult $23, $15, $20 # L0 lo add => carry + addq $23, $14, $23 # U0 hi add => answer + ldq $0, 0($17) # L1 + + mulq $19, $2, $13 # U1 + cmpult $23, $14, $21 # L0 hi add => carry + addq $8, $20, $8 # U0 hi mul + carry + ldq $1, 8($17) # L1 + + umulh $19, $2, $14 # U1 + addq $4, $9, $4 # L0 lo + acc + stq $22, -48($16) # L0 + stq $23, -40($16) # L1 + + bis $31, $31, $31 # L0 st slosh + mulq $19, $3, $15 # U1 + bis $31, $31, $31 # L1 st slosh + addq $8, $21, $8 # U0 hi mul + carry + + cmpult $4, $9, $20 # L0 lo add => carry + addq $4, $8, $22 # U0 hi add => answer + bis $31, $31, $31 # L1 mt + bgt $18, $Loop # U1 bookkeeping + +# ____ UNROLLED LOOP SOFTWARE PIPELINE FINISH ____ +$Lend: + cmpult $22, $8, $21 # L0 hi add => carry + addq $10, $20, $10 # U0 hi mul + carry + ldq $4, 0($16) # L1 + addq $5, $11, $23 # L0 lo + acc + addq $10, $21, $10 # L0 hi mul + carry + ldq $5, 8($16) # L1 + umulh $19, $3, $8 # U1 + cmpult $23, $11, $20 # L0 lo add => carry + addq $23, $10, $23 # U0 hi add => answer + mulq $19, $0, $9 # U1 + cmpult $23, $10, $21 # L0 hi add => carry + addq $12, $20, $12 # U0 hi mul + carry + umulh $19, $0, $10 # U1 + addq $6, $13, $6 # L0 lo + acc + stq $22, -32($16) # L0 + stq $23, -24($16) # L1 + mulq $19, $1, $11 # U1 + addq $12, $21, $12 # U0 hi mul + carry + cmpult $6, $13, $20 # L0 lo add => carry + addq $6, $12, $22 # U0 hi add => answer + cmpult $22, $12, $21 # L0 hi add => carry + addq $14, $20, $14 # U0 hi mul + carry + addq $7, $15, $23 # L0 lo + acc + addq $14, $21, $14 # U0 hi mul + carry + umulh $19, $1, $12 # U1 + cmpult $23, $15, $20 # L0 lo add => carry + addq $23, $14, $23 # U0 hi add => answer + cmpult $23, $14, $21 # L0 hi add => carry + addq $8, $20, $8 # U0 hi mul + carry + addq $4, $9, $4 # U0 lo + acc + stq $22, -16($16) # L0 + stq $23, -8($16) # L1 + bis $31, $31, $31 # L0 st slosh + addq $8, $21, $8 # L0 hi mul + carry + cmpult $4, $9, $20 # L0 lo add => carry + addq $4, $8, $22 # U0 hi add => answer + cmpult $22, $8, $21 # L0 hi add => carry + addq $10, $20, $10 # U0 hi mul + carry + addq $5, $11, $23 # L0 lo + acc + addq $10, $21, $10 # L0 hi mul + carry + cmpult $23, $11, $20 # L0 lo add => carry + addq $23, $10, $23 # U0 hi add => answer + cmpult $23, $10, $21 # L0 hi add => carry + addq $12, $20, $12 # U0 hi mul + carry + stq $22, 0($16) # L0 + stq $23, 8($16) # L1 + addq $12, $21, $0 # U0 hi mul + carry + + ldq $9, 8($30) + ldq $10, 16($30) + ldq $11, 24($30) + ldq $12, 32($30) + ldq $13, 40($30) + ldq $14, 48($30) + ldq $15, 56($30) + lda $30, 240($30) + ret $31, ($26), 1 + + .end __mpn_addmul_1 diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/fpu/e_sqrt.S b/REORG.TODO/sysdeps/alpha/alphaev6/fpu/e_sqrt.S new file mode 100644 index 0000000000..18d03ee9c9 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev6/fpu/e_sqrt.S @@ -0,0 +1,53 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <shlib-compat.h> + + .arch ev6 + .set noreorder + .set noat + +ENTRY(__ieee754_sqrt) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + .align 4 +#ifdef _IEEE_FP_INEXACT + sqrtt/suid $f16, $f0 +#else + sqrtt/sud $f16, $f0 +#endif + ret + nop + nop + +END(__ieee754_sqrt) + +#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +strong_alias(__ieee754_sqrt, __sqrt_finite1) +compat_symbol(libm, __sqrt_finite1, __sqrt_finite, GLIBC_2_15) +versioned_symbol(libm, __ieee754_sqrt, __sqrt_finite, GLIBC_2_18) +#else +strong_alias(__ieee754_sqrt, __sqrt_finite) +#endif diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/fpu/e_sqrtf.S b/REORG.TODO/sysdeps/alpha/alphaev6/fpu/e_sqrtf.S new file mode 100644 index 0000000000..c4ef9c32c6 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev6/fpu/e_sqrtf.S @@ -0,0 +1,53 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <shlib-compat.h> + + .arch ev6 + .set noreorder + .set noat + +ENTRY(__ieee754_sqrtf) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + .align 4 +#ifdef _IEEE_FP_INEXACT + sqrts/suid $f16, $f0 +#else + sqrts/sud $f16, $f0 +#endif + ret + nop + nop + +END(__ieee754_sqrtf) + +#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +strong_alias(__ieee754_sqrtf, __sqrtf_finite1) +compat_symbol(libm, __sqrtf_finite1, __sqrtf_finite, GLIBC_2_15) +versioned_symbol(libm, __ieee754_sqrtf, __sqrtf_finite, GLIBC_2_18) +#else +strong_alias(__ieee754_sqrtf, __sqrtf_finite) +#endif diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/memcpy.S b/REORG.TODO/sysdeps/alpha/alphaev6/memcpy.S new file mode 100644 index 0000000000..170a23b5da --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev6/memcpy.S @@ -0,0 +1,255 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * + * Temp usage notes: + * $0 - destination address + * $1,$2, - scratch + */ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .set noat + +ENTRY(memcpy) + .prologue 0 + + mov $16, $0 # E : copy dest to return + ble $18, $nomoredata # U : done with the copy? + xor $16, $17, $1 # E : are source and dest alignments the same? + and $1, 7, $1 # E : are they the same mod 8? + + bne $1, $misaligned # U : Nope - gotta do this the slow way + /* source and dest are same mod 8 address */ + and $16, 7, $1 # E : Are both 0mod8? + beq $1, $both_0mod8 # U : Yes + nop # E : + + /* + * source and dest are same misalignment. move a byte at a time + * until a 0mod8 alignment for both is reached. + * At least one byte more to move + */ + +$head_align: + ldbu $1, 0($17) # L : grab a byte + subq $18, 1, $18 # E : count-- + addq $17, 1, $17 # E : src++ + stb $1, 0($16) # L : + addq $16, 1, $16 # E : dest++ + and $16, 7, $1 # E : Are we at 0mod8 yet? + ble $18, $nomoredata # U : done with the copy? + bne $1, $head_align # U : + +$both_0mod8: + cmple $18, 127, $1 # E : Can we unroll the loop? + bne $1, $no_unroll # U : + and $16, 63, $1 # E : get mod64 alignment + beq $1, $do_unroll # U : no single quads to fiddle + +$single_head_quad: + ldq $1, 0($17) # L : get 8 bytes + subq $18, 8, $18 # E : count -= 8 + addq $17, 8, $17 # E : src += 8 + nop # E : + + stq $1, 0($16) # L : store + addq $16, 8, $16 # E : dest += 8 + and $16, 63, $1 # E : get mod64 alignment + bne $1, $single_head_quad # U : still not fully aligned + +$do_unroll: + addq $16, 64, $7 # E : Initial (+1 trip) wh64 address + cmple $18, 127, $1 # E : Can we go through the unrolled loop? + bne $1, $tail_quads # U : Nope + nop # E : + +$unroll_body: + wh64 ($7) # L1 : memory subsystem hint: 64 bytes at + # ($7) are about to be over-written + ldq $6, 0($17) # L0 : bytes 0..7 + nop # E : + nop # E : + + ldq $4, 8($17) # L : bytes 8..15 + ldq $5, 16($17) # L : bytes 16..23 + addq $7, 64, $7 # E : Update next wh64 address + nop # E : + + ldq $3, 24($17) # L : bytes 24..31 + addq $16, 64, $1 # E : fallback value for wh64 + nop # E : + nop # E : + + addq $17, 32, $17 # E : src += 32 bytes + stq $6, 0($16) # L : bytes 0..7 + nop # E : + nop # E : + + stq $4, 8($16) # L : bytes 8..15 + stq $5, 16($16) # L : bytes 16..23 + subq $18, 192, $2 # E : At least two more trips to go? + nop # E : + + stq $3, 24($16) # L : bytes 24..31 + addq $16, 32, $16 # E : dest += 32 bytes + nop # E : + nop # E : + + ldq $6, 0($17) # L : bytes 0..7 + ldq $4, 8($17) # L : bytes 8..15 + cmovlt $2, $1, $7 # E : Latency 2, extra map slot - Use + # fallback wh64 address if < 2 more trips + nop # E : + + ldq $5, 16($17) # L : bytes 16..23 + ldq $3, 24($17) # L : bytes 24..31 + addq $16, 32, $16 # E : dest += 32 + subq $18, 64, $18 # E : count -= 64 + + addq $17, 32, $17 # E : src += 32 + stq $6, -32($16) # L : bytes 0..7 + stq $4, -24($16) # L : bytes 8..15 + cmple $18, 63, $1 # E : At least one more trip? + + stq $5, -16($16) # L : bytes 16..23 + stq $3, -8($16) # L : bytes 24..31 + nop # E : + beq $1, $unroll_body + +$tail_quads: +$no_unroll: + .align 4 + subq $18, 8, $18 # E : At least a quad left? + blt $18, $less_than_8 # U : Nope + nop # E : + nop # E : + +$move_a_quad: + ldq $1, 0($17) # L : fetch 8 + subq $18, 8, $18 # E : count -= 8 + addq $17, 8, $17 # E : src += 8 + nop # E : + + stq $1, 0($16) # L : store 8 + addq $16, 8, $16 # E : dest += 8 + bge $18, $move_a_quad # U : + nop # E : + +$less_than_8: + .align 4 + addq $18, 8, $18 # E : add back for trailing bytes + ble $18, $nomoredata # U : All-done + nop # E : + nop # E : + + /* Trailing bytes */ +$tail_bytes: + subq $18, 1, $18 # E : count-- + ldbu $1, 0($17) # L : fetch a byte + addq $17, 1, $17 # E : src++ + nop # E : + + stb $1, 0($16) # L : store a byte + addq $16, 1, $16 # E : dest++ + bgt $18, $tail_bytes # U : more to be done? + nop # E : + + /* branching to exit takes 3 extra cycles, so replicate exit here */ + ret $31, ($26), 1 # L0 : + nop # E : + nop # E : + nop # E : + +$misaligned: + mov $0, $4 # E : dest temp + and $0, 7, $1 # E : dest alignment mod8 + beq $1, $dest_0mod8 # U : life doesnt totally suck + nop + +$aligndest: + ble $18, $nomoredata # U : + ldbu $1, 0($17) # L : fetch a byte + subq $18, 1, $18 # E : count-- + addq $17, 1, $17 # E : src++ + + stb $1, 0($4) # L : store it + addq $4, 1, $4 # E : dest++ + and $4, 7, $1 # E : dest 0mod8 yet? + bne $1, $aligndest # U : go until we are aligned. + + /* Source has unknown alignment, but dest is known to be 0mod8 */ +$dest_0mod8: + subq $18, 8, $18 # E : At least a quad left? + blt $18, $misalign_tail # U : Nope + ldq_u $3, 0($17) # L : seed (rotating load) of 8 bytes + nop # E : + +$mis_quad: + ldq_u $16, 8($17) # L : Fetch next 8 + extql $3, $17, $3 # U : masking + extqh $16, $17, $1 # U : masking + bis $3, $1, $1 # E : merged bytes to store + + subq $18, 8, $18 # E : count -= 8 + addq $17, 8, $17 # E : src += 8 + stq $1, 0($4) # L : store 8 (aligned) + mov $16, $3 # E : "rotate" source data + + addq $4, 8, $4 # E : dest += 8 + bge $18, $mis_quad # U : More quads to move + nop + nop + +$misalign_tail: + addq $18, 8, $18 # E : account for tail stuff + ble $18, $nomoredata # U : + nop + nop + +$misalign_byte: + ldbu $1, 0($17) # L : fetch 1 + subq $18, 1, $18 # E : count-- + addq $17, 1, $17 # E : src++ + nop # E : + + stb $1, 0($4) # L : store + addq $4, 1, $4 # E : dest++ + bgt $18, $misalign_byte # U : more to go? + nop + + +$nomoredata: + ret $31, ($26), 1 # L0 : + nop # E : + nop # E : + nop # E : + +END(memcpy) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/memset.S b/REORG.TODO/sysdeps/alpha/alphaev6/memset.S new file mode 100644 index 0000000000..185821c7eb --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev6/memset.S @@ -0,0 +1,223 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .arch ev6 + .set noat + .set noreorder + +ENTRY(memset) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + /* + * Serious stalling happens. The only way to mitigate this is to + * undertake a major re-write to interleave the constant materialization + * with other parts of the fall-through code. This is important, even + * though it makes maintenance tougher. + * Do this later. + */ + and $17, 255, $1 # E : 00000000000000ch + insbl $17, 1, $2 # U : 000000000000ch00 + mov $16, $0 # E : return value + ble $18, $end # U : zero length requested? + + addq $18, $16, $6 # E : max address to write to + or $1, $2, $17 # E : 000000000000chch + insbl $1, 2, $3 # U : 0000000000ch0000 + insbl $1, 3, $4 # U : 00000000ch000000 + + or $3, $4, $3 # E : 00000000chch0000 + inswl $17, 4, $5 # U : 0000chch00000000 + xor $16, $6, $1 # E : will complete write be within one quadword? + inswl $17, 6, $2 # U : chch000000000000 + + or $17, $3, $17 # E : 00000000chchchch + or $2, $5, $2 # E : chchchch00000000 + bic $1, 7, $1 # E : fit within a single quadword? + and $16, 7, $3 # E : Target addr misalignment + + or $17, $2, $17 # E : chchchchchchchch + beq $1, $within_quad # U : + nop # E : + beq $3, $aligned # U : target is 0mod8 + + /* + * Target address is misaligned, and won't fit within a quadword. + */ + ldq_u $4, 0($16) # L : Fetch first partial + mov $16, $5 # E : Save the address + insql $17, $16, $2 # U : Insert new bytes + subq $3, 8, $3 # E : Invert (for addressing uses) + + addq $18, $3, $18 # E : $18 is new count ($3 is negative) + mskql $4, $16, $4 # U : clear relevant parts of the quad + subq $16, $3, $16 # E : $16 is new aligned destination + or $2, $4, $1 # E : Final bytes + + nop + stq_u $1,0($5) # L : Store result + nop + nop + + .align 4 +$aligned: + /* + * We are now guaranteed to be quad aligned, with at least + * one partial quad to write. + */ + + sra $18, 3, $3 # U : Number of remaining quads to write + and $18, 7, $18 # E : Number of trailing bytes to write + mov $16, $5 # E : Save dest address + beq $3, $no_quad # U : tail stuff only + + /* + * It's worth the effort to unroll this and use wh64 if possible. + * At this point, entry values are: + * $16 Current destination address + * $5 A copy of $16 + * $6 The max quadword address to write to + * $18 Number trailer bytes + * $3 Number quads to write + */ + + and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop) + subq $3, 16, $4 # E : Only try to unroll if > 128 bytes + subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64) + blt $4, $loop # U : + + /* + * We know we've got at least 16 quads, minimum of one trip + * through unrolled loop. Do a quad at a time to get us 0mod64 + * aligned. + */ + + nop # E : + nop # E : + nop # E : + beq $1, $bigalign # U : + +$alignmod64: + stq $17, 0($5) # L : + subq $3, 1, $3 # E : For consistency later + addq $1, 8, $1 # E : Increment towards zero for alignment + addq $5, 8, $4 # E : Initial wh64 address (filler instruction) + + nop + nop + addq $5, 8, $5 # E : Inc address + blt $1, $alignmod64 # U : + +$bigalign: + /* + * $3 - number quads left to go + * $5 - target address (aligned 0mod64) + * $17 - mask of stuff to store + * Scratch registers available: $7, $2, $4, $1 + * We know that we'll be taking a minimum of one trip through. + * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle + * Assumes the wh64 needs to be for 2 trips through the loop in the future. + * The wh64 is issued on for the starting destination address for trip +2 + * through the loop, and if there are less than two trips left, the target + * address will be for the current trip. + */ + +$do_wh64: + wh64 ($4) # L1 : memory subsystem write hint + subq $3, 24, $2 # E : For determining future wh64 addresses + stq $17, 0($5) # L : + nop # E : + + addq $5, 128, $4 # E : speculative target of next wh64 + stq $17, 8($5) # L : + stq $17, 16($5) # L : + addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr) + + stq $17, 24($5) # L : + stq $17, 32($5) # L : + cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle + nop + + stq $17, 40($5) # L : + stq $17, 48($5) # L : + subq $3, 16, $2 # E : Repeat the loop at least once more? + nop + + stq $17, 56($5) # L : + addq $5, 64, $5 # E : + subq $3, 8, $3 # E : + bge $2, $do_wh64 # U : + + nop + nop + nop + beq $3, $no_quad # U : Might have finished already + + .align 4 + /* + * Simple loop for trailing quadwords, or for small amounts + * of data (where we can't use an unrolled loop and wh64) + */ +$loop: + stq $17, 0($5) # L : + subq $3, 1, $3 # E : Decrement number quads left + addq $5, 8, $5 # E : Inc address + bne $3, $loop # U : more? + +$no_quad: + /* + * Write 0..7 trailing bytes. + */ + nop # E : + beq $18, $end # U : All done? + ldq $7, 0($5) # L : + mskqh $7, $6, $2 # U : Mask final quad + + insqh $17, $6, $4 # U : New bits + or $2, $4, $1 # E : Put it all together + stq $1, 0($5) # L : And back to memory + ret $31,($26),1 # L0 : + +$within_quad: + ldq_u $1, 0($16) # L : + insql $17, $16, $2 # U : New bits + mskql $1, $16, $4 # U : Clear old + or $2, $4, $2 # E : New result + + mskql $2, $6, $4 # U : + mskqh $1, $6, $2 # U : + or $2, $4, $1 # E : + stq_u $1, 0($16) # L : + +$end: + nop + nop + nop + ret $31,($26),1 # L0 : + + END(memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/stxcpy.S b/REORG.TODO/sysdeps/alpha/alphaev6/stxcpy.S new file mode 100644 index 0000000000..84f19581d1 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev6/stxcpy.S @@ -0,0 +1,314 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy a null-terminated string from SRC to DST. + + This is an internal routine used by strcpy, stpcpy, and strcat. + As such, it uses special linkage conventions to make implementation + of these public functions more efficient. + + On input: + t9 = return address + a0 = DST + a1 = SRC + + On output: + t8 = bitmask (with one bit set) indicating the last byte written + a0 = unaligned address of the last *word* written + + Furthermore, v0, a3-a5, t11, and t12 are untouched. +*/ + + +#include <sysdep.h> + + .arch ev6 + .set noat + .set noreorder + + .text + .type __stxcpy, @function + .globl __stxcpy + .usepv __stxcpy, no + + cfi_startproc + cfi_return_column (t9) + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + .align 4 +stxcpy_aligned: + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # E : build a mask against false zero + mskqh t2, a1, t2 # U : detection in the src word (stall) + mskqh t1, a1, t3 # U : + ornot t1, t2, t2 # E : (stall) + + mskql t0, a1, t0 # U : assemble the first output word + cmpbge zero, t2, t10 # E : bits set iff null found + or t0, t3, t1 # E : (stall) + bne t10, $a_eos # U : (stall) + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == a source word not containing a null. */ + /* Nops here to separate store quads from load quads */ + +$a_loop: + stq_u t1, 0(a0) # L : + addq a0, 8, a0 # E : + nop + nop + + ldq_u t1, 0(a1) # L : Latency=3 + addq a1, 8, a1 # E : + cmpbge zero, t1, t10 # E : (3 cycle stall) + beq t10, $a_loop # U : (stall for t10) + + /* Take care of the final (partial) word store. + On entry to this basic block we have: + t1 == the source word containing the null + t10 == the cmpbge mask that found it. */ +$a_eos: + negq t10, t6 # E : find low bit set + and t10, t6, t8 # E : (stall) + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t8, 0x80, t6 # E : (stall) + bne t6, 1f # U : (stall) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t0, 0(a0) # L : Latency=3 + subq t8, 1, t6 # E : + zapnot t1, t6, t1 # U : clear src bytes >= null (stall) + or t8, t6, t10 # E : (stall) + + zap t0, t10, t0 # E : clear dst bytes <= null + or t0, t1, t1 # E : (stall) + nop + nop + +1: stq_u t1, 0(a0) # L : + ret (t9) # L0 : Latency=3 + nop + nop + + .align 4 +__stxcpy: + /* Are source and destination co-aligned? */ + xor a0, a1, t0 # E : + unop # E : + and t0, 7, t0 # E : (stall) + bne t0, $unaligned # U : (stall) + + /* We are co-aligned; take care of a partial first word. */ + ldq_u t1, 0(a1) # L : load first src word + and a0, 7, t0 # E : take care not to load a word ... + addq a1, 8, a1 # E : + beq t0, stxcpy_aligned # U : ... if we wont need it (stall) + + ldq_u t0, 0(a0) # L : + br stxcpy_aligned # L0 : Latency=3 + nop + nop + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 4 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, for masking back in, if needed else 0 + t1 == the low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # L : + addq a1, 8, a1 # E : + extql t1, a1, t1 # U : (stall on a1) + extqh t2, a1, t4 # U : (stall on a1) + + mskql t0, a0, t0 # U : + or t1, t4, t1 # E : + mskqh t1, a0, t1 # U : (stall on t1) + or t0, t1, t1 # E : (stall on t1) + + or t1, t6, t6 # E : + cmpbge zero, t6, t10 # E : (stall) + lda t6, -1 # E : for masking just below + bne t10, $u_final # U : (stall) + + mskql t6, a1, t6 # U : mask out the bits we have + or t6, t2, t2 # E : already extracted before (stall) + cmpbge zero, t2, t10 # E : testing eos (stall) + bne t10, $u_late_head_exit # U : (stall) + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + + stq_u t1, 0(a0) # L : store first output word + addq a0, 8, a0 # E : + extql t2, a1, t0 # U : position ho-bits of lo word + ldq_u t2, 8(a1) # U : read next high-order source word + + addq a1, 8, a1 # E : + cmpbge zero, t2, t10 # E : (stall for t2) + nop # E : + bne t10, $u_eos # U : (stall) + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. */ + + .align 3 +$u_loop: + extqh t2, a1, t1 # U : extract high bits for current word + addq a1, 8, a1 # E : (stall) + extql t2, a1, t3 # U : extract low bits for next time (stall) + addq a0, 8, a0 # E : + + or t0, t1, t1 # E : current dst word now complete + ldq_u t2, 0(a1) # L : Latency=3 load high word for next time + stq_u t1, -8(a0) # L : save the current word (stall) + mov t3, t0 # E : + + cmpbge zero, t2, t10 # E : test new word for eos + beq t10, $u_loop # U : (stall) + nop + nop + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + extqh t2, a1, t1 # U : + or t0, t1, t1 # E : first (partial) source word complete (stall) + cmpbge zero, t1, t10 # E : is the null in this first bit? (stall) + bne t10, $u_final # U : (stall) + +$u_late_head_exit: + stq_u t1, 0(a0) # L : the null was in the high-order bits + addq a0, 8, a0 # E : + extql t2, a1, t1 # U : + cmpbge zero, t1, t10 # E : (stall) + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t1 == assembled source word + t10 == cmpbge mask that found the null. */ +$u_final: + negq t10, t6 # E : isolate low bit set + and t6, t10, t8 # E : (stall) + and t8, 0x80, t6 # E : avoid dest word load if we can (stall) + bne t6, 1f # U : (stall) + + ldq_u t0, 0(a0) # E : + subq t8, 1, t6 # E : + or t6, t8, t10 # E : (stall) + zapnot t1, t6, t1 # U : kill source bytes >= null (stall) + + zap t0, t10, t0 # U : kill dest bytes <= null (2 cycle data stall) + or t0, t1, t1 # E : (stall) + nop + nop + +1: stq_u t1, 0(a0) # L : + ret (t9) # L0 : Latency=3 + nop + nop + + /* Unaligned copy entry point. */ + .align 4 +$unaligned: + + ldq_u t1, 0(a1) # L : load first source word + and a0, 7, t4 # E : find dest misalignment + and a1, 7, t5 # E : find src misalignment + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + mov zero, t0 # E : + + mov zero, t6 # E : + beq t4, 1f # U : + ldq_u t0, 0(a0) # L : + lda t6, -1 # E : + + mskql t6, a0, t6 # U : + nop + nop + nop +1: + subq a1, t4, a1 # E : sub dest misalignment from src addr + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + cmplt t4, t5, t8 # E : + beq t8, $u_head # U : + lda t2, -1 # E : mask out leading garbage in source + + mskqh t2, t5, t2 # U : + ornot t1, t2, t3 # E : (stall) + cmpbge zero, t3, t10 # E : is there a zero? (stall) + beq t10, $u_head # U : (stall) + + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + + ldq_u t0, 0(a0) # L : + negq t10, t6 # E : build bitmask of bytes <= zero + and t6, t10, t8 # E : (stall) + and a1, 7, t5 # E : + + subq t8, 1, t6 # E : + or t6, t8, t10 # E : (stall) + srl t8, t5, t8 # U : adjust final null return value + zapnot t2, t10, t2 # U : prepare source word; mirror changes (stall) + + and t1, t2, t1 # E : to source validity mask + extql t2, a1, t2 # U : + extql t1, a1, t1 # U : (stall) + andnot t0, t2, t0 # .. e1 : zero place for source to reside (stall) + + or t0, t1, t1 # e1 : and put it there + stq_u t1, 0(a0) # .. e0 : (stall) + ret (t9) # e1 : + + cfi_endproc diff --git a/REORG.TODO/sysdeps/alpha/alphaev6/stxncpy.S b/REORG.TODO/sysdeps/alpha/alphaev6/stxncpy.S new file mode 100644 index 0000000000..ad094cc1df --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev6/stxncpy.S @@ -0,0 +1,392 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy no more than COUNT bytes of the null-terminated string from + SRC to DST. + + This is an internal routine used by strncpy, stpncpy, and strncat. + As such, it uses special linkage conventions to make implementation + of these public functions more efficient. + + On input: + t9 = return address + a0 = DST + a1 = SRC + a2 = COUNT + + Furthermore, COUNT may not be zero. + + On output: + t0 = last word written + t8 = bitmask (with one bit set) indicating the last byte written + t10 = bitmask (with one bit set) indicating the byte position of + the end of the range specified by COUNT + a0 = unaligned address of the last *word* written + a2 = the number of full words left in COUNT + + Furthermore, v0, a3-a5, t11, and t12 are untouched. +*/ + +#include <sysdep.h> + + .arch ev6 + .set noat + .set noreorder + + .text + .type __stxncpy, @function + .globl __stxncpy + .usepv __stxncpy, no + + cfi_startproc + cfi_return_column (t9) + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + .align 4 +stxncpy_aligned: + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # E : build a mask against false zero + mskqh t2, a1, t2 # U : detection in the src word (stall) + mskqh t1, a1, t3 # U : + ornot t1, t2, t2 # E : (stall) + + mskql t0, a1, t0 # U : assemble the first output word + cmpbge zero, t2, t7 # E : bits set iff null found + or t0, t3, t0 # E : (stall) + beq a2, $a_eoc # U : + + bne t7, $a_eos # U : + nop + nop + nop + + /* On entry to this basic block: + t0 == a source word not containing a null. */ + + /* + * nops here to: + * separate store quads from load quads + * limit of 1 bcond/quad to permit training + */ +$a_loop: + stq_u t0, 0(a0) # L : + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + nop + + ldq_u t0, 0(a1) # L : + addq a1, 8, a1 # E : + cmpbge zero, t0, t7 # E : + beq a2, $a_eoc # U : + + beq t7, $a_loop # U : + nop + nop + nop + + /* Take care of the final (partial) word store. At this point + the end-of-count bit is set in t7 iff it applies. + + On entry to this basic block we have: + t0 == the source word containing the null + t7 == the cmpbge mask that found it. */ +$a_eos: + negq t7, t8 # E : find low bit set + and t7, t8, t8 # E : (stall) + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t8, 0x80, t6 # E : (stall) + bne t6, 1f # U : (stall) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t1, 0(a0) # L : + subq t8, 1, t6 # E : + or t8, t6, t7 # E : (stall) + zapnot t0, t7, t0 # U : clear src bytes > null (stall) + + zap t1, t7, t1 # .. e1 : clear dst bytes <= null + or t0, t1, t0 # e1 : (stall) + nop + nop + +1: stq_u t0, 0(a0) # L : + ret (t9) # L0 : Latency=3 + nop + nop + + /* Add the end-of-count bit to the eos detection bitmask. */ +$a_eoc: + or t10, t7, t7 # E : + br $a_eos # L0 : Latency=3 + nop + nop + + .align 4 +__stxncpy: + /* Are source and destination co-aligned? */ + lda t2, -1 # E : + xor a0, a1, t1 # E : + and a0, 7, t0 # E : find dest misalignment + nop # E : + + srl t2, 1, t2 # U : + and t1, 7, t1 # E : + cmovlt a2, t2, a2 # E : bound count to LONG_MAX (stall) + nop # E : + + addq a2, t0, a2 # E : bias count by dest misalignment + subq a2, 1, a2 # E : (stall) + and a2, 7, t2 # E : (stall) + lda t10, 1 # E : + + srl a2, 3, a2 # U : a2 = loop counter = (count - 1)/8 + sll t10, t2, t10 # U : t10 = bitmask of last count byte + nop # E : + bne t1, $unaligned # U : (stall) + + /* We are co-aligned; take care of a partial first word. */ + ldq_u t1, 0(a1) # L : load first src word + addq a1, 8, a1 # E : + beq t0, stxncpy_aligned # U : avoid loading dest word if not needed + ldq_u t0, 0(a0) # L : + + br stxncpy_aligned # U : + nop + nop + nop + + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 4 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, unmasked + t1 == the shifted low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # L : Latency=3 load second src word + addq a1, 8, a1 # E : + mskql t0, a0, t0 # U : mask trailing garbage in dst + extqh t2, a1, t4 # U : (3 cycle stall on t2) + + or t1, t4, t1 # E : first aligned src word complete (stall) + mskqh t1, a0, t1 # U : mask leading garbage in src (stall) + or t0, t1, t0 # E : first output word complete (stall) + or t0, t6, t6 # E : mask original data for zero test (stall) + + cmpbge zero, t6, t7 # E : + beq a2, $u_eocfin # U : + lda t6, -1 # E : + nop + + bne t7, $u_final # U : + mskql t6, a1, t6 # U : mask out bits already seen + stq_u t0, 0(a0) # L : store first output word + or t6, t2, t2 # E : + + cmpbge zero, t2, t7 # E : find nulls in second partial + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + bne t7, $u_late_head_exit # U : + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + extql t2, a1, t1 # U : position hi-bits of lo word + beq a2, $u_eoc # U : + ldq_u t2, 8(a1) # L : read next high-order source word + addq a1, 8, a1 # E : + + extqh t2, a1, t0 # U : position lo-bits of hi word (stall) + cmpbge zero, t2, t7 # E : + nop + bne t7, $u_eos # U : + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. */ + + .align 4 +$u_loop: + or t0, t1, t0 # E : current dst word now complete + subq a2, 1, a2 # E : decrement word count + extql t2, a1, t1 # U : extract high bits for next time + addq a0, 8, a0 # E : + + stq_u t0, -8(a0) # L : save the current word + beq a2, $u_eoc # U : + ldq_u t2, 8(a1) # L : Latency=3 load high word for next time + addq a1, 8, a1 # E : + + extqh t2, a1, t0 # U : extract low bits (2 cycle stall) + cmpbge zero, t2, t7 # E : test new word for eos + nop + beq t7, $u_loop # U : + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + or t0, t1, t0 # E : first (partial) source word complete + nop + cmpbge zero, t0, t7 # E : is the null in this first bit? (stall) + bne t7, $u_final # U : (stall) + + stq_u t0, 0(a0) # L : the null was in the high-order bits + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + nop + +$u_late_head_exit: + extql t2, a1, t0 # U : + cmpbge zero, t0, t7 # E : + or t7, t10, t6 # E : (stall) + cmoveq a2, t6, t7 # E : Latency=2, extra map slot (stall) + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t0 == assembled source word + t7 == cmpbge mask that found the null. */ +$u_final: + negq t7, t6 # E : isolate low bit set + and t6, t7, t8 # E : (stall) + and t8, 0x80, t6 # E : avoid dest word load if we can (stall) + bne t6, 1f # U : (stall) + + ldq_u t1, 0(a0) # L : + subq t8, 1, t6 # E : + or t6, t8, t7 # E : (stall) + zapnot t0, t7, t0 # U : kill source bytes > null + + zap t1, t7, t1 # U : kill dest bytes <= null + or t0, t1, t0 # E : (stall) + nop + nop + +1: stq_u t0, 0(a0) # L : + ret (t9) # L0 : Latency=3 + + /* Got to end-of-count before end of string. + On entry to this basic block: + t1 == the shifted high-order bits from the previous source word */ +$u_eoc: + and a1, 7, t6 # E : + sll t10, t6, t6 # U : (stall) + and t6, 0xff, t6 # E : (stall) + bne t6, 1f # U : (stall) + + ldq_u t2, 8(a1) # L : load final src word + nop + extqh t2, a1, t0 # U : extract low bits for last word (stall) + or t1, t0, t1 # E : (stall) + +1: cmpbge zero, t1, t7 # E : + mov t1, t0 + +$u_eocfin: # end-of-count, final word + or t10, t7, t7 # E : + br $u_final # L0 : Latency=3 + + /* Unaligned copy entry point. */ + .align 4 +$unaligned: + + ldq_u t1, 0(a1) # L : load first source word + and a0, 7, t4 # E : find dest misalignment + and a1, 7, t5 # E : find src misalignment + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + mov zero, t0 # E : + + mov zero, t6 # E : + beq t4, 1f # U : + ldq_u t0, 0(a0) # L : + lda t6, -1 # E : + + mskql t6, a0, t6 # U : + nop + nop +1: subq a1, t4, a1 # E : sub dest misalignment from src addr + + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + + cmplt t4, t5, t8 # E : + extql t1, a1, t1 # U : shift src into place + lda t2, -1 # E : for creating masks later + beq t8, $u_head # U : (stall) + + mskqh t2, t5, t2 # U : begin src byte validity mask + cmpbge zero, t1, t7 # E : is there a zero? + extql t2, a1, t2 # U : + or t7, t10, t5 # E : test for end-of-count too + + cmpbge zero, t2, t3 # E : + cmoveq a2, t5, t7 # E : Latency=2, extra map slot + nop # E : keep with cmoveq + andnot t7, t3, t7 # E : (stall) + + beq t7, $u_head # U : + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + ldq_u t0, 0(a0) # L : + negq t7, t6 # E : build bitmask of bytes <= zero + mskqh t1, t4, t1 # U : + + and t6, t7, t8 # E : + subq t8, 1, t6 # E : (stall) + or t6, t8, t7 # E : (stall) + zapnot t2, t7, t2 # U : prepare source word; mirror changes (stall) + + zapnot t1, t7, t1 # U : to source validity mask + andnot t0, t2, t0 # E : zero place for source to reside + or t0, t1, t0 # E : and put it there (stall both t0, t1) + stq_u t0, 0(a0) # L : (stall) + + ret (t9) # L0 : Latency=3 + + cfi_endproc diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/Implies b/REORG.TODO/sysdeps/alpha/alphaev67/Implies new file mode 100644 index 0000000000..49d19c4ad8 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/Implies @@ -0,0 +1 @@ +alpha/alphaev6 diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/ffs.S b/REORG.TODO/sysdeps/alpha/alphaev67/ffs.S new file mode 100644 index 0000000000..6715ae9234 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/ffs.S @@ -0,0 +1,51 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Finds the first bit set in an integer. */ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .set noat + + +ENTRY(__ffs) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + zap $16, 0xF0, $16 + cttz $16, $0 + addq $0, 1, $0 + cmoveq $16, 0, $0 + + nop + nop + nop + ret + +END(__ffs) + +weak_alias (__ffs, ffs) +libc_hidden_def (__ffs) +libc_hidden_builtin_def (ffs) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/ffsll.S b/REORG.TODO/sysdeps/alpha/alphaev67/ffsll.S new file mode 100644 index 0000000000..b469bba063 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/ffsll.S @@ -0,0 +1,44 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Finds the first bit set in a long. */ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .set noat + +ENTRY(ffsl) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + cttz $16, $0 + addq $0, 1, $0 + cmoveq $16, 0, $0 + ret + +END(ffsl) + +weak_extern (ffsl) +weak_alias (ffsl, ffsll) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/fpu/Implies b/REORG.TODO/sysdeps/alpha/alphaev67/fpu/Implies new file mode 100644 index 0000000000..9e3f12d0ac --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/fpu/Implies @@ -0,0 +1 @@ +alpha/alphaev6/fpu diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/rawmemchr.S b/REORG.TODO/sysdeps/alpha/alphaev67/rawmemchr.S new file mode 100644 index 0000000000..cddfe2a9f4 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/rawmemchr.S @@ -0,0 +1,92 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Return pointer to first occurrence of CH in STR. */ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .set noat + +ENTRY(__rawmemchr) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + ldq_u t0, 0(a0) # L : load first quadword Latency=3 + and a1, 0xff, t3 # E : 00000000000000ch + insbl a1, 1, t5 # U : 000000000000ch00 + insbl a1, 7, a2 # U : ch00000000000000 + + insbl t3, 6, a3 # U : 00ch000000000000 + or t5, t3, a1 # E : 000000000000chch + andnot a0, 7, v0 # E : align our loop pointer + lda t4, -1 # E : build garbage mask + + mskqh t4, a0, t4 # U : only want relevant part of first quad + or a2, a3, a2 # E : chch000000000000 + inswl a1, 2, t5 # E : 00000000chch0000 + inswl a1, 4, a3 # E : 0000chch00000000 + + or a1, a2, a1 # E : chch00000000chch + or a3, t5, t5 # E : 0000chchchch0000 + cmpbge zero, t4, t4 # E : bits set iff byte is garbage + nop # E : + + /* This quad is _very_ serialized. Lots of stalling happens */ + or t5, a1, a1 # E : chchchchchchchch + xor t0, a1, t1 # E : make bytes == c zero + cmpbge zero, t1, t0 # E : bits set iff byte == c + andnot t0, t4, t0 # E : clear garbage bits + + cttz t0, a2 # U0 : speculative (in case we get a match) + nop # E : + nop # E : + bne t0, $found # U : + + /* + * Yuk. This loop is going to stall like crazy waiting for the + * data to be loaded. Not much can be done about it unless it's + * unrolled multiple times, which is generally unsafe. + */ +$loop: + ldq t0, 8(v0) # L : Latency=3 + addq v0, 8, v0 # E : + xor t0, a1, t1 # E : + cmpbge zero, t1, t0 # E : bits set iff byte == c + + cttz t0, a2 # U0 : speculative (in case we get a match) + nop # E : + nop # E : + beq t0, $loop # U : + +$found: + negq t0, t1 # E : clear all but least set bit + and t0, t1, t0 # E : + addq v0, a2, v0 # E : Add in the bit number from above + ret # L0 : + + END(__rawmemchr) + +libc_hidden_def (__rawmemchr) +weak_alias (__rawmemchr, rawmemchr) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/stpcpy.S b/REORG.TODO/sysdeps/alpha/alphaev67/stpcpy.S new file mode 100644 index 0000000000..1f277779e0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/stpcpy.S @@ -0,0 +1,53 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@redhat.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy SRC to DEST returning the address of the terminating 0 in DEST. */ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .set noat + .text + +ENTRY(__stpcpy) + ldgp gp, 0(pv) +#ifdef PROF + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + .prologue 1 + + .align 4 + mov a0, v0 + nop + jsr t9, __stxcpy + + # t8 = bitmask (with one bit set) indicating the last byte written + # a0 = unaligned address of the last *word* written + + cttz t8, t8 + andnot a0, 7, a0 + addq a0, t8, v0 + ret + + END(__stpcpy) + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/stpncpy.S b/REORG.TODO/sysdeps/alpha/alphaev67/stpncpy.S new file mode 100644 index 0000000000..1efc86cdb2 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/stpncpy.S @@ -0,0 +1,115 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@redhat.com) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy no more than N bytes from SRC to DEST, returning the address of + the terminating '\0' in DEST. */ + +#include <sysdep.h> + + .arch ev6 + .set noat + .set noreorder + .text + +ENTRY(__stpncpy) + ldgp gp, 0(pv) +#ifdef PROF + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + .prologue 1 + + mov a0, v0 + beq a2, $zerocount + + .align 4 + nop + nop + jsr t9, __stxncpy # do the work of the copy + + cttz t8, t4 + zapnot t0, t8, t5 + andnot a0, 7, a0 + bne a2, $multiword # do we have full words left? + + subq t8, 1, t2 + subq t10, 1, t3 + cmpult zero, t5, t5 + addq a0, t4, v0 + + or t2, t8, t2 + or t3, t10, t3 + addq v0, t5, v0 + andnot t3, t2, t3 + + zap t0, t3, t0 + nop + stq t0, 0(a0) + ret + +$multiword: + subq t8, 1, t7 # clear the final bits in the prev word + cmpult zero, t5, t5 + or t7, t8, t7 + zapnot t0, t7, t0 + + subq a2, 1, a2 + stq t0, 0(a0) + addq a0, 8, a1 + beq a2, 1f # loop over full words remaining + + nop + nop + nop + blbc a2, 0f + + stq zero, 0(a1) + subq a2, 1, a2 + addq a1, 8, a1 + beq a2, 1f + +0: stq zero, 0(a1) + subq a2, 2, a2 + nop + nop + + stq zero, 8(a1) + addq a1, 16, a1 + nop + bne a2, 0b + +1: ldq t0, 0(a1) # clear the leading bits in the final word + subq t10, 1, t7 + addq a0, t4, v0 + nop + + or t7, t10, t7 + addq v0, t5, v0 + zap t0, t7, t0 + stq t0, 0(a1) + +$zerocount: + nop + nop + nop + ret + + END(__stpncpy) + +libc_hidden_def (__stpncpy) +weak_alias (__stpncpy, stpncpy) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/strcat.S b/REORG.TODO/sysdeps/alpha/alphaev67/strcat.S new file mode 100644 index 0000000000..1d70061479 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/strcat.S @@ -0,0 +1,61 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@tamu.edu>, 1996. + EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Append a null-terminated string from SRC to DST. */ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .text + +ENTRY(strcat) + ldgp gp, 0(pv) +#ifdef PROF + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at +#endif + .prologue 1 + + mov $16, $0 # E : set up return value + /* Find the end of the string. */ + ldq_u $1, 0($16) # L : load first quadword (a0 may be misaligned) + lda $2, -1 # E : + insqh $2, $16, $2 # U : + + andnot $16, 7, $16 # E : + or $2, $1, $1 # E : + cmpbge $31, $1, $2 # E : bits set iff byte == 0 + bne $2, $found # U : + +$loop: ldq $1, 8($16) # L : + addq $16, 8, $16 # E : + cmpbge $31, $1, $2 # E : + beq $2, $loop # U : + +$found: cttz $2, $3 # U0 : + addq $16, $3, $16 # E : + /* Now do the append. */ + mov $26, $23 # E : + jmp $31, __stxcpy # L0 : + + END(strcat) +libc_hidden_builtin_def (strcat) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/strchr.S b/REORG.TODO/sysdeps/alpha/alphaev67/strchr.S new file mode 100644 index 0000000000..ac1886eb55 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/strchr.S @@ -0,0 +1,100 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@tamu.edu>, 1996. + EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Return the address of a given character within a null-terminated + string, or null if it is not found. */ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .set noat + +ENTRY(strchr) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + ldq_u t0, 0(a0) # L : load first quadword Latency=3 + and a1, 0xff, t3 # E : 00000000000000ch + insbl a1, 1, t5 # U : 000000000000ch00 + insbl a1, 7, a2 # U : ch00000000000000 + + insbl t3, 6, a3 # U : 00ch000000000000 + or t5, t3, a1 # E : 000000000000chch + andnot a0, 7, v0 # E : align our loop pointer + lda t4, -1 # E : build garbage mask + + mskqh t4, a0, t4 # U : only want relevant part of first quad + or a2, a3, a2 # E : chch000000000000 + inswl a1, 2, t5 # E : 00000000chch0000 + inswl a1, 4, a3 # E : 0000chch00000000 + + or a1, a2, a1 # E : chch00000000chch + or a3, t5, t5 # E : 0000chchchch0000 + cmpbge zero, t0, t2 # E : bits set iff byte == zero + cmpbge zero, t4, t4 # E : bits set iff byte is garbage + + /* This quad is _very_ serialized. Lots of stalling happens */ + or t5, a1, a1 # E : chchchchchchchch + xor t0, a1, t1 # E : make bytes == c zero + cmpbge zero, t1, t3 # E : bits set iff byte == c + or t2, t3, t0 # E : bits set iff char match or zero match + + andnot t0, t4, t0 # E : clear garbage bits + cttz t0, a2 # U0 : speculative (in case we get a match) + nop # E : + bne t0, $found # U : + + /* + * Yuk. This loop is going to stall like crazy waiting for the + * data to be loaded. Not much can be done about it unless it's + * unrolled multiple times, which is generally unsafe. + */ +$loop: + ldq t0, 8(v0) # L : Latency=3 + addq v0, 8, v0 # E : + xor t0, a1, t1 # E : + cmpbge zero, t0, t2 # E : bits set iff byte == 0 + + cmpbge zero, t1, t3 # E : bits set iff byte == c + or t2, t3, t0 # E : + cttz t3, a2 # U0 : speculative (in case we get a match) + beq t0, $loop # U : + +$found: + negq t0, t1 # E : clear all but least set bit + and t0, t1, t0 # E : + and t0, t3, t1 # E : bit set iff byte was the char + addq v0, a2, v0 # E : Add in the bit number from above + + cmoveq t1, $31, v0 # E : Two mapping slots, latency = 2 + nop + nop + ret # L0 : + + END(strchr) + +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/strlen.S b/REORG.TODO/sysdeps/alpha/alphaev67/strlen.S new file mode 100644 index 0000000000..ead08998a1 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/strlen.S @@ -0,0 +1,60 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by David Mosberger (davidm@cs.arizona.edu). + EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Finds length of a 0-terminated string. */ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .set noat + +ENTRY(strlen) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + ldq_u $1, 0($16) # L : load first quadword ($16 may be misaligned) + lda $2, -1($31) # E : + insqh $2, $16, $2 # U : + andnot $16, 7, $0 # E : + + or $2, $1, $1 # E : + cmpbge $31, $1, $2 # E : $2 <- bitmask: bit i == 1 <==> i-th byte == 0 + nop # E : + bne $2, $found # U : + +$loop: ldq $1, 8($0) # L : + addq $0, 8, $0 # E : addr += 8 + cmpbge $31, $1, $2 # E : + beq $2, $loop # U : + +$found: + cttz $2, $3 # U0 : + addq $0, $3, $0 # E : + subq $0, $16, $0 # E : + ret $31, ($26) # L0 : + + END(strlen) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/strncat.S b/REORG.TODO/sysdeps/alpha/alphaev67/strncat.S new file mode 100644 index 0000000000..58aac54d64 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/strncat.S @@ -0,0 +1,87 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@tamu.edu>, 1996. + EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Append no more than COUNT characters from the null-terminated string SRC + to the null-terminated string DST. Always null-terminate the new DST. */ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .text + +ENTRY(strncat) + ldgp gp, 0(pv) +#ifdef PROF + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at +#endif + .prologue 1 + + mov a0, v0 # set up return value + beq a2, $zerocount # U : + /* Find the end of the string. */ + ldq_u t0, 0(a0) # L : load first quadword (a0 may be misaligned) + lda t1, -1 # E : + + insqh t1, v0, t1 # U : + andnot a0, 7, a0 # E : + nop # E : + or t1, t0, t0 # E : + + nop # E : + nop # E : + cmpbge zero, t0, t1 # E : bits set iff byte == 0 + bne t1, $found # U : + +$loop: ldq t0, 8(a0) # L : + addq a0, 8, a0 # E : + cmpbge zero, t0, t1 # E : + beq t1, $loop # U : + +$found: cttz t1, t2 # U0 : + addq a0, t2, a0 # E : + jsr t9, __stxncpy # L0 : Now do the append. + + /* Worry about the null termination. */ + + cttz t10, t2 # U0: byte offset of end-of-count. + bic a0, 7, a0 # E : word align the last write address. + zapnot t0, t8, t1 # U : was last byte a null? + nop # E : + + bne t1, 0f # U : + nop # E : + nop # E : + ret # L0 : + +0: addq t2, a0, a0 # E : address of end-of-count + stb zero, 1(a0) # L : + nop # E : + ret # L0 : + +$zerocount: + nop # E : + nop # E : + nop # E : + ret # L0 : + + END(strncat) diff --git a/REORG.TODO/sysdeps/alpha/alphaev67/strrchr.S b/REORG.TODO/sysdeps/alpha/alphaev67/strrchr.S new file mode 100644 index 0000000000..f2fb4cf677 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/alphaev67/strrchr.S @@ -0,0 +1,116 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Return the address of the last occurrence of a given character + within a null-terminated string, or null if it is not found. */ + +#include <sysdep.h> + + .arch ev6 + .set noreorder + .set noat + +ENTRY(strrchr) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + and a1, 0xff, t2 # E : 00000000000000ch + insbl a1, 1, t4 # U : 000000000000ch00 + insbl a1, 2, t5 # U : 0000000000ch0000 + ldq_u t0, 0(a0) # L : load first quadword Latency=3 + + mov zero, t6 # E : t6 is last match aligned addr + or t2, t4, a1 # E : 000000000000chch + sll t5, 8, t3 # U : 00000000ch000000 + mov zero, t8 # E : t8 is last match byte compare mask + + andnot a0, 7, v0 # E : align source addr + or t5, t3, t3 # E : 00000000chch0000 + sll a1, 32, t2 # U : 0000chch00000000 + sll a1, 48, t4 # U : chch000000000000 + + or t4, a1, a1 # E : chch00000000chch + or t2, t3, t2 # E : 0000chchchch0000 + or a1, t2, a1 # E : chchchchchchchch + lda t5, -1 # E : build garbage mask + + cmpbge zero, t0, t1 # E : bits set iff byte == zero + mskqh t5, a0, t4 # E : Complete garbage mask + xor t0, a1, t2 # E : make bytes == c zero + cmpbge zero, t4, t4 # E : bits set iff byte is garbage + + cmpbge zero, t2, t3 # E : bits set iff byte == c + andnot t1, t4, t1 # E : clear garbage from null test + andnot t3, t4, t3 # E : clear garbage from char test + bne t1, $eos # U : did we already hit the terminator? + + /* Character search main loop */ +$loop: + ldq t0, 8(v0) # L : load next quadword + cmovne t3, v0, t6 # E : save previous comparisons match + nop # : Latency=2, extra map slot (keep nop with cmov) + nop + + cmovne t3, t3, t8 # E : Latency=2, extra map slot + nop # : keep with cmovne + addq v0, 8, v0 # E : + xor t0, a1, t2 # E : + + cmpbge zero, t0, t1 # E : bits set iff byte == zero + cmpbge zero, t2, t3 # E : bits set iff byte == c + beq t1, $loop # U : if we havnt seen a null, loop + nop + + /* Mask out character matches after terminator */ +$eos: + negq t1, t4 # E : isolate first null byte match + and t1, t4, t4 # E : + subq t4, 1, t5 # E : build a mask of the bytes upto... + or t4, t5, t4 # E : ... and including the null + + and t3, t4, t3 # E : mask out char matches after null + cmovne t3, t3, t8 # E : save it, if match found Latency=2, extra map slot + nop # : Keep with cmovne + nop + + cmovne t3, v0, t6 # E : + nop # : Keep with cmovne + /* Locate the address of the last matched character */ + ctlz t8, t2 # U0 : Latency=3 (0x40 for t8=0) + nop + + cmoveq t8, 0x3f, t2 # E : Compensate for case when no match is seen + nop # E : hide the cmov latency (2) behind ctlz latency + lda t5, 0x3f($31) # E : + subq t5, t2, t5 # E : Normalize leading zero count + + addq t6, t5, v0 # E : and add to quadword address + ret # L0 : Latency=3 + nop + nop + +END(strrchr) + +weak_alias (strrchr, rindex) +libc_hidden_builtin_def (strrchr) diff --git a/REORG.TODO/sysdeps/alpha/atomic-machine.h b/REORG.TODO/sysdeps/alpha/atomic-machine.h new file mode 100644 index 0000000000..2cb2290837 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/atomic-machine.h @@ -0,0 +1,370 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +typedef int8_t atomic8_t; +typedef uint8_t uatomic8_t; +typedef int_fast8_t atomic_fast8_t; +typedef uint_fast8_t uatomic_fast8_t; + +typedef int16_t atomic16_t; +typedef uint16_t uatomic16_t; +typedef int_fast16_t atomic_fast16_t; +typedef uint_fast16_t uatomic_fast16_t; + +typedef int32_t atomic32_t; +typedef uint32_t uatomic32_t; +typedef int_fast32_t atomic_fast32_t; +typedef uint_fast32_t uatomic_fast32_t; + +typedef int64_t atomic64_t; +typedef uint64_t uatomic64_t; +typedef int_fast64_t atomic_fast64_t; +typedef uint_fast64_t uatomic_fast64_t; + +typedef intptr_t atomicptr_t; +typedef uintptr_t uatomicptr_t; +typedef intmax_t atomic_max_t; +typedef uintmax_t uatomic_max_t; + +#define __HAVE_64B_ATOMICS 1 +#define USE_ATOMIC_COMPILER_BUILTINS 0 + +/* XXX Is this actually correct? */ +#define ATOMIC_EXCHANGE_USES_CAS 1 + + +#ifdef UP +# define __MB /* nothing */ +#else +# define __MB " mb\n" +#endif + + +/* Compare and exchange. For all of the "xxx" routines, we expect a + "__prev" and a "__cmp" variable to be provided by the enclosing scope, + in which values are returned. */ + +#define __arch_compare_and_exchange_xxx_8_int(mem, new, old, mb1, mb2) \ +({ \ + unsigned long __tmp, __snew, __addr64; \ + __asm__ __volatile__ ( \ + mb1 \ + " andnot %[__addr8],7,%[__addr64]\n" \ + " insbl %[__new],%[__addr8],%[__snew]\n" \ + "1: ldq_l %[__tmp],0(%[__addr64])\n" \ + " extbl %[__tmp],%[__addr8],%[__prev]\n" \ + " cmpeq %[__prev],%[__old],%[__cmp]\n" \ + " beq %[__cmp],2f\n" \ + " mskbl %[__tmp],%[__addr8],%[__tmp]\n" \ + " or %[__snew],%[__tmp],%[__tmp]\n" \ + " stq_c %[__tmp],0(%[__addr64])\n" \ + " beq %[__tmp],1b\n" \ + mb2 \ + "2:" \ + : [__prev] "=&r" (__prev), \ + [__snew] "=&r" (__snew), \ + [__tmp] "=&r" (__tmp), \ + [__cmp] "=&r" (__cmp), \ + [__addr64] "=&r" (__addr64) \ + : [__addr8] "r" (mem), \ + [__old] "Ir" ((uint64_t)(uint8_t)(uint64_t)(old)), \ + [__new] "r" (new) \ + : "memory"); \ +}) + +#define __arch_compare_and_exchange_xxx_16_int(mem, new, old, mb1, mb2) \ +({ \ + unsigned long __tmp, __snew, __addr64; \ + __asm__ __volatile__ ( \ + mb1 \ + " andnot %[__addr16],7,%[__addr64]\n" \ + " inswl %[__new],%[__addr16],%[__snew]\n" \ + "1: ldq_l %[__tmp],0(%[__addr64])\n" \ + " extwl %[__tmp],%[__addr16],%[__prev]\n" \ + " cmpeq %[__prev],%[__old],%[__cmp]\n" \ + " beq %[__cmp],2f\n" \ + " mskwl %[__tmp],%[__addr16],%[__tmp]\n" \ + " or %[__snew],%[__tmp],%[__tmp]\n" \ + " stq_c %[__tmp],0(%[__addr64])\n" \ + " beq %[__tmp],1b\n" \ + mb2 \ + "2:" \ + : [__prev] "=&r" (__prev), \ + [__snew] "=&r" (__snew), \ + [__tmp] "=&r" (__tmp), \ + [__cmp] "=&r" (__cmp), \ + [__addr64] "=&r" (__addr64) \ + : [__addr16] "r" (mem), \ + [__old] "Ir" ((uint64_t)(uint16_t)(uint64_t)(old)), \ + [__new] "r" (new) \ + : "memory"); \ +}) + +#define __arch_compare_and_exchange_xxx_32_int(mem, new, old, mb1, mb2) \ +({ \ + __asm__ __volatile__ ( \ + mb1 \ + "1: ldl_l %[__prev],%[__mem]\n" \ + " cmpeq %[__prev],%[__old],%[__cmp]\n" \ + " beq %[__cmp],2f\n" \ + " mov %[__new],%[__cmp]\n" \ + " stl_c %[__cmp],%[__mem]\n" \ + " beq %[__cmp],1b\n" \ + mb2 \ + "2:" \ + : [__prev] "=&r" (__prev), \ + [__cmp] "=&r" (__cmp) \ + : [__mem] "m" (*(mem)), \ + [__old] "Ir" ((uint64_t)(atomic32_t)(uint64_t)(old)), \ + [__new] "Ir" (new) \ + : "memory"); \ +}) + +#define __arch_compare_and_exchange_xxx_64_int(mem, new, old, mb1, mb2) \ +({ \ + __asm__ __volatile__ ( \ + mb1 \ + "1: ldq_l %[__prev],%[__mem]\n" \ + " cmpeq %[__prev],%[__old],%[__cmp]\n" \ + " beq %[__cmp],2f\n" \ + " mov %[__new],%[__cmp]\n" \ + " stq_c %[__cmp],%[__mem]\n" \ + " beq %[__cmp],1b\n" \ + mb2 \ + "2:" \ + : [__prev] "=&r" (__prev), \ + [__cmp] "=&r" (__cmp) \ + : [__mem] "m" (*(mem)), \ + [__old] "Ir" ((uint64_t)(old)), \ + [__new] "Ir" (new) \ + : "memory"); \ +}) + +/* For all "bool" routines, we return FALSE if exchange succesful. */ + +#define __arch_compare_and_exchange_bool_8_int(mem, new, old, mb1, mb2) \ +({ unsigned long __prev; int __cmp; \ + __arch_compare_and_exchange_xxx_8_int(mem, new, old, mb1, mb2); \ + !__cmp; }) + +#define __arch_compare_and_exchange_bool_16_int(mem, new, old, mb1, mb2) \ +({ unsigned long __prev; int __cmp; \ + __arch_compare_and_exchange_xxx_16_int(mem, new, old, mb1, mb2); \ + !__cmp; }) + +#define __arch_compare_and_exchange_bool_32_int(mem, new, old, mb1, mb2) \ +({ unsigned long __prev; int __cmp; \ + __arch_compare_and_exchange_xxx_32_int(mem, new, old, mb1, mb2); \ + !__cmp; }) + +#define __arch_compare_and_exchange_bool_64_int(mem, new, old, mb1, mb2) \ +({ unsigned long __prev; int __cmp; \ + __arch_compare_and_exchange_xxx_64_int(mem, new, old, mb1, mb2); \ + !__cmp; }) + +/* For all "val" routines, return the old value whether exchange + successful or not. */ + +#define __arch_compare_and_exchange_val_8_int(mem, new, old, mb1, mb2) \ +({ unsigned long __prev; int __cmp; \ + __arch_compare_and_exchange_xxx_8_int(mem, new, old, mb1, mb2); \ + (typeof (*mem))__prev; }) + +#define __arch_compare_and_exchange_val_16_int(mem, new, old, mb1, mb2) \ +({ unsigned long __prev; int __cmp; \ + __arch_compare_and_exchange_xxx_16_int(mem, new, old, mb1, mb2); \ + (typeof (*mem))__prev; }) + +#define __arch_compare_and_exchange_val_32_int(mem, new, old, mb1, mb2) \ +({ unsigned long __prev; int __cmp; \ + __arch_compare_and_exchange_xxx_32_int(mem, new, old, mb1, mb2); \ + (typeof (*mem))__prev; }) + +#define __arch_compare_and_exchange_val_64_int(mem, new, old, mb1, mb2) \ +({ unsigned long __prev; int __cmp; \ + __arch_compare_and_exchange_xxx_64_int(mem, new, old, mb1, mb2); \ + (typeof (*mem))__prev; }) + +/* Compare and exchange with "acquire" semantics, ie barrier after. */ + +#define atomic_compare_and_exchange_bool_acq(mem, new, old) \ + __atomic_bool_bysize (__arch_compare_and_exchange_bool, int, \ + mem, new, old, "", __MB) + +#define atomic_compare_and_exchange_val_acq(mem, new, old) \ + __atomic_val_bysize (__arch_compare_and_exchange_val, int, \ + mem, new, old, "", __MB) + +/* Compare and exchange with "release" semantics, ie barrier before. */ + +#define atomic_compare_and_exchange_val_rel(mem, new, old) \ + __atomic_val_bysize (__arch_compare_and_exchange_val, int, \ + mem, new, old, __MB, "") + + +/* Atomically store value and return the previous value. */ + +#define __arch_exchange_8_int(mem, value, mb1, mb2) \ +({ \ + unsigned long __tmp, __addr64, __sval; __typeof(*mem) __ret; \ + __asm__ __volatile__ ( \ + mb1 \ + " andnot %[__addr8],7,%[__addr64]\n" \ + " insbl %[__value],%[__addr8],%[__sval]\n" \ + "1: ldq_l %[__tmp],0(%[__addr64])\n" \ + " extbl %[__tmp],%[__addr8],%[__ret]\n" \ + " mskbl %[__tmp],%[__addr8],%[__tmp]\n" \ + " or %[__sval],%[__tmp],%[__tmp]\n" \ + " stq_c %[__tmp],0(%[__addr64])\n" \ + " beq %[__tmp],1b\n" \ + mb2 \ + : [__ret] "=&r" (__ret), \ + [__sval] "=&r" (__sval), \ + [__tmp] "=&r" (__tmp), \ + [__addr64] "=&r" (__addr64) \ + : [__addr8] "r" (mem), \ + [__value] "r" (value) \ + : "memory"); \ + __ret; }) + +#define __arch_exchange_16_int(mem, value, mb1, mb2) \ +({ \ + unsigned long __tmp, __addr64, __sval; __typeof(*mem) __ret; \ + __asm__ __volatile__ ( \ + mb1 \ + " andnot %[__addr16],7,%[__addr64]\n" \ + " inswl %[__value],%[__addr16],%[__sval]\n" \ + "1: ldq_l %[__tmp],0(%[__addr64])\n" \ + " extwl %[__tmp],%[__addr16],%[__ret]\n" \ + " mskwl %[__tmp],%[__addr16],%[__tmp]\n" \ + " or %[__sval],%[__tmp],%[__tmp]\n" \ + " stq_c %[__tmp],0(%[__addr64])\n" \ + " beq %[__tmp],1b\n" \ + mb2 \ + : [__ret] "=&r" (__ret), \ + [__sval] "=&r" (__sval), \ + [__tmp] "=&r" (__tmp), \ + [__addr64] "=&r" (__addr64) \ + : [__addr16] "r" (mem), \ + [__value] "r" (value) \ + : "memory"); \ + __ret; }) + +#define __arch_exchange_32_int(mem, value, mb1, mb2) \ +({ \ + signed int __tmp; __typeof(*mem) __ret; \ + __asm__ __volatile__ ( \ + mb1 \ + "1: ldl_l %[__ret],%[__mem]\n" \ + " mov %[__val],%[__tmp]\n" \ + " stl_c %[__tmp],%[__mem]\n" \ + " beq %[__tmp],1b\n" \ + mb2 \ + : [__ret] "=&r" (__ret), \ + [__tmp] "=&r" (__tmp) \ + : [__mem] "m" (*(mem)), \ + [__val] "Ir" (value) \ + : "memory"); \ + __ret; }) + +#define __arch_exchange_64_int(mem, value, mb1, mb2) \ +({ \ + unsigned long __tmp; __typeof(*mem) __ret; \ + __asm__ __volatile__ ( \ + mb1 \ + "1: ldq_l %[__ret],%[__mem]\n" \ + " mov %[__val],%[__tmp]\n" \ + " stq_c %[__tmp],%[__mem]\n" \ + " beq %[__tmp],1b\n" \ + mb2 \ + : [__ret] "=&r" (__ret), \ + [__tmp] "=&r" (__tmp) \ + : [__mem] "m" (*(mem)), \ + [__val] "Ir" (value) \ + : "memory"); \ + __ret; }) + +#define atomic_exchange_acq(mem, value) \ + __atomic_val_bysize (__arch_exchange, int, mem, value, "", __MB) + +#define atomic_exchange_rel(mem, value) \ + __atomic_val_bysize (__arch_exchange, int, mem, value, __MB, "") + + +/* Atomically add value and return the previous (unincremented) value. */ + +#define __arch_exchange_and_add_8_int(mem, value, mb1, mb2) \ + ({ __builtin_trap (); 0; }) + +#define __arch_exchange_and_add_16_int(mem, value, mb1, mb2) \ + ({ __builtin_trap (); 0; }) + +#define __arch_exchange_and_add_32_int(mem, value, mb1, mb2) \ +({ \ + signed int __tmp; __typeof(*mem) __ret; \ + __asm__ __volatile__ ( \ + mb1 \ + "1: ldl_l %[__ret],%[__mem]\n" \ + " addl %[__ret],%[__val],%[__tmp]\n" \ + " stl_c %[__tmp],%[__mem]\n" \ + " beq %[__tmp],1b\n" \ + mb2 \ + : [__ret] "=&r" (__ret), \ + [__tmp] "=&r" (__tmp) \ + : [__mem] "m" (*(mem)), \ + [__val] "Ir" ((signed int)(value)) \ + : "memory"); \ + __ret; }) + +#define __arch_exchange_and_add_64_int(mem, value, mb1, mb2) \ +({ \ + unsigned long __tmp; __typeof(*mem) __ret; \ + __asm__ __volatile__ ( \ + mb1 \ + "1: ldq_l %[__ret],%[__mem]\n" \ + " addq %[__ret],%[__val],%[__tmp]\n" \ + " stq_c %[__tmp],%[__mem]\n" \ + " beq %[__tmp],1b\n" \ + mb2 \ + : [__ret] "=&r" (__ret), \ + [__tmp] "=&r" (__tmp) \ + : [__mem] "m" (*(mem)), \ + [__val] "Ir" ((unsigned long)(value)) \ + : "memory"); \ + __ret; }) + +/* ??? Barrier semantics for atomic_exchange_and_add appear to be + undefined. Use full barrier for now, as that's safe. */ +#define atomic_exchange_and_add(mem, value) \ + __atomic_val_bysize (__arch_exchange_and_add, int, mem, value, __MB, __MB) + + +/* ??? Blah, I'm lazy. Implement these later. Can do better than the + compare-and-exchange loop provided by generic code. + +#define atomic_decrement_if_positive(mem) +#define atomic_bit_test_set(mem, bit) + +*/ + +#ifndef UP +# define atomic_full_barrier() __asm ("mb" : : : "memory"); +# define atomic_read_barrier() __asm ("mb" : : : "memory"); +# define atomic_write_barrier() __asm ("wmb" : : : "memory"); +#endif diff --git a/REORG.TODO/sysdeps/alpha/backtrace.c b/REORG.TODO/sysdeps/alpha/backtrace.c new file mode 100644 index 0000000000..27ce597b39 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/backtrace.c @@ -0,0 +1 @@ +#include <sysdeps/x86_64/backtrace.c> diff --git a/REORG.TODO/sysdeps/alpha/bb_init_func.S b/REORG.TODO/sysdeps/alpha/bb_init_func.S new file mode 100644 index 0000000000..a3064d8abc --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/bb_init_func.S @@ -0,0 +1,86 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by David Mosberger (davidm@cs.arizona.edu). + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* __bb_init_func is invoked at the beginning of each function, before + any registers have been saved. It is therefore safe to use any + caller-saved (call-used) registers (except for argument registers + a1-a5). */ + +#include <sysdep.h> + +/* + * These offsets should match with "struct bb" declared in gcc/libgcc2.c. + */ +#define ZERO_WORD 0x00 +#define NEXT 0x20 + + .set noat + .set noreorder + +ENTRY(__bb_init_func) + .prologue 0 + + ldq t0, ZERO_WORD(a0) /* t0 <- blocks->zero_word */ + beq t0, init /* not initialized yet -> */ + ret + +END(__bb_init_func) + + .ent init +init: + .frame sp, 0x38, ra, 0 + subq sp, 0x38, sp + .prologue 0 + + stq pv, 0x30(sp) + br pv, 1f +1: ldgp gp, 0(pv) + + ldiq t1, __bb_head + lda t3, _gmonparam + ldq t2, 0(t1) + ldl t3, 0(t3) /* t3 = _gmonparam.state */ + lda t0, 1 + stq t0, ZERO_WORD(a0) /* blocks->zero_word = 1 */ + stq t2, NEXT(a0) /* blocks->next = __bb_head */ + stq a0, 0(t1) + bne t2, $leave + beq t3, $leave /* t3 == GMON_PROF_ON? yes -> */ + + /* also need to initialize destructor: */ + stq ra, 0x00(sp) + lda a0, __bb_exit_func + stq a1, 0x08(sp) + lda pv, atexit + stq a2, 0x10(sp) + stq a3, 0x18(sp) + stq a4, 0x20(sp) + stq a5, 0x28(sp) + jsr ra, (pv), atexit + ldq ra, 0x00(sp) + ldq a1, 0x08(sp) + ldq a2, 0x10(sp) + ldq a3, 0x18(sp) + ldq a4, 0x20(sp) + ldq a5, 0x28(sp) + +$leave: ldq pv, 0x30(sp) + addq sp, 0x38, sp + ret + + .end init diff --git a/REORG.TODO/sysdeps/alpha/bits/endian.h b/REORG.TODO/sysdeps/alpha/bits/endian.h new file mode 100644 index 0000000000..8a16e14e24 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/bits/endian.h @@ -0,0 +1,7 @@ +/* Alpha is little-endian. */ + +#ifndef _ENDIAN_H +# error "Never use <bits/endian.h> directly; include <endian.h> instead." +#endif + +#define __BYTE_ORDER __LITTLE_ENDIAN diff --git a/REORG.TODO/sysdeps/alpha/bits/link.h b/REORG.TODO/sysdeps/alpha/bits/link.h new file mode 100644 index 0000000000..f9b8938f07 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/bits/link.h @@ -0,0 +1,68 @@ +/* Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _LINK_H +# error "Never include <bits/link.h> directly; use <link.h> instead." +#endif + + +/* Registers for entry into PLT on Alpha. */ +typedef struct La_alpha_regs +{ + uint64_t lr_r26; + uint64_t lr_sp; + uint64_t lr_r16; + uint64_t lr_r17; + uint64_t lr_r18; + uint64_t lr_r19; + uint64_t lr_r20; + uint64_t lr_r21; + double lr_f16; + double lr_f17; + double lr_f18; + double lr_f19; + double lr_f20; + double lr_f21; +} La_alpha_regs; + +/* Return values for calls from PLT on Alpha. */ +typedef struct La_alpha_retval +{ + uint64_t lrv_r0; + uint64_t lrv_r1; + double lrv_f0; + double lrv_f1; +} La_alpha_retval; + + +__BEGIN_DECLS + +extern Elf64_Addr la_alpha_gnu_pltenter (Elf64_Sym *__sym, unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_alpha_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); +extern unsigned int la_alpha_gnu_pltexit (Elf64_Sym *__sym, unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_alpha_regs *__inregs, + La_alpha_retval *__outregs, + const char *symname); + +__END_DECLS diff --git a/REORG.TODO/sysdeps/alpha/bits/mathdef.h b/REORG.TODO/sysdeps/alpha/bits/mathdef.h new file mode 100644 index 0000000000..f375bfda7f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/bits/mathdef.h @@ -0,0 +1,44 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _COMPLEX_H +# error "Never use <bits/mathdef.h> directly; include <complex.h> instead" +#endif + +#if defined _COMPLEX_H && !defined _COMPLEX_H_MATHDEF +# define _COMPLEX_H_MATHDEF 1 +# if defined(__GNUC__) && !__GNUC_PREREQ(3,4) + +/* Due to an ABI change, we need to remap the complex float symbols. */ +# define _Mdouble_ float +# define __MATHCALL(function, args) \ + __MATHDECL (_Complex float, function, args) +# define __MATHDECL(type, function, args) \ + __MATHDECL_1(type, function##f, args, __c1_##function##f); \ + __MATHDECL_1(type, __##function##f, args, __c1_##function##f) +# define __MATHDECL_1(type, function, args, alias) \ + extern type function args __asm__(#alias) __THROW + +# include <bits/cmathcalls.h> + +# undef _Mdouble_ +# undef __MATHCALL +# undef __MATHDECL +# undef __MATHDECL_1 + +# endif /* GNUC before 3.4 */ +#endif /* COMPLEX_H */ diff --git a/REORG.TODO/sysdeps/alpha/bits/setjmp.h b/REORG.TODO/sysdeps/alpha/bits/setjmp.h new file mode 100644 index 0000000000..d7eb751de8 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/bits/setjmp.h @@ -0,0 +1,61 @@ +/* Define the machine-dependent type `jmp_buf'. Alpha version. + Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _BITS_SETJMP_H +#define _BITS_SETJMP_H 1 + +#if !defined _SETJMP_H && !defined _PTHREAD_H +# error "Never include <bits/setjmp.h> directly; use <setjmp.h> instead." +#endif + +/* The previous bits/setjmp.h had __jmp_buf defined as a structure. + We use an array of 'long int' instead, to make writing the + assembler easier. Naturally, user code should not depend on + either representation. */ + +/* + * Integer registers: + * $0 is the return value (va); + * $1-$8, $22-$25, $28 are call-used (t0-t7, t8-t11, at); + * $9-$14 we save here (s0-s5); + * $15 is the FP and we save it here (fp or s6); + * $16-$21 are input arguments (call-used) (a0-a5); + * $26 is the return PC and we save it here (ra); + * $27 is the procedure value (i.e., the address of __setjmp) (pv or t12); + * $29 is the global pointer, which the caller will reconstruct + * from the return address restored in $26 (gp); + * $30 is the stack pointer and we save it here (sp); + * $31 is always zero (zero). + * + * Floating-point registers: + * $f0 is the floating return value; + * $f1, $f10-$f15, $f22-$f30 are call-used; + * $f2-$f9 we save here; + * $f16-$21 are input args (call-used); + * $f31 is always zero. + * + * Note that even on Alpha hardware that does not have an FPU (there + * isn't such a thing currently) it is required to implement the FP + * registers. + */ + +#ifndef __ASSEMBLY__ +typedef long int __jmp_buf[17]; +#endif + +#endif /* bits/setjmp.h */ diff --git a/REORG.TODO/sysdeps/alpha/bsd-_setjmp.S b/REORG.TODO/sysdeps/alpha/bsd-_setjmp.S new file mode 100644 index 0000000000..4e6a2da560 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/bsd-_setjmp.S @@ -0,0 +1 @@ +/* _setjmp is in setjmp.S */ diff --git a/REORG.TODO/sysdeps/alpha/bsd-setjmp.S b/REORG.TODO/sysdeps/alpha/bsd-setjmp.S new file mode 100644 index 0000000000..1da848d2f1 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/bsd-setjmp.S @@ -0,0 +1 @@ +/* setjmp is in setjmp.S */ diff --git a/REORG.TODO/sysdeps/alpha/bzero.S b/REORG.TODO/sysdeps/alpha/bzero.S new file mode 100644 index 0000000000..9cea9fb59d --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/bzero.S @@ -0,0 +1,110 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Fill a block of memory with zeros. Optimized for the Alpha architecture: + + - memory accessed as aligned quadwords only + - destination memory not read unless needed for good cache behaviour + - basic blocks arranged to optimize branch prediction for full-quadword + aligned memory blocks. + - partial head and tail quadwords constructed with byte-mask instructions + + This is generally scheduled for the EV5 (got to look out for my own + interests :-), but with EV4 needs in mind. There *should* be no more + stalls for the EV4 than there are for the EV5. +*/ + + +#include <sysdep.h> + + .set noat + .set noreorder + + .text + .type __bzero, @function + .globl __bzero + .usepv __bzero, USEPV_PROF + + cfi_startproc + + /* On entry to this basic block: + t3 == loop counter + t4 == bytes in partial final word + a0 == possibly misaligned destination pointer */ + + .align 3 +bzero_loop: + beq t3, $tail # + blbc t3, 0f # skip single store if count even + + stq_u zero, 0(a0) # e0 : store one word + subq t3, 1, t3 # .. e1 : + addq a0, 8, a0 # e0 : + beq t3, $tail # .. e1 : + +0: stq_u zero, 0(a0) # e0 : store two words + subq t3, 2, t3 # .. e1 : + stq_u zero, 8(a0) # e0 : + addq a0, 16, a0 # .. e1 : + bne t3, 0b # e1 : + +$tail: bne t4, 1f # is there a tail to do? + ret # no + +1: ldq_u t0, 0(a0) # yes, load original data + mskqh t0, t4, t0 # + stq_u t0, 0(a0) # + ret # + +__bzero: +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + + mov a0, v0 # e0 : move return value in place + beq a1, $done # .. e1 : early exit for zero-length store + and a0, 7, t1 # e0 : + addq a1, t1, a1 # e1 : add dest misalignment to count + srl a1, 3, t3 # e0 : loop = count >> 3 + and a1, 7, t4 # .. e1 : find number of bytes in tail + unop # : + beq t1, bzero_loop # e1 : aligned head, jump right in + + ldq_u t0, 0(a0) # e0 : load original data to mask into + cmpult a1, 8, t2 # .. e1 : is this a sub-word set? + bne t2, $oneq # e1 : + + mskql t0, a0, t0 # e0 : we span words. finish this partial + subq t3, 1, t3 # e0 : + addq a0, 8, a0 # .. e1 : + stq_u t0, -8(a0) # e0 : + br bzero_loop # .. e1 : + + .align 3 +$oneq: + mskql t0, a0, t2 # e0 : + mskqh t0, a1, t3 # e0 : + or t2, t3, t0 # e1 : + stq_u t0, 0(a0) # e0 : + +$done: ret + + cfi_endproc +weak_alias (__bzero, bzero) diff --git a/REORG.TODO/sysdeps/alpha/configure b/REORG.TODO/sysdeps/alpha/configure new file mode 100644 index 0000000000..464b596527 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/configure @@ -0,0 +1,8 @@ +# This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/alpha. + +# With required gcc+binutils, we can always access static and hidden +# symbols in a position independent way. +$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h + +# work around problem with autoconf and empty lines at the end of files diff --git a/REORG.TODO/sysdeps/alpha/configure.ac b/REORG.TODO/sysdeps/alpha/configure.ac new file mode 100644 index 0000000000..38e52e71ac --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/configure.ac @@ -0,0 +1,7 @@ +GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. +# Local configure fragment for sysdeps/alpha. + +# With required gcc+binutils, we can always access static and hidden +# symbols in a position independent way. +AC_DEFINE(PI_STATIC_AND_HIDDEN) +# work around problem with autoconf and empty lines at the end of files diff --git a/REORG.TODO/sysdeps/alpha/crti.S b/REORG.TODO/sysdeps/alpha/crti.S new file mode 100644 index 0000000000..932b337e29 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/crti.S @@ -0,0 +1,99 @@ +/* Special .init and .fini section support for Alpha. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* crti.S puts a function prologue at the beginning of the .init and + .fini sections and defines global symbols for those addresses, so + they can be called as functions. The symbols _init and _fini are + magic and cause the linker to emit DT_INIT and DT_FINI. + + This differs from what would be generated for ordinary code in that + we save and restore the GP within the function. In order for linker + relaxation to work, the value in the GP register on exit from a function + must be valid for the function entry point. Normally, a function is + contained within one object file and this is not an issue, provided + that the function reloads the gp after making any function calls. + However, _init and _fini are constructed from pieces of many object + files, all of which may have different GP values. So we must reload + the GP value from crti.o in crtn.o. */ + +#include <libc-symbols.h> +#include <sysdep.h> + +#ifndef PREINIT_FUNCTION +# define PREINIT_FUNCTION __gmon_start__ +#endif + +#ifndef PREINIT_FUNCTION_WEAK +# define PREINIT_FUNCTION_WEAK 1 +#endif + +#if PREINIT_FUNCTION_WEAK + weak_extern (PREINIT_FUNCTION) +#else + .hidden PREINIT_FUNCTION +#endif + + .section .init, "ax", @progbits + .globl _init + .type _init, @function + .usepv _init, std +_init: + ldgp $29, 0($27) + subq $30, 16, $30 +#if PREINIT_FUNCTION_WEAK + lda $27, PREINIT_FUNCTION +#endif + stq $26, 0($30) + stq $29, 8($30) +#if PREINIT_FUNCTION_WEAK + beq $27, 1f + jsr $26, ($27), PREINIT_FUNCTION + ldq $29, 8($30) +1: +#else + bsr $26, PREINIT_FUNCTION !samegp +#endif + .p2align 3 + + .section .fini, "ax", @progbits + .globl _fini + .type _fini,@function + .usepv _fini,std +_fini: + ldgp $29, 0($27) + subq $30, 16, $30 + stq $26, 0($30) + stq $29, 8($30) + .p2align 3 diff --git a/REORG.TODO/sysdeps/alpha/crtn.S b/REORG.TODO/sysdeps/alpha/crtn.S new file mode 100644 index 0000000000..cb310778e1 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/crtn.S @@ -0,0 +1,49 @@ +/* Special .init and .fini section support for Alpha. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* crtn.S puts function epilogues in the .init and .fini sections + corresponding to the prologues in crti.S. */ + + .section .init, "ax", @progbits + ldq $26, 0($30) + ldq $29, 8($30) + addq $30, 16, $30 + ret + + .section .fini, "ax", @progbits + ldq $26, 0($30) + ldq $29, 8($30) + addq $30, 16, $30 + ret diff --git a/REORG.TODO/sysdeps/alpha/div.S b/REORG.TODO/sysdeps/alpha/div.S new file mode 100644 index 0000000000..a323379067 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/div.S @@ -0,0 +1,87 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + +#undef FRAME +#ifdef __alpha_fix__ +#define FRAME 0 +#else +#define FRAME 16 +#endif + + .set noat + + .align 4 + .globl div + .ent div +div: + .frame sp, FRAME, ra +#if FRAME > 0 + lda sp, -FRAME(sp) +#endif +#ifdef PROF + .set macro + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .set nomacro + .prologue 1 +#else + .prologue 0 +#endif + + beq $18, $divbyzero + excb + mf_fpcr $f10 + + _ITOFT2 $17, $f0, 0, $18, $f1, 8 + + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + cvttq/c $f0, $f0 + excb + mt_fpcr $f10 + _FTOIT $f0, $0, 0 + + mull $0, $18, $1 + subl $17, $1, $1 + + stl $0, 0(a0) + stl $1, 4(a0) + mov a0, v0 + +#if FRAME > 0 + lda sp, FRAME(sp) +#endif + ret + +$divbyzero: + mov a0, v0 + lda a0, GEN_INTDIV + call_pal PAL_gentrap + stl zero, 0(v0) + stl zero, 4(v0) + +#if FRAME > 0 + lda sp, FRAME(sp) +#endif + ret + + .end div diff --git a/REORG.TODO/sysdeps/alpha/div_libc.h b/REORG.TODO/sysdeps/alpha/div_libc.h new file mode 100644 index 0000000000..96eb95bc7e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/div_libc.h @@ -0,0 +1,163 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Common bits for implementing software divide. */ + +#include <sysdep.h> +#ifdef __linux__ +# include <asm/gentrap.h> +# include <asm/pal.h> +#else +# include <machine/pal.h> +#endif + +/* These are not normal C functions. Argument registers are t10 and t11; + the result goes in t12; the return address is in t9. Only t12 and AT + may be clobbered. */ +#define X t10 +#define Y t11 +#define RV t12 +#define RA t9 + +/* The secureplt format does not allow the division routines to be called + via plt; there aren't enough registers free to be clobbered. Avoid + setting the symbol type to STT_FUNC, so that the linker won't be tempted + to create a plt entry. */ +#define funcnoplt notype + +/* None of these functions should use implicit anything. */ + .set nomacro + .set noat + +/* Code fragment to invoke _mcount for profiling. This should be invoked + directly after allocation of the stack frame. */ +.macro CALL_MCOUNT +#ifdef PROF + stq ra, 0(sp) + stq pv, 8(sp) + stq gp, 16(sp) + cfi_rel_offset (ra, 0) + cfi_rel_offset (pv, 8) + cfi_rel_offset (gp, 16) + br AT, 1f + .set macro +1: ldgp gp, 0(AT) + mov RA, ra + lda AT, _mcount + jsr AT, (AT), _mcount + .set nomacro + ldq ra, 0(sp) + ldq pv, 8(sp) + ldq gp, 16(sp) + cfi_restore (ra) + cfi_restore (pv) + cfi_restore (gp) + /* Realign subsequent code with what we'd have without this + macro at all. This means aligned with one arithmetic insn + used within the bundle. */ + .align 4 + nop +#endif +.endm + +/* In order to make the below work, all top-level divide routines must + use the same frame size. */ +#define FRAME 64 + +/* Code fragment to generate an integer divide-by-zero fault. When + building libc.so, we arrange for there to be one copy of this code + placed late in the dso, such that all branches are forward. When + building libc.a, we use multiple copies to avoid having an out of + range branch. Users should jump to DIVBYZERO. */ + +.macro DO_DIVBYZERO +#ifdef PIC +#define DIVBYZERO __divbyzero + .section .gnu.linkonce.t.divbyzero, "ax", @progbits + .globl __divbyzero + .type __divbyzero, @function + .usepv __divbyzero, no + .hidden __divbyzero +#else +#define DIVBYZERO $divbyzero +#endif + + .align 4 +DIVBYZERO: + cfi_startproc + cfi_return_column (RA) + cfi_def_cfa_offset (FRAME) + + mov a0, RV + unop + lda a0, GEN_INTDIV + call_pal PAL_gentrap + + mov RV, a0 + clr RV + lda sp, FRAME(sp) + cfi_def_cfa_offset (0) + ret $31, (RA), 1 + + cfi_endproc + .size DIVBYZERO, .-DIVBYZERO +.endm + +/* Like the ev6 instructions, but fall back to stack use on prior machines. */ + + .arch ev6 + +.macro _ITOFS gr, fr, slot +#ifdef __alpha_fix__ + itofs \gr, \fr +#else + stl \gr, \slot(sp) + lds \fr, \slot(sp) +#endif +.endm + +.macro _ITOFT gr, fr, slot +#ifdef __alpha_fix__ + itoft \gr, \fr +#else + stq \gr, \slot(sp) + ldt \fr, \slot(sp) +#endif +.endm + +.macro _FTOIT fr, gr, slot +#ifdef __alpha_fix__ + ftoit \fr, \gr +#else + stt \fr, \slot(sp) + ldq \gr, \slot(sp) +#endif +.endm + +/* Similarly, but move two registers. Schedules better for pre-ev6. */ + +.macro _ITOFT2 gr1, fr1, slot1, gr2, fr2, slot2 +#ifdef __alpha_fix__ + itoft \gr1, \fr1 + itoft \gr2, \fr2 +#else + stq \gr1, \slot1(sp) + stq \gr2, \slot2(sp) + ldt \fr1, \slot1(sp) + ldt \fr2, \slot2(sp) +#endif +.endm diff --git a/REORG.TODO/sysdeps/alpha/divl.S b/REORG.TODO/sysdeps/alpha/divl.S new file mode 100644 index 0000000000..f09433e993 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/divl.S @@ -0,0 +1,83 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + +/* 32-bit signed int divide. This is not a normal C function. Argument + registers are t10 and t11, the result goes in t12. Only t12 and AT may + be clobbered. + + The FPU can handle all input values except zero. Whee! + + The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE + for cvttq/c even without /sui being set. It will not, however, properly + raise the exception, so we don't have to worry about FPCR_INED being clear + and so dying by SIGFPE. */ + +#ifndef EXTEND +#define EXTEND(S,D) sextl S, D +#endif + + .text + .align 4 + .globl __divl + .type __divl, @funcnoplt + .usepv __divl, no + + cfi_startproc + cfi_return_column (RA) +__divl: + lda sp, -FRAME(sp) + cfi_def_cfa_offset (FRAME) + CALL_MCOUNT + stt $f0, 0(sp) + excb + beq Y, DIVBYZERO + + stt $f1, 8(sp) + stt $f2, 16(sp) + cfi_rel_offset ($f0, 0) + cfi_rel_offset ($f1, 8) + cfi_rel_offset ($f2, 16) + mf_fpcr $f2 + + EXTEND (X, RV) + EXTEND (Y, AT) + _ITOFT2 RV, $f0, 24, AT, $f1, 32 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + cvttq/c $f0, $f0 + excb + mt_fpcr $f2 + _FTOIT $f0, RV, 24 + + ldt $f0, 0(sp) + ldt $f1, 8(sp) + ldt $f2, 16(sp) + lda sp, FRAME(sp) + cfi_restore ($f0) + cfi_restore ($f1) + cfi_restore ($f2) + cfi_def_cfa_offset (0) + sextl RV, RV + ret $31, (RA), 1 + + cfi_endproc + .size __divl, .-__divl + + DO_DIVBYZERO diff --git a/REORG.TODO/sysdeps/alpha/divlu.S b/REORG.TODO/sysdeps/alpha/divlu.S new file mode 100644 index 0000000000..5c54bb54c0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/divlu.S @@ -0,0 +1,4 @@ +#define UNSIGNED +#define EXTEND(S,D) zapnot S, 15, D +#define __divl __divlu +#include <divl.S> diff --git a/REORG.TODO/sysdeps/alpha/divq.S b/REORG.TODO/sysdeps/alpha/divq.S new file mode 100644 index 0000000000..f9acc1c70e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/divq.S @@ -0,0 +1,273 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + + +/* 64-bit signed long divide. These are not normal C functions. Argument + registers are t10 and t11, the result goes in t12. Only t12 and AT may + be clobbered. + + Theory of operation here is that we can use the FPU divider for virtually + all operands that we see: all dividend values between -2**53 and 2**53-1 + can be computed directly. Note that divisor values need not be checked + against that range because the rounded fp value will be close enough such + that the quotient is < 1, which will properly be truncated to zero when we + convert back to integer. + + When the dividend is outside the range for which we can compute exact + results, we use the fp quotent as an estimate from which we begin refining + an exact integral value. This reduces the number of iterations in the + shift-and-subtract loop significantly. + + The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE + for cvttq/c even without /sui being set. It will not, however, properly + raise the exception, so we don't have to worry about FPCR_INED being clear + and so dying by SIGFPE. */ + + .text + .align 4 + .globl __divq + .type __divq, @funcnoplt + .usepv __divq, no + + cfi_startproc + cfi_return_column (RA) +__divq: + lda sp, -FRAME(sp) + cfi_def_cfa_offset (FRAME) + CALL_MCOUNT + + /* Get the fp divide insn issued as quickly as possible. After + that's done, we have at least 22 cycles until its results are + ready -- all the time in the world to figure out how we're + going to use the results. */ + stt $f0, 0(sp) + excb + beq Y, DIVBYZERO + + stt $f1, 8(sp) + stt $f3, 48(sp) + cfi_rel_offset ($f0, 0) + cfi_rel_offset ($f1, 8) + cfi_rel_offset ($f3, 48) + mf_fpcr $f3 + + _ITOFT2 X, $f0, 16, Y, $f1, 24 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + + /* Check to see if X fit in the double as an exact value. */ + sll X, (64-53), AT + ldt $f1, 8(sp) + sra AT, (64-53), AT + cmpeq X, AT, AT + beq AT, $x_big + + /* If we get here, we're expecting exact results from the division. + Do nothing else besides convert and clean up. */ + cvttq/c $f0, $f0 + excb + mt_fpcr $f3 + _FTOIT $f0, RV, 16 + + ldt $f0, 0(sp) + ldt $f3, 48(sp) + cfi_restore ($f1) + cfi_remember_state + cfi_restore ($f0) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + lda sp, FRAME(sp) + ret $31, (RA), 1 + + .align 4 + cfi_restore_state +$x_big: + /* If we get here, X is large enough that we don't expect exact + results, and neither X nor Y got mis-translated for the fp + division. Our task is to take the fp result, figure out how + far it's off from the correct result and compute a fixup. */ + stq t0, 16(sp) + stq t1, 24(sp) + stq t2, 32(sp) + stq t5, 40(sp) + cfi_rel_offset (t0, 16) + cfi_rel_offset (t1, 24) + cfi_rel_offset (t2, 32) + cfi_rel_offset (t5, 40) + +#define Q RV /* quotient */ +#define R t0 /* remainder */ +#define SY t1 /* scaled Y */ +#define S t2 /* scalar */ +#define QY t3 /* Q*Y */ + + /* The fixup code below can only handle unsigned values. */ + or X, Y, AT + mov $31, t5 + blt AT, $fix_sign_in +$fix_sign_in_ret1: + cvttq/c $f0, $f0 + + _FTOIT $f0, Q, 8 + .align 3 +$fix_sign_in_ret2: + ldt $f0, 0(sp) + stq t3, 0(sp) + cfi_restore ($f0) + cfi_rel_offset (t3, 0) + + mulq Q, Y, QY + excb + stq t4, 8(sp) + mt_fpcr $f3 + cfi_rel_offset (t4, 8) + + subq QY, X, R + mov Y, SY + mov 1, S + bgt R, $q_high + +$q_high_ret: + subq X, QY, R + mov Y, SY + mov 1, S + bgt R, $q_low + +$q_low_ret: + ldq t0, 16(sp) + ldq t1, 24(sp) + ldq t2, 32(sp) + bne t5, $fix_sign_out + +$fix_sign_out_ret: + ldq t3, 0(sp) + ldq t4, 8(sp) + ldq t5, 40(sp) + ldt $f3, 48(sp) + lda sp, FRAME(sp) + cfi_remember_state + cfi_restore (t0) + cfi_restore (t1) + cfi_restore (t2) + cfi_restore (t3) + cfi_restore (t4) + cfi_restore (t5) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + ret $31, (RA), 1 + + .align 4 + cfi_restore_state + /* The quotient that we computed was too large. We need to reduce + it by S such that Y*S >= R. Obviously the closer we get to the + correct value the better, but overshooting high is ok, as we'll + fix that up later. */ +0: + addq SY, SY, SY + addq S, S, S +$q_high: + cmpult SY, R, AT + bne AT, 0b + + subq Q, S, Q + unop + subq QY, SY, QY + br $q_high_ret + + .align 4 + /* The quotient that we computed was too small. Divide Y by the + current remainder (R) and add that to the existing quotient (Q). + The expectation, of course, is that R is much smaller than X. */ + /* Begin with a shift-up loop. Compute S such that Y*S >= R. We + already have a copy of Y in SY and the value 1 in S. */ +0: + addq SY, SY, SY + addq S, S, S +$q_low: + cmpult SY, R, AT + bne AT, 0b + + /* Shift-down and subtract loop. Each iteration compares our scaled + Y (SY) with the remainder (R); if SY <= R then X is divisible by + Y's scalar (S) so add it to the quotient (Q). */ +2: addq Q, S, t3 + srl S, 1, S + cmpule SY, R, AT + subq R, SY, t4 + + cmovne AT, t3, Q + cmovne AT, t4, R + srl SY, 1, SY + bne S, 2b + + br $q_low_ret + + .align 4 +$fix_sign_in: + /* If we got here, then X|Y is negative. Need to adjust everything + such that we're doing unsigned division in the fixup loop. */ + /* T5 records the changes we had to make: + bit 0: set if result should be negative. + bit 2: set if X was negated. + bit 3: set if Y was negated. + */ + xor X, Y, AT + cmplt AT, 0, t5 + cmplt X, 0, AT + negq X, t0 + + s4addq AT, t5, t5 + cmovne AT, t0, X + cmplt Y, 0, AT + negq Y, t0 + + s8addq AT, t5, t5 + cmovne AT, t0, Y + unop + blbc t5, $fix_sign_in_ret1 + + cvttq/c $f0, $f0 + _FTOIT $f0, Q, 8 + .align 3 + negq Q, Q + br $fix_sign_in_ret2 + + .align 4 +$fix_sign_out: + /* Now we get to undo what we did above. */ + /* ??? Is this really faster than just increasing the size of + the stack frame and storing X and Y in memory? */ + and t5, 8, AT + negq Y, t4 + cmovne AT, t4, Y + + and t5, 4, AT + negq X, t4 + cmovne AT, t4, X + + negq RV, t4 + cmovlbs t5, t4, RV + + br $fix_sign_out_ret + + cfi_endproc + .size __divq, .-__divq + + DO_DIVBYZERO diff --git a/REORG.TODO/sysdeps/alpha/divqu.S b/REORG.TODO/sysdeps/alpha/divqu.S new file mode 100644 index 0000000000..15101fa246 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/divqu.S @@ -0,0 +1,256 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + + +/* 64-bit unsigned long divide. These are not normal C functions. Argument + registers are t10 and t11, the result goes in t12. Only t12 and AT may be + clobbered. + + Theory of operation here is that we can use the FPU divider for virtually + all operands that we see: all dividend values between -2**53 and 2**53-1 + can be computed directly. Note that divisor values need not be checked + against that range because the rounded fp value will be close enough such + that the quotient is < 1, which will properly be truncated to zero when we + convert back to integer. + + When the dividend is outside the range for which we can compute exact + results, we use the fp quotent as an estimate from which we begin refining + an exact integral value. This reduces the number of iterations in the + shift-and-subtract loop significantly. + + The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE + for cvttq/c even without /sui being set. It will not, however, properly + raise the exception, so we don't have to worry about FPCR_INED being clear + and so dying by SIGFPE. */ + + .text + .align 4 + .globl __divqu + .type __divqu, @funcnoplt + .usepv __divqu, no + + cfi_startproc + cfi_return_column (RA) +__divqu: + lda sp, -FRAME(sp) + cfi_def_cfa_offset (FRAME) + CALL_MCOUNT + + /* Get the fp divide insn issued as quickly as possible. After + that's done, we have at least 22 cycles until its results are + ready -- all the time in the world to figure out how we're + going to use the results. */ + stt $f0, 0(sp) + excb + beq Y, DIVBYZERO + + stt $f1, 8(sp) + stt $f3, 48(sp) + cfi_rel_offset ($f0, 0) + cfi_rel_offset ($f1, 8) + cfi_rel_offset ($f3, 48) + mf_fpcr $f3 + + _ITOFT2 X, $f0, 16, Y, $f1, 24 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + blt X, $x_is_neg + divt/c $f0, $f1, $f0 + + /* Check to see if Y was mis-converted as signed value. */ + ldt $f1, 8(sp) + blt Y, $y_is_neg + + /* Check to see if X fit in the double as an exact value. */ + srl X, 53, AT + bne AT, $x_big + + /* If we get here, we're expecting exact results from the division. + Do nothing else besides convert and clean up. */ + cvttq/c $f0, $f0 + excb + mt_fpcr $f3 + _FTOIT $f0, RV, 16 + + ldt $f0, 0(sp) + ldt $f3, 48(sp) + cfi_remember_state + cfi_restore ($f0) + cfi_restore ($f1) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + lda sp, FRAME(sp) + ret $31, (RA), 1 + + .align 4 + cfi_restore_state +$x_is_neg: + /* If we get here, X is so big that bit 63 is set, which made the + conversion come out negative. Fix it up lest we not even get + a good estimate. */ + ldah AT, 0x5f80 /* 2**64 as float. */ + stt $f2, 24(sp) + cfi_rel_offset ($f2, 24) + _ITOFS AT, $f2, 16 + + .align 4 + addt $f0, $f2, $f0 + unop + divt/c $f0, $f1, $f0 + unop + + /* Ok, we've now the divide issued. Continue with other checks. */ + ldt $f1, 8(sp) + unop + ldt $f2, 24(sp) + blt Y, $y_is_neg + cfi_restore ($f1) + cfi_restore ($f2) + cfi_remember_state /* for y_is_neg */ + + .align 4 +$x_big: + /* If we get here, X is large enough that we don't expect exact + results, and neither X nor Y got mis-translated for the fp + division. Our task is to take the fp result, figure out how + far it's off from the correct result and compute a fixup. */ + stq t0, 16(sp) + stq t1, 24(sp) + stq t2, 32(sp) + stq t3, 40(sp) + cfi_rel_offset (t0, 16) + cfi_rel_offset (t1, 24) + cfi_rel_offset (t2, 32) + cfi_rel_offset (t3, 40) + +#define Q RV /* quotient */ +#define R t0 /* remainder */ +#define SY t1 /* scaled Y */ +#define S t2 /* scalar */ +#define QY t3 /* Q*Y */ + + cvttq/c $f0, $f0 + _FTOIT $f0, Q, 8 + mulq Q, Y, QY + + .align 4 + stq t4, 8(sp) + excb + ldt $f0, 0(sp) + mt_fpcr $f3 + cfi_rel_offset (t4, 8) + cfi_restore ($f0) + + subq QY, X, R + mov Y, SY + mov 1, S + bgt R, $q_high + +$q_high_ret: + subq X, QY, R + mov Y, SY + mov 1, S + bgt R, $q_low + +$q_low_ret: + ldq t4, 8(sp) + ldq t0, 16(sp) + ldq t1, 24(sp) + ldq t2, 32(sp) + + ldq t3, 40(sp) + ldt $f3, 48(sp) + lda sp, FRAME(sp) + cfi_remember_state + cfi_restore (t0) + cfi_restore (t1) + cfi_restore (t2) + cfi_restore (t3) + cfi_restore (t4) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + ret $31, (RA), 1 + + .align 4 + cfi_restore_state + /* The quotient that we computed was too large. We need to reduce + it by S such that Y*S >= R. Obviously the closer we get to the + correct value the better, but overshooting high is ok, as we'll + fix that up later. */ +0: + addq SY, SY, SY + addq S, S, S +$q_high: + cmpult SY, R, AT + bne AT, 0b + + subq Q, S, Q + unop + subq QY, SY, QY + br $q_high_ret + + .align 4 + /* The quotient that we computed was too small. Divide Y by the + current remainder (R) and add that to the existing quotient (Q). + The expectation, of course, is that R is much smaller than X. */ + /* Begin with a shift-up loop. Compute S such that Y*S >= R. We + already have a copy of Y in SY and the value 1 in S. */ +0: + addq SY, SY, SY + addq S, S, S +$q_low: + cmpult SY, R, AT + bne AT, 0b + + /* Shift-down and subtract loop. Each iteration compares our scaled + Y (SY) with the remainder (R); if SY <= R then X is divisible by + Y's scalar (S) so add it to the quotient (Q). */ +2: addq Q, S, t3 + srl S, 1, S + cmpule SY, R, AT + subq R, SY, t4 + + cmovne AT, t3, Q + cmovne AT, t4, R + srl SY, 1, SY + bne S, 2b + + br $q_low_ret + + .align 4 + cfi_restore_state +$y_is_neg: + /* If we get here, Y is so big that bit 63 is set. The results + from the divide will be completely wrong. Fortunately, the + quotient must be either 0 or 1, so just compute it directly. */ + cmpule Y, X, RV + excb + mt_fpcr $f3 + ldt $f0, 0(sp) + ldt $f3, 48(sp) + lda sp, FRAME(sp) + cfi_restore ($f0) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + ret $31, (RA), 1 + + cfi_endproc + .size __divqu, .-__divqu + + DO_DIVBYZERO diff --git a/REORG.TODO/sysdeps/alpha/dl-dtprocnum.h b/REORG.TODO/sysdeps/alpha/dl-dtprocnum.h new file mode 100644 index 0000000000..67845cdd62 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-dtprocnum.h @@ -0,0 +1,3 @@ +/* Number of extra dynamic section entries for this architecture. By + default there are none. */ +#define DT_THISPROCNUM DT_ALPHA_NUM diff --git a/REORG.TODO/sysdeps/alpha/dl-machine.h b/REORG.TODO/sysdeps/alpha/dl-machine.h new file mode 100644 index 0000000000..7580cd29b6 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-machine.h @@ -0,0 +1,529 @@ +/* Machine-dependent ELF dynamic relocation inline functions. Alpha version. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This was written in the absence of an ABI -- don't expect + it to remain unchanged. */ + +#ifndef dl_machine_h +#define dl_machine_h 1 + +#define ELF_MACHINE_NAME "alpha" + +#include <string.h> + + +/* Mask identifying addresses reserved for the user program, + where the dynamic linker should not map anything. */ +#define ELF_MACHINE_USER_ADDRESS_MASK 0x120000000UL + +/* Translate a processor specific dynamic tag to the index in l_info array. */ +#define DT_ALPHA(x) (DT_ALPHA_##x - DT_LOPROC + DT_NUM) + +/* Return nonzero iff ELF header is compatible with the running host. */ +static inline int +elf_machine_matches_host (const Elf64_Ehdr *ehdr) +{ + return ehdr->e_machine == EM_ALPHA; +} + +/* Return the link-time address of _DYNAMIC. The multiple-got-capable + linker no longer allocates the first .got entry for this. But not to + worry, no special tricks are needed. */ +static inline Elf64_Addr +elf_machine_dynamic (void) +{ +#ifndef NO_AXP_MULTI_GOT_LD + return (Elf64_Addr) &_DYNAMIC; +#else + register Elf64_Addr *gp __asm__ ("$29"); + return gp[-4096]; +#endif +} + +/* Return the run-time load address of the shared object. */ + +static inline Elf64_Addr +elf_machine_load_address (void) +{ + /* This relies on the compiler using gp-relative addresses for static symbols. */ + static void *dot = ˙ + return (void *)&dot - dot; +} + +/* Set up the loaded object described by L so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. */ + +static inline int +elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) +{ + extern char _dl_runtime_resolve_new[] attribute_hidden; + extern char _dl_runtime_profile_new[] attribute_hidden; + extern char _dl_runtime_resolve_old[] attribute_hidden; + extern char _dl_runtime_profile_old[] attribute_hidden; + + struct pltgot { + char *resolve; + struct link_map *link; + }; + + struct pltgot *pg; + long secureplt; + char *resolve; + + if (map->l_info[DT_JMPREL] == 0 || !lazy) + return lazy; + + /* Check to see if we're using the read-only plt form. */ + secureplt = map->l_info[DT_ALPHA(PLTRO)] != 0; + + /* If the binary uses the read-only secure plt format, PG points to + the .got.plt section, which is the right place for ld.so to place + its hooks. Otherwise, PG is currently pointing at the start of + the plt; the hooks go at offset 16. */ + pg = (struct pltgot *) D_PTR (map, l_info[DT_PLTGOT]); + pg += !secureplt; + + /* This function will be called to perform the relocation. They're + not declared as functions to convince the compiler to use gp + relative relocations for them. */ + if (secureplt) + resolve = _dl_runtime_resolve_new; + else + resolve = _dl_runtime_resolve_old; + + if (__builtin_expect (profile, 0)) + { + if (secureplt) + resolve = _dl_runtime_profile_new; + else + resolve = _dl_runtime_profile_old; + + if (GLRO(dl_profile) && _dl_name_match_p (GLRO(dl_profile), map)) + { + /* This is the object we are looking for. Say that we really + want profiling and the timers are started. */ + GL(dl_profile_map) = map; + } + } + + pg->resolve = resolve; + pg->link = map; + + return lazy; +} + +/* Initial entry point code for the dynamic linker. + The C function `_dl_start' is the real entry point; + its return value is the user program's entry point. */ + +#define RTLD_START asm ("\ + .section .text \n\ + .set at \n\ + .globl _start \n\ + .ent _start \n\ +_start: \n\ + .frame $31,0,$31,0 \n\ + br $gp, 0f \n\ +0: ldgp $gp, 0($gp) \n\ + .prologue 0 \n\ + /* Pass pointer to argument block to _dl_start. */ \n\ + mov $sp, $16 \n\ + bsr $26, _dl_start !samegp \n\ + .end _start \n\ + /* FALLTHRU */ \n\ + .globl _dl_start_user \n\ + .ent _dl_start_user \n\ +_dl_start_user: \n\ + .frame $31,0,$31,0 \n\ + .prologue 0 \n\ + /* Save the user entry point address in s0. */ \n\ + mov $0, $9 \n\ + /* See if we were run as a command with the executable \n\ + file name as an extra leading argument. */ \n\ + ldah $1, _dl_skip_args($gp) !gprelhigh \n\ + ldl $1, _dl_skip_args($1) !gprellow \n\ + bne $1, $fixup_stack \n\ +$fixup_stack_ret: \n\ + /* The special initializer gets called with the stack \n\ + just as the application's entry point will see it; \n\ + it can switch stacks if it moves these contents \n\ + over. */ \n\ +" RTLD_START_SPECIAL_INIT " \n\ + /* Call _dl_init(_dl_loaded, argc, argv, envp) to run \n\ + initializers. */ \n\ + ldah $16, _rtld_local($gp) !gprelhigh \n\ + ldq $16, _rtld_local($16) !gprellow \n\ + ldq $17, 0($sp) \n\ + lda $18, 8($sp) \n\ + s8addq $17, 8, $19 \n\ + addq $19, $18, $19 \n\ + bsr $26, _dl_init !samegp \n\ + /* Pass our finalizer function to the user in $0. */ \n\ + ldah $0, _dl_fini($gp) !gprelhigh \n\ + lda $0, _dl_fini($0) !gprellow \n\ + /* Jump to the user's entry point. */ \n\ + mov $9, $27 \n\ + jmp ($9) \n\ +$fixup_stack: \n\ + /* Adjust the stack pointer to skip _dl_skip_args words.\n\ + This involves copying everything down, since the \n\ + stack pointer must always be 16-byte aligned. */ \n\ + ldah $7, __GI__dl_argv($gp) !gprelhigh \n\ + ldq $2, 0($sp) \n\ + ldq $5, __GI__dl_argv($7) !gprellow \n\ + subq $31, $1, $6 \n\ + subq $2, $1, $2 \n\ + s8addq $6, $5, $5 \n\ + mov $sp, $4 \n\ + s8addq $1, $sp, $3 \n\ + stq $2, 0($sp) \n\ + stq $5, __GI__dl_argv($7) !gprellow \n\ + /* Copy down argv. */ \n\ +0: ldq $5, 8($3) \n\ + addq $4, 8, $4 \n\ + addq $3, 8, $3 \n\ + stq $5, 0($4) \n\ + bne $5, 0b \n\ + /* Copy down envp. */ \n\ +1: ldq $5, 8($3) \n\ + addq $4, 8, $4 \n\ + addq $3, 8, $3 \n\ + stq $5, 0($4) \n\ + bne $5, 1b \n\ + /* Copy down auxiliary table. */ \n\ +2: ldq $5, 8($3) \n\ + ldq $6, 16($3) \n\ + addq $4, 16, $4 \n\ + addq $3, 16, $3 \n\ + stq $5, -8($4) \n\ + stq $6, 0($4) \n\ + bne $5, 2b \n\ + br $fixup_stack_ret \n\ + .end _dl_start_user \n\ + .set noat \n\ +.previous"); + +#ifndef RTLD_START_SPECIAL_INIT +#define RTLD_START_SPECIAL_INIT /* nothing */ +#endif + +/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry + or TLS variables, so undefined references should not be allowed + to define the value. + + ELF_RTYPE_CLASS_COPY iff TYPE should not be allowed to resolve + to one of the main executable's symbols, as for a COPY reloc. + This is unused on Alpha. */ + +# define elf_machine_type_class(type) \ + (((type) == R_ALPHA_JMP_SLOT \ + || (type) == R_ALPHA_DTPMOD64 \ + || (type) == R_ALPHA_DTPREL64 \ + || (type) == R_ALPHA_TPREL64) * ELF_RTYPE_CLASS_PLT) + +/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ +#define ELF_MACHINE_JMP_SLOT R_ALPHA_JMP_SLOT + +/* The alpha never uses Elf64_Rel relocations. */ +#define ELF_MACHINE_NO_REL 1 +#define ELF_MACHINE_NO_RELA 0 + +/* We define an initialization functions. This is called very early in + * _dl_sysdep_start. */ +#define DL_PLATFORM_INIT dl_platform_init () + +static inline void __attribute__ ((unused)) +dl_platform_init (void) +{ + if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') + /* Avoid an empty string which would disturb us. */ + GLRO(dl_platform) = NULL; +} + +/* Fix up the instructions of a PLT entry to invoke the function + rather than the dynamic linker. */ +static inline Elf64_Addr +elf_machine_fixup_plt (struct link_map *map, lookup_t t, + const Elf64_Rela *reloc, + Elf64_Addr *got_addr, Elf64_Addr value) +{ + const Elf64_Rela *rela_plt; + Elf64_Word *plte; + long int edisp; + + /* Store the value we are going to load. */ + *got_addr = value; + + /* If this binary uses the read-only secure plt format, we're done. */ + if (map->l_info[DT_ALPHA(PLTRO)]) + return value; + + /* Otherwise we have to modify the plt entry in place to do the branch. */ + + /* Recover the PLT entry address by calculating reloc's index into the + .rela.plt, and finding that entry in the .plt. */ + rela_plt = (const Elf64_Rela *) D_PTR (map, l_info[DT_JMPREL]); + plte = (Elf64_Word *) (D_PTR (map, l_info[DT_PLTGOT]) + 32); + plte += 3 * (reloc - rela_plt); + + /* Find the displacement from the plt entry to the function. */ + edisp = (long int) (value - (Elf64_Addr)&plte[3]) / 4; + + if (edisp >= -0x100000 && edisp < 0x100000) + { + /* If we are in range, use br to perfect branch prediction and + elide the dependency on the address load. This case happens, + e.g., when a shared library call is resolved to the same library. */ + + int hi, lo; + hi = value - (Elf64_Addr)&plte[0]; + lo = (short int) hi; + hi = (hi - lo) >> 16; + + /* Emit "lda $27,lo($27)" */ + plte[1] = 0x237b0000 | (lo & 0xffff); + + /* Emit "br $31,function" */ + plte[2] = 0xc3e00000 | (edisp & 0x1fffff); + + /* Think about thread-safety -- the previous instructions must be + committed to memory before the first is overwritten. */ + __asm__ __volatile__("wmb" : : : "memory"); + + /* Emit "ldah $27,hi($27)" */ + plte[0] = 0x277b0000 | (hi & 0xffff); + } + else + { + /* Don't bother with the hint since we already know the hint is + wrong. Eliding it prevents the wrong page from getting pulled + into the cache. */ + + int hi, lo; + hi = (Elf64_Addr)got_addr - (Elf64_Addr)&plte[0]; + lo = (short)hi; + hi = (hi - lo) >> 16; + + /* Emit "ldq $27,lo($27)" */ + plte[1] = 0xa77b0000 | (lo & 0xffff); + + /* Emit "jmp $31,($27)" */ + plte[2] = 0x6bfb0000; + + /* Think about thread-safety -- the previous instructions must be + committed to memory before the first is overwritten. */ + __asm__ __volatile__("wmb" : : : "memory"); + + /* Emit "ldah $27,hi($27)" */ + plte[0] = 0x277b0000 | (hi & 0xffff); + } + + /* At this point, if we've been doing runtime resolution, Icache is dirty. + This will be taken care of in _dl_runtime_resolve. If instead we are + doing this as part of non-lazy startup relocation, that bit of code + hasn't made it into Icache yet, so there's nothing to clean up. */ + + return value; +} + +/* Return the final value of a plt relocation. */ +static inline Elf64_Addr +elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + Elf64_Addr value) +{ + return value + reloc->r_addend; +} + +/* Names of the architecture-specific auditing callback functions. */ +#define ARCH_LA_PLTENTER alpha_gnu_pltenter +#define ARCH_LA_PLTEXIT alpha_gnu_pltexit + +#endif /* !dl_machine_h */ + +#ifdef RESOLVE_MAP + +/* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ +auto inline void +__attribute__ ((always_inline)) +elf_machine_rela (struct link_map *map, + const Elf64_Rela *reloc, + const Elf64_Sym *sym, + const struct r_found_version *version, + void *const reloc_addr_arg, + int skip_ifunc) +{ + Elf64_Addr *const reloc_addr = reloc_addr_arg; + unsigned long int const r_type = ELF64_R_TYPE (reloc->r_info); + +#if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC && !defined SHARED + /* This is defined in rtld.c, but nowhere in the static libc.a; make the + reference weak so static programs can still link. This declaration + cannot be done when compiling rtld.c (i.e. #ifdef RTLD_BOOTSTRAP) + because rtld.c contains the common defn for _dl_rtld_map, which is + incompatible with a weak decl in the same file. */ + weak_extern (_dl_rtld_map); +#endif + + /* We cannot use a switch here because we cannot locate the switch + jump table until we've self-relocated. */ + +#if !defined RTLD_BOOTSTRAP || !defined HAVE_Z_COMBRELOC + if (__builtin_expect (r_type == R_ALPHA_RELATIVE, 0)) + { +# if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC + /* Already done in dynamic linker. */ + if (map != &GL(dl_rtld_map)) +# endif + { + /* XXX Make some timings. Maybe it's preferable to test for + unaligned access and only do it the complex way if necessary. */ + Elf64_Addr reloc_addr_val; + + /* Load value without causing unaligned trap. */ + memcpy (&reloc_addr_val, reloc_addr_arg, 8); + reloc_addr_val += map->l_addr; + + /* Store value without causing unaligned trap. */ + memcpy (reloc_addr_arg, &reloc_addr_val, 8); + } + } + else +#endif + if (__builtin_expect (r_type == R_ALPHA_NONE, 0)) + return; + else + { + struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); + Elf64_Addr sym_value; + Elf64_Addr sym_raw_value; + + sym_raw_value = sym_value = reloc->r_addend; + if (sym_map) + { + sym_raw_value += sym->st_value; + sym_value = sym_raw_value + sym_map->l_addr; + } + + if (r_type == R_ALPHA_GLOB_DAT) + *reloc_addr = sym_value; +#ifdef RESOLVE_CONFLICT_FIND_MAP + /* In .gnu.conflict section, R_ALPHA_JMP_SLOT relocations have + R_ALPHA_JMP_SLOT in lower 8 bits and the remaining 24 bits + are .rela.plt index. */ + else if ((r_type & 0xff) == R_ALPHA_JMP_SLOT) + { + /* elf_machine_fixup_plt needs the map reloc_addr points into, + while in _dl_resolve_conflicts map is _dl_loaded. */ + RESOLVE_CONFLICT_FIND_MAP (map, reloc_addr); + reloc = ((const Elf64_Rela *) D_PTR (map, l_info[DT_JMPREL])) + + (r_type >> 8); + elf_machine_fixup_plt (map, 0, reloc, reloc_addr, sym_value); + } +#else + else if (r_type == R_ALPHA_JMP_SLOT) + elf_machine_fixup_plt (map, 0, reloc, reloc_addr, sym_value); +#endif +#ifndef RTLD_BOOTSTRAP + else if (r_type == R_ALPHA_REFQUAD) + { + /* Store value without causing unaligned trap. */ + memcpy (reloc_addr_arg, &sym_value, 8); + } +#endif + else if (r_type == R_ALPHA_DTPMOD64) + { +# ifdef RTLD_BOOTSTRAP + /* During startup the dynamic linker is always index 1. */ + *reloc_addr = 1; +# else + /* Get the information from the link map returned by the + resolv function. */ + if (sym_map != NULL) + *reloc_addr = sym_map->l_tls_modid; +# endif + } + else if (r_type == R_ALPHA_DTPREL64) + { +# ifndef RTLD_BOOTSTRAP + /* During relocation all TLS symbols are defined and used. + Therefore the offset is already correct. */ + *reloc_addr = sym_raw_value; +# endif + } + else if (r_type == R_ALPHA_TPREL64) + { +# ifdef RTLD_BOOTSTRAP + *reloc_addr = sym_raw_value + map->l_tls_offset; +# else + if (sym_map) + { + CHECK_STATIC_TLS (map, sym_map); + *reloc_addr = sym_raw_value + sym_map->l_tls_offset; + } +# endif + } + else + _dl_reloc_bad_type (map, r_type, 0); + } +} + +/* Let do-rel.h know that on Alpha if l_addr is 0, all RELATIVE relocs + can be skipped. */ +#define ELF_MACHINE_REL_RELATIVE 1 + +auto inline void +__attribute__ ((always_inline)) +elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +{ + /* XXX Make some timings. Maybe it's preferable to test for + unaligned access and only do it the complex way if necessary. */ + Elf64_Addr reloc_addr_val; + + /* Load value without causing unaligned trap. */ + memcpy (&reloc_addr_val, reloc_addr_arg, 8); + reloc_addr_val += l_addr; + + /* Store value without causing unaligned trap. */ + memcpy (reloc_addr_arg, &reloc_addr_val, 8); +} + +auto inline void +__attribute__ ((always_inline)) +elf_machine_lazy_rel (struct link_map *map, + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) +{ + Elf64_Addr * const reloc_addr = (void *)(l_addr + reloc->r_offset); + unsigned long int const r_type = ELF64_R_TYPE (reloc->r_info); + + if (r_type == R_ALPHA_JMP_SLOT) + { + /* Perform a RELATIVE reloc on the .got entry that transfers + to the .plt. */ + *reloc_addr += l_addr; + } + else if (r_type == R_ALPHA_NONE) + return; + else + _dl_reloc_bad_type (map, r_type, 1); +} + +#endif /* RESOLVE_MAP */ diff --git a/REORG.TODO/sysdeps/alpha/dl-procinfo.c b/REORG.TODO/sysdeps/alpha/dl-procinfo.c new file mode 100644 index 0000000000..7a2e18a978 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-procinfo.c @@ -0,0 +1,63 @@ +/* Data for Alpha version of processor capability information. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Aurelien Jarno <aurelien@aurel32.net>, 2008. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This information must be kept in sync with the _DL_PLATFORM_COUNT + definitions in procinfo.h. + + If anything should be added here check whether the size of each string + is still ok with the given array size. + + All the #ifdefs in the definitions are quite irritating but + necessary if we want to avoid duplicating the information. There + are three different modes: + + - PROCINFO_DECL is defined. This means we are only interested in + declarations. + + - PROCINFO_DECL is not defined: + + + if SHARED is defined the file is included in an array + initializer. The .element = { ... } syntax is needed. + + + if SHARED is not defined a normal array initialization is + needed. + */ + +#ifndef PROCINFO_CLASS +#define PROCINFO_CLASS +#endif + +#if !defined PROCINFO_DECL && defined SHARED + ._dl_alpha_platforms +#else +PROCINFO_CLASS const char _dl_alpha_platforms[5][5] +#endif +#ifndef PROCINFO_DECL += { + "ev4", "ev5", "ev56", "ev6", "ev67" + } +#endif +#if !defined SHARED || defined PROCINFO_DECL +; +#else +, +#endif + +#undef PROCINFO_DECL +#undef PROCINFO_CLASS diff --git a/REORG.TODO/sysdeps/alpha/dl-procinfo.h b/REORG.TODO/sysdeps/alpha/dl-procinfo.h new file mode 100644 index 0000000000..ed7a66ee49 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-procinfo.h @@ -0,0 +1,60 @@ +/* Alpha version of processor capability information handling macros. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Aurelien Jarno <aurelien@aurel32.net>, 2008. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _DL_PROCINFO_H +#define _DL_PROCINFO_H 1 + +#include <ldsodefs.h> + + +/* Mask to filter out platforms. */ +#define _DL_HWCAP_PLATFORM (-1ULL) + +#define _DL_PLATFORMS_COUNT 5 + +static inline int +__attribute__ ((unused, always_inline)) +_dl_string_platform (const char *str) +{ + int i; + + if (str != NULL) + for (i = 0; i < _DL_PLATFORMS_COUNT; ++i) + { + if (strcmp (str, GLRO(dl_alpha_platforms)[i]) == 0) + return i; + } + return -1; +}; + +/* We cannot provide a general printing function. */ +#define _dl_procinfo(type, word) -1 + +/* There are no hardware capabilities defined. */ +#define _dl_hwcap_string(idx) "" + +/* By default there is no important hardware capability. */ +#define HWCAP_IMPORTANT (0) + +/* We don't have any hardware capabilities. */ +#define _DL_HWCAP_COUNT 0 + +#define _dl_string_hwcap(str) (-1) + +#endif /* dl-procinfo.h */ diff --git a/REORG.TODO/sysdeps/alpha/dl-sysdep.h b/REORG.TODO/sysdeps/alpha/dl-sysdep.h new file mode 100644 index 0000000000..67a90f1bd0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-sysdep.h @@ -0,0 +1,23 @@ +/* System-specific settings for dynamic linker code. Alpha version. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include_next <dl-sysdep.h> + +/* _dl_argv cannot be attribute_relro, because _dl_start_user + might write into it after _dl_start returns. */ +#define DL_ARGV_NOT_RELRO 1 diff --git a/REORG.TODO/sysdeps/alpha/dl-tls.h b/REORG.TODO/sysdeps/alpha/dl-tls.h new file mode 100644 index 0000000000..cac369c9d3 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-tls.h @@ -0,0 +1,27 @@ +/* Thread-local storage handling in the ELF dynamic linker. Alpha version. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + + +/* Type used for the representation of TLS information in the GOT. */ +typedef struct +{ + unsigned long int ti_module; + unsigned long int ti_offset; +} tls_index; + +extern void *__tls_get_addr (tls_index *ti); diff --git a/REORG.TODO/sysdeps/alpha/dl-trampoline.S b/REORG.TODO/sysdeps/alpha/dl-trampoline.S new file mode 100644 index 0000000000..f527705d27 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/dl-trampoline.S @@ -0,0 +1,540 @@ +/* PLT trampolines. Alpha version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .set noat + +.macro savei regno, offset + stq $\regno, \offset($30) + cfi_rel_offset(\regno, \offset) +.endm + +.macro savef regno, offset + stt $f\regno, \offset($30) + cfi_rel_offset(\regno+32, \offset) +.endm + + .align 4 + .globl _dl_runtime_resolve_new + .ent _dl_runtime_resolve_new + +#undef FRAMESIZE +#define FRAMESIZE 14*8 + +_dl_runtime_resolve_new: + .frame $30, FRAMESIZE, $26, 0 + .mask 0x4000000, 0 + + ldah $29, 0($27) !gpdisp!1 + lda $30, -FRAMESIZE($30) + stq $26, 0*8($30) + stq $16, 2*8($30) + + stq $17, 3*8($30) + lda $29, 0($29) !gpdisp!1 + stq $18, 4*8($30) + mov $28, $16 /* link_map from .got.plt */ + + stq $19, 5*8($30) + mov $25, $17 /* offset of reloc entry */ + stq $20, 6*8($30) + mov $26, $18 /* return address */ + + stq $21, 7*8($30) + stt $f16, 8*8($30) + stt $f17, 9*8($30) + stt $f18, 10*8($30) + + stt $f19, 11*8($30) + stt $f20, 12*8($30) + stt $f21, 13*8($30) + .prologue 2 + + bsr $26, _dl_fixup !samegp + mov $0, $27 + + ldq $26, 0*8($30) + ldq $16, 2*8($30) + ldq $17, 3*8($30) + ldq $18, 4*8($30) + ldq $19, 5*8($30) + ldq $20, 6*8($30) + ldq $21, 7*8($30) + ldt $f16, 8*8($30) + ldt $f17, 9*8($30) + ldt $f18, 10*8($30) + ldt $f19, 11*8($30) + ldt $f20, 12*8($30) + ldt $f21, 13*8($30) + lda $30, FRAMESIZE($30) + jmp $31, ($27), 0 + .end _dl_runtime_resolve_new + + .globl _dl_runtime_profile_new + .type _dl_runtime_profile_new, @function + +#undef FRAMESIZE +#define FRAMESIZE 20*8 + + /* We save the registers in a different order than desired by + .mask/.fmask, so we have to use explicit cfi directives. */ + cfi_startproc + +_dl_runtime_profile_new: + ldah $29, 0($27) !gpdisp!2 + lda $30, -FRAMESIZE($30) + savei 26, 0*8 + stq $16, 2*8($30) + + stq $17, 3*8($30) + lda $29, 0($29) !gpdisp!2 + stq $18, 4*8($30) + lda $1, FRAMESIZE($30) /* incoming sp value */ + + stq $1, 1*8($30) + stq $19, 5*8($30) + stq $20, 6*8($30) + mov $28, $16 /* link_map from .got.plt */ + + stq $21, 7*8($30) + mov $25, $17 /* offset of reloc entry */ + stt $f16, 8*8($30) + mov $26, $18 /* return address */ + + stt $f17, 9*8($30) + mov $30, $19 /* La_alpha_regs address */ + stt $f18, 10*8($30) + lda $20, 14*8($30) /* framesize address */ + + stt $f19, 11*8($30) + stt $f20, 12*8($30) + stt $f21, 13*8($30) + stq $28, 16*8($30) + stq $25, 17*8($30) + + bsr $26, _dl_profile_fixup !samegp + mov $0, $27 + + /* Discover if we're wrapping this call. */ + ldq $18, 14*8($30) + bge $18, 1f + + ldq $26, 0*8($30) + ldq $16, 2*8($30) + ldq $17, 3*8($30) + ldq $18, 4*8($30) + ldq $19, 5*8($30) + ldq $20, 6*8($30) + ldq $21, 7*8($30) + ldt $f16, 8*8($30) + ldt $f17, 9*8($30) + ldt $f18, 10*8($30) + ldt $f19, 11*8($30) + ldt $f20, 12*8($30) + ldt $f21, 13*8($30) + lda $30, FRAMESIZE($30) + jmp $31, ($27), 0 + +1: + /* Create a frame pointer and allocate a new argument frame. */ + savei 15, 15*8 + mov $30, $15 + cfi_def_cfa_register (15) + addq $18, 15, $18 + bic $18, 15, $18 + subq $30, $18, $30 + + /* Save the call destination around memcpy. */ + stq $0, 14*8($30) + + /* Copy the stack arguments into place. */ + lda $16, 0($30) + lda $17, FRAMESIZE($15) + jsr $26, memcpy + ldgp $29, 0($26) + + /* Reload the argument registers. */ + ldq $27, 14*8($30) + ldq $16, 2*8($15) + ldq $17, 3*8($15) + ldq $18, 4*8($15) + ldq $19, 5*8($15) + ldq $20, 6*8($15) + ldq $21, 7*8($15) + ldt $f16, 8*8($15) + ldt $f17, 9*8($15) + ldt $f18, 10*8($15) + ldt $f19, 11*8($15) + ldt $f20, 12*8($15) + ldt $f21, 13*8($15) + + jsr $26, ($27), 0 + ldgp $29, 0($26) + + /* Set up for call to _dl_call_pltexit. */ + ldq $16, 16*8($15) + ldq $17, 17*8($15) + stq $0, 16*8($15) + lda $18, 0($15) + stq $1, 17*8($15) + lda $19, 16*8($15) + stt $f0, 18*8($15) + stt $f1, 19*8($15) + bsr $26, _dl_call_pltexit !samegp + + mov $15, $30 + cfi_def_cfa_register (30) + ldq $26, 0($30) + ldq $15, 15*8($30) + lda $30, FRAMESIZE($30) + ret + + cfi_endproc + .size _dl_runtime_profile_new, .-_dl_runtime_profile_new + + .align 4 + .globl _dl_runtime_resolve_old + .ent _dl_runtime_resolve_old + +#undef FRAMESIZE +#define FRAMESIZE 44*8 + +_dl_runtime_resolve_old: + lda $30, -FRAMESIZE($30) + .frame $30, FRAMESIZE, $26 + /* Preserve all registers that C normally doesn't. */ + stq $26, 0*8($30) + stq $0, 1*8($30) + stq $1, 2*8($30) + stq $2, 3*8($30) + stq $3, 4*8($30) + stq $4, 5*8($30) + stq $5, 6*8($30) + stq $6, 7*8($30) + stq $7, 8*8($30) + stq $8, 9*8($30) + stq $16, 10*8($30) + stq $17, 11*8($30) + stq $18, 12*8($30) + stq $19, 13*8($30) + stq $20, 14*8($30) + stq $21, 15*8($30) + stq $22, 16*8($30) + stq $23, 17*8($30) + stq $24, 18*8($30) + stq $25, 19*8($30) + stq $29, 20*8($30) + stt $f0, 21*8($30) + stt $f1, 22*8($30) + stt $f10, 23*8($30) + stt $f11, 24*8($30) + stt $f12, 25*8($30) + stt $f13, 26*8($30) + stt $f14, 27*8($30) + stt $f15, 28*8($30) + stt $f16, 29*8($30) + stt $f17, 30*8($30) + stt $f18, 31*8($30) + stt $f19, 32*8($30) + stt $f20, 33*8($30) + stt $f21, 34*8($30) + stt $f22, 35*8($30) + stt $f23, 36*8($30) + stt $f24, 37*8($30) + stt $f25, 38*8($30) + stt $f26, 39*8($30) + stt $f27, 40*8($30) + stt $f28, 41*8($30) + stt $f29, 42*8($30) + stt $f30, 43*8($30) + .mask 0x27ff01ff, -FRAMESIZE + .fmask 0xfffffc03, -FRAMESIZE+21*8 + /* Set up our GP. */ + br $29, .+4 + ldgp $29, 0($29) + .prologue 0 + /* Set up the arguments for _dl_fixup: + $16 = link_map out of plt0 + $17 = offset of reloc entry = ($28 - $27 - 20) /12 * 24 + $18 = return address + */ + subq $28, $27, $17 + ldq $16, 8($27) + subq $17, 20, $17 + mov $26, $18 + addq $17, $17, $17 + bsr $26, _dl_fixup !samegp + + /* Move the destination address into position. */ + mov $0, $27 + /* Restore program registers. */ + ldq $26, 0*8($30) + ldq $0, 1*8($30) + ldq $1, 2*8($30) + ldq $2, 3*8($30) + ldq $3, 4*8($30) + ldq $4, 5*8($30) + ldq $5, 6*8($30) + ldq $6, 7*8($30) + ldq $7, 8*8($30) + ldq $8, 9*8($30) + ldq $16, 10*8($30) + ldq $17, 11*8($30) + ldq $18, 12*8($30) + ldq $19, 13*8($30) + ldq $20, 14*8($30) + ldq $21, 15*8($30) + ldq $22, 16*8($30) + ldq $23, 17*8($30) + ldq $24, 18*8($30) + ldq $25, 19*8($30) + ldq $29, 20*8($30) + ldt $f0, 21*8($30) + ldt $f1, 22*8($30) + ldt $f10, 23*8($30) + ldt $f11, 24*8($30) + ldt $f12, 25*8($30) + ldt $f13, 26*8($30) + ldt $f14, 27*8($30) + ldt $f15, 28*8($30) + ldt $f16, 29*8($30) + ldt $f17, 30*8($30) + ldt $f18, 31*8($30) + ldt $f19, 32*8($30) + ldt $f20, 33*8($30) + ldt $f21, 34*8($30) + ldt $f22, 35*8($30) + ldt $f23, 36*8($30) + ldt $f24, 37*8($30) + ldt $f25, 38*8($30) + ldt $f26, 39*8($30) + ldt $f27, 40*8($30) + ldt $f28, 41*8($30) + ldt $f29, 42*8($30) + ldt $f30, 43*8($30) + /* Flush the Icache after having modified the .plt code. */ + imb + /* Clean up and turn control to the destination */ + lda $30, FRAMESIZE($30) + jmp $31, ($27) + + .end _dl_runtime_resolve_old + + .globl _dl_runtime_profile_old + .usepv _dl_runtime_profile_old, no + .type _dl_runtime_profile_old, @function + + /* We save the registers in a different order than desired by + .mask/.fmask, so we have to use explicit cfi directives. */ + cfi_startproc + +#undef FRAMESIZE +#define FRAMESIZE 50*8 + + .align 4 +_dl_runtime_profile_old: + lda $30, -FRAMESIZE($30) + cfi_adjust_cfa_offset (FRAMESIZE) + + /* Preserve all argument registers. This also constructs the + La_alpha_regs structure. */ + savei 26, 0*8 + savei 16, 2*8 + savei 17, 3*8 + savei 18, 4*8 + savei 19, 5*8 + savei 20, 6*8 + savei 21, 7*8 + lda $16, FRAMESIZE($30) + savef 16, 8*8 + savef 17, 9*8 + savef 18, 10*8 + savef 19, 11*8 + savef 20, 12*8 + savef 21, 13*8 + stq $16, 1*8($30) + + /* Preserve all registers that C normally doesn't. */ + savei 0, 14*8 + savei 1, 15*8 + savei 2, 16*8 + savei 3, 17*8 + savei 4, 18*8 + savei 5, 19*8 + savei 6, 20*8 + savei 7, 21*8 + savei 8, 22*8 + savei 22, 23*8 + savei 23, 24*8 + savei 24, 25*8 + savei 25, 26*8 + savei 29, 27*8 + savef 0, 28*8 + savef 1, 29*8 + savef 10, 30*8 + savef 11, 31*8 + savef 12, 32*8 + savef 13, 33*8 + savef 14, 34*8 + savef 15, 35*8 + savef 22, 36*8 + savef 23, 37*8 + savef 24, 38*8 + savef 25, 39*8 + savef 26, 40*8 + savef 27, 41*8 + savef 28, 42*8 + savef 29, 43*8 + savef 30, 44*8 + + /* Set up our GP. */ + br $29, .+4 + ldgp $29, 0($29) + + /* Set up the arguments for _dl_profile_fixup: + $16 = link_map out of plt0 + $17 = offset of reloc entry = ($28 - $27 - 20) /12 * 24 + $18 = return address + $19 = La_alpha_regs address + $20 = framesize address + */ + subq $28, $27, $17 + ldq $16, 8($27) + subq $17, 20, $17 + mov $26, $18 + addq $17, $17, $17 + lda $19, 0($30) + lda $20, 45*8($30) + stq $16, 48*8($30) + stq $17, 49*8($30) + + bsr $26, _dl_profile_fixup !samegp + + /* Discover if we're wrapping this call. */ + ldq $18, 45*8($30) + bge $18, 1f + + /* Move the destination address into position. */ + mov $0, $27 + /* Restore program registers. */ + ldq $26, 0*8($30) + ldq $16, 2*8($30) + ldq $17, 3*8($30) + ldq $18, 4*8($30) + ldq $19, 5*8($30) + ldq $20, 6*8($30) + ldq $21, 7*8($30) + ldt $f16, 8*8($30) + ldt $f17, 9*8($30) + ldt $f18, 10*8($30) + ldt $f19, 11*8($30) + ldt $f20, 12*8($30) + ldt $f21, 13*8($30) + ldq $0, 14*8($30) + ldq $1, 15*8($30) + ldq $2, 16*8($30) + ldq $3, 17*8($30) + ldq $4, 18*8($30) + ldq $5, 19*8($30) + ldq $6, 20*8($30) + ldq $7, 21*8($30) + ldq $8, 22*8($30) + ldq $22, 23*8($30) + ldq $23, 24*8($30) + ldq $24, 25*8($30) + ldq $25, 26*8($30) + ldq $29, 27*8($30) + ldt $f0, 28*8($30) + ldt $f1, 29*8($30) + ldt $f10, 30*8($30) + ldt $f11, 31*8($30) + ldt $f12, 32*8($30) + ldt $f13, 33*8($30) + ldt $f14, 34*8($30) + ldt $f15, 35*8($30) + ldt $f22, 36*8($30) + ldt $f23, 37*8($30) + ldt $f24, 38*8($30) + ldt $f25, 39*8($30) + ldt $f26, 40*8($30) + ldt $f27, 41*8($30) + ldt $f28, 42*8($30) + ldt $f29, 43*8($30) + ldt $f30, 44*8($30) + + /* Clean up and turn control to the destination. */ + lda $30, FRAMESIZE($30) + jmp $31, ($27) + +1: + /* Create a frame pointer and allocate a new argument frame. */ + savei 15, 45*8 + mov $30, $15 + cfi_def_cfa_register (15) + addq $18, 15, $18 + bic $18, 15, $18 + subq $30, $18, $30 + + /* Save the call destination around memcpy. */ + stq $0, 46*8($30) + + /* Copy the stack arguments into place. */ + lda $16, 0($30) + lda $17, FRAMESIZE($15) + jsr $26, memcpy + ldgp $29, 0($26) + + /* Reload the argument registers. */ + ldq $27, 46*8($30) + ldq $16, 2*8($15) + ldq $17, 3*8($15) + ldq $18, 4*8($15) + ldq $19, 5*8($15) + ldq $20, 6*8($15) + ldq $21, 7*8($15) + ldt $f16, 8*8($15) + ldt $f17, 9*8($15) + ldt $f18, 10*8($15) + ldt $f19, 11*8($15) + ldt $f20, 12*8($15) + ldt $f21, 13*8($15) + + jsr $26, ($27), 0 + ldgp $29, 0($26) + + /* Set up for call to _dl_call_pltexit. */ + ldq $16, 48*8($15) + ldq $17, 49*8($15) + stq $0, 46*8($15) + lda $18, 0($15) + stq $1, 47*8($15) + lda $19, 46*8($15) + stt $f0, 48*8($15) + stt $f1, 49*8($15) + bsr $26, _dl_call_pltexit !samegp + + mov $15, $30 + cfi_def_cfa_register (30) + ldq $26, 0($30) + ldq $15, 45*8($30) + lda $30, FRAMESIZE($30) + ret + + cfi_endproc + .size _dl_runtime_profile_old, .-_dl_runtime_profile_old diff --git a/REORG.TODO/sysdeps/alpha/ffs.S b/REORG.TODO/sysdeps/alpha/ffs.S new file mode 100644 index 0000000000..8cd7e5123a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/ffs.S @@ -0,0 +1,91 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by David Mosberger (davidm@cs.arizona.edu). + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Finds the first bit set in an integer. Optimized for the Alpha + architecture. */ + +#include <sysdep.h> + + .set noreorder + .set noat + + +ENTRY(__ffs) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 + zap $16, 0xF0, $16 + br $ffsl..ng +#else + .prologue 0 + zap $16, 0xF0, $16 + # FALLTHRU +#endif +END(__ffs) + + .align 4 +ENTRY(ffsl) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +$ffsl..ng: +#else + .prologue 0 +#endif + not $16, $1 # e0 : + ldi $2, -1 # .. e1 : + cmpbge $1, $2, $3 # e0 : bit N == 1 for byte N == 0 + clr $0 # .. e1 : + addq $3, 1, $4 # e0 : + bic $4, $3, $3 # e1 : bit N == 1 for first byte N != 0 + and $3, 0xF0, $4 # e0 : + and $3, 0xCC, $5 # .. e1 : + and $3, 0xAA, $6 # e0 : + cmovne $4, 4, $0 # .. e1 : + cmovne $5, 2, $5 # e0 : + cmovne $6, 1, $6 # .. e1 : + addl $0, $5, $0 # e0 : + addl $0, $6, $0 # e1 : $0 == N + extbl $16, $0, $1 # e0 : $1 == byte N + ldi $2, 1 # .. e1 : + negq $1, $3 # e0 : + and $3, $1, $3 # e1 : bit N == least bit set of byte N + and $3, 0xF0, $4 # e0 : + and $3, 0xCC, $5 # .. e1 : + and $3, 0xAA, $6 # e0 : + cmovne $4, 5, $2 # .. e1 : + cmovne $5, 2, $5 # e0 : + cmovne $6, 1, $6 # .. e1 : + s8addl $0, $2, $0 # e0 : mult byte ofs by 8 and sum + addl $5, $6, $5 # .. e1 : + addl $0, $5, $0 # e0 : + nop # .. e1 : + cmoveq $16, 0, $0 # e0 : trap input == 0 case. + ret # .. e1 : 18 + +END(ffsl) + +weak_alias (__ffs, ffs) +libc_hidden_def (__ffs) +libc_hidden_builtin_def (ffs) +weak_extern (ffsl) +weak_alias (ffsl, ffsll) diff --git a/REORG.TODO/sysdeps/alpha/ffsll.S b/REORG.TODO/sysdeps/alpha/ffsll.S new file mode 100644 index 0000000000..b2f46d899c --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/ffsll.S @@ -0,0 +1 @@ +/* This function is defined in ffs.S. */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/Versions b/REORG.TODO/sysdeps/alpha/fpu/Versions new file mode 100644 index 0000000000..c9b0e03a91 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/Versions @@ -0,0 +1,23 @@ +libc { + GLIBC_2.0 { + # functions used in other libraries + __ieee_get_fp_control; __ieee_set_fp_control; + } +} +libm { + GLIBC_2.3.4 { + # functions implementing old complex float abi + __c1_cabsf; __c1_cacosf; __c1_cacoshf; __c1_cargf; __c1_casinf; + __c1_casinhf; __c1_catanf; __c1_catanhf; __c1_ccosf; __c1_ccoshf; + __c1_cexpf; __c1_cimagf; __c1_clog10f; __c1_clogf; __c1_conjf; + __c1_cpowf; __c1_cprojf; __c1_crealf; __c1_csinf; __c1_csinhf; + __c1_csqrtf; __c1_ctanf; __c1_ctanhf; + + # functions implementing new complex float abi + cabsf; cacosf; cacoshf; cargf; casinf; + casinhf; catanf; catanhf; ccosf; ccoshf; + cexpf; cimagf; clog10f; clogf; conjf; + cpowf; cprojf; crealf; csinf; csinhf; + csqrtf; ctanf; ctanhf; + } +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/bits/fenv.h b/REORG.TODO/sysdeps/alpha/fpu/bits/fenv.h new file mode 100644 index 0000000000..94ca4a4da0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/bits/fenv.h @@ -0,0 +1,141 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FENV_H +# error "Never use <bits/fenv.h> directly; include <fenv.h> instead." +#endif + + +/* Define the bits representing the exception. + + Note that these are the bit positions as defined by the OSF/1 + ieee_{get,set}_control_word interface and not by the hardware fpcr. + + See the Alpha Architecture Handbook section 4.7.7.3 for details, + but in summary, trap shadows mean the hardware register can acquire + extra exception bits so for proper IEEE support the tracking has to + be done in software -- in this case with kernel support. + + As to why the system call interface isn't in the same format as + the hardware register, only those crazy folks at DEC can tell you. */ + +enum + { +#ifdef __USE_GNU + FE_DENORMAL = +#define FE_DENORMAL (1 << 22) + FE_DENORMAL, +#endif + + FE_INEXACT = +#define FE_INEXACT (1 << 21) + FE_INEXACT, + + FE_UNDERFLOW = +#define FE_UNDERFLOW (1 << 20) + FE_UNDERFLOW, + + FE_OVERFLOW = +#define FE_OVERFLOW (1 << 19) + FE_OVERFLOW, + + FE_DIVBYZERO = +#define FE_DIVBYZERO (1 << 18) + FE_DIVBYZERO, + + FE_INVALID = +#define FE_INVALID (1 << 17) + FE_INVALID, + + FE_ALL_EXCEPT = +#define FE_ALL_EXCEPT (0x3f << 17) + FE_ALL_EXCEPT + }; + +/* Alpha chips support all four defined rouding modes. + + Note that code must be compiled to use dynamic rounding (/d) instructions + to see these changes. For gcc this is -mfp-rounding-mode=d; for DEC cc + this is -fprm d. The default for both is static rounding to nearest. + + These are shifted down 58 bits from the hardware fpcr because the + functions are declared to take integers. */ + +enum + { + FE_TOWARDZERO = +#define FE_TOWARDZERO 0 + FE_TOWARDZERO, + + FE_DOWNWARD = +#define FE_DOWNWARD 1 + FE_DOWNWARD, + + FE_TONEAREST = +#define FE_TONEAREST 2 + FE_TONEAREST, + + FE_UPWARD = +#define FE_UPWARD 3 + FE_UPWARD, + }; + +#ifdef __USE_GNU +/* On later hardware, and later kernels for earlier hardware, we can forcibly + underflow denormal inputs and outputs. This can speed up certain programs + significantly, usually without affecting accuracy. */ +enum + { + FE_MAP_DMZ = 1UL << 12, /* Map denorm inputs to zero */ +#define FE_MAP_DMZ FE_MAP_DMZ + + FE_MAP_UMZ = 1UL << 13, /* Map underflowed outputs to zero */ +#define FE_MAP_UMZ FE_MAP_UMZ + }; +#endif + +/* Type representing exception flags. */ +typedef unsigned long int fexcept_t; + +/* Type representing floating-point environment. */ +typedef unsigned long int fenv_t; + +/* If the default argument is used we use this value. Note that due to + architecture-specified page mappings, no user-space pointer will ever + have its two high bits set. Co-opt one. */ +#define FE_DFL_ENV ((const fenv_t *) 0x8800000000000000UL) + +#ifdef __USE_GNU +/* Floating-point environment where none of the exceptions are masked. */ +# define FE_NOMASK_ENV ((const fenv_t *) 0x880000000000003eUL) + +/* Floating-point environment with (processor-dependent) non-IEEE floating + point. In this case, mapping denormals to zero. */ +# define FE_NONIEEE_ENV ((const fenv_t *) 0x8800000000003000UL) +#endif + +/* The system calls to talk to the kernel's FP code. */ +extern unsigned long int __ieee_get_fp_control (void) __THROW; +extern void __ieee_set_fp_control (unsigned long int __value) __THROW; + +#if __GLIBC_USE (IEC_60559_BFP_EXT) +/* Type representing floating-point control modes. */ +typedef unsigned long int femode_t; + +/* Default floating-point control modes. */ +# define FE_DFL_MODE ((const femode_t *) 0x8800000000000000UL) +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/bits/mathinline.h b/REORG.TODO/sysdeps/alpha/fpu/bits/mathinline.h new file mode 100644 index 0000000000..00c8c42a83 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/bits/mathinline.h @@ -0,0 +1,125 @@ +/* Inline math functions for Alpha. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David Mosberger-Tang. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never use <bits/mathinline.h> directly; include <math.h> instead." +#endif + +#ifndef __extern_inline +# define __MATH_INLINE __inline +#else +# define __MATH_INLINE __extern_inline +#endif + +#if defined __USE_ISOC99 && defined __GNUC__ && !__GNUC_PREREQ(3,0) +# undef isgreater +# undef isgreaterequal +# undef isless +# undef islessequal +# undef islessgreater +# undef isunordered +# define isunordered(u, v) \ + (__extension__ \ + ({ double __r, __u = (u), __v = (v); \ + __asm ("cmptun/su %1,%2,%0\n\ttrapb" \ + : "=&f" (__r) : "f" (__u), "f"(__v)); \ + __r != 0; })) +#endif /* ISO C99 */ + +#if (!defined __NO_MATH_INLINES || defined __LIBC_INTERNAL_MATH_INLINES) \ + && defined __OPTIMIZE__ + +#if !__GNUC_PREREQ (4, 0) +# define __inline_copysign(NAME, TYPE) \ +__MATH_INLINE TYPE \ +__NTH (NAME (TYPE __x, TYPE __y)) \ +{ \ + TYPE __z; \ + __asm ("cpys %1, %2, %0" : "=f" (__z) : "f" (__y), "f" (__x)); \ + return __z; \ +} + +__inline_copysign (__copysignf, float) +__inline_copysign (copysignf, float) +__inline_copysign (__copysign, double) +__inline_copysign (copysign, double) + +# undef __inline_copysign +#endif + + +#if !__GNUC_PREREQ (2, 8) +# define __inline_fabs(NAME, TYPE) \ +__MATH_INLINE TYPE \ +__NTH (NAME (TYPE __x)) \ +{ \ + TYPE __z; \ + __asm ("cpys $f31, %1, %0" : "=f" (__z) : "f" (__x)); \ + return __z; \ +} + +__inline_fabs (__fabsf, float) +__inline_fabs (fabsf, float) +__inline_fabs (__fabs, double) +__inline_fabs (fabs, double) + +# undef __inline_fabs +#endif + +#ifdef __USE_ISOC99 + +/* Test for negative number. Used in the signbit() macro. */ +__MATH_INLINE int +__NTH (__signbitf (float __x)) +{ +#if !__GNUC_PREREQ (4, 0) + __extension__ union { float __f; int __i; } __u = { __f: __x }; + return __u.__i < 0; +#else + return __builtin_signbitf (__x); +#endif +} + +__MATH_INLINE int +__NTH (__signbit (double __x)) +{ +#if !__GNUC_PREREQ (4, 0) + __extension__ union { double __d; long __i; } __u = { __d: __x }; + return __u.__i < 0; +#else + return __builtin_signbit (__x); +#endif +} + +__MATH_INLINE int +__NTH (__signbitl (long double __x)) +{ +#if !__GNUC_PREREQ (4, 0) + __extension__ union { + long double __d; + long __i[sizeof(long double)/sizeof(long)]; + } __u = { __d: __x }; + return __u.__i[sizeof(long double)/sizeof(long) - 1] < 0; +#else + return __builtin_signbitl (__x); +#endif +} +#endif /* C99 */ + +#endif /* __NO_MATH_INLINES */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/cabsf.c b/REORG.TODO/sysdeps/alpha/fpu/cabsf.c new file mode 100644 index 0000000000..2ffd6a327d --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/cabsf.c @@ -0,0 +1,41 @@ +/* Return the complex absolute value of float complex value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __cabsf __cabsf_not_defined +#define cabsf cabsf_not_defined + +#include <complex.h> +#include <math.h> +#include "cfloat-compat.h" + +#undef __cabsf +#undef cabsf + +float +__c1_cabsf (c1_cfloat_decl (z)) +{ + return __hypotf (c1_cfloat_real (z), c1_cfloat_imag (z)); +} + +float +__c2_cabsf (c2_cfloat_decl (z)) +{ + return __hypotf (c2_cfloat_real (z), c2_cfloat_imag (z)); +} + +cfloat_versions (cabsf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/cargf.c b/REORG.TODO/sysdeps/alpha/fpu/cargf.c new file mode 100644 index 0000000000..6bff8a57f9 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/cargf.c @@ -0,0 +1,41 @@ +/* Compute argument of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __cargf __cargf_not_defined +#define cargf cargf_not_defined + +#include <complex.h> +#include <math.h> +#include "cfloat-compat.h" + +#undef __cargf +#undef cargf + +float +__c1_cargf (c1_cfloat_decl (x)) +{ + return __atan2f (c1_cfloat_imag (x), c1_cfloat_real (x)); +} + +float +__c2_cargf (c2_cfloat_decl (x)) +{ + return __atan2f (c2_cfloat_imag (x), c2_cfloat_real (x)); +} + +cfloat_versions (cargf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/cfloat-compat.h b/REORG.TODO/sysdeps/alpha/fpu/cfloat-compat.h new file mode 100644 index 0000000000..484cdd0df2 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/cfloat-compat.h @@ -0,0 +1,58 @@ +/* Compatibility macros for old and new Alpha complex float ABI. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* The behaviour of complex float changed between GCC 3.3 and 3.4. + + In 3.3 and before (below, complex version 1, or "c1"), complex float + values were packed into one floating point register. + + In 3.4 and later (below, complex version 2, or "c2"), GCC changed to + follow the official Tru64 ABI, which passes the components of a complex + as separate parameters. */ + +typedef union { double d; _Complex float cf; } c1_compat; +# define c1_cfloat_decl(x) double x +# define c1_cfloat_real(x) __real__ c1_cfloat_value (x) +# define c1_cfloat_imag(x) __imag__ c1_cfloat_value (x) +# define c1_cfloat_value(x) (((c1_compat *)(void *)&x)->cf) +# define c1_cfloat_rettype double +# define c1_cfloat_return(x) ({ c1_compat _; _.cf = (x); _.d; }) + +# define c2_cfloat_decl(x) _Complex float x +# define c2_cfloat_real(x) __real__ x +# define c2_cfloat_imag(x) __imag__ x +# define c2_cfloat_value(x) x +# define c2_cfloat_rettype _Complex float +# define c2_cfloat_return(x) x + +/* Get the proper symbol versions defined for each function. */ + +#include <shlib-compat.h> + +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_3_4) +#define cfloat_versions_compat(func) \ + compat_symbol (libm, __c1_##func, func, GLIBC_2_1) +#else +#define cfloat_versions_compat(func) +#endif + +#define cfloat_versions(func) \ + cfloat_versions_compat(func); \ + versioned_symbol (libm, __c2_##func, func, GLIBC_2_3_4); \ + extern typeof(__c2_##func) __##func attribute_hidden; \ + strong_alias (__c2_##func, __##func) diff --git a/REORG.TODO/sysdeps/alpha/fpu/cimagf.c b/REORG.TODO/sysdeps/alpha/fpu/cimagf.c new file mode 100644 index 0000000000..6318f12297 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/cimagf.c @@ -0,0 +1,40 @@ +/* Return imaginary part of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __cimagf __cimagf_not_defined +#define cimagf cimagf_not_defined + +#include <complex.h> +#include "cfloat-compat.h" + +#undef __cimagf +#undef cimagf + +float +__c1_cimagf (c1_cfloat_decl (z)) +{ + return c1_cfloat_imag (z); +} + +float +__c2_cimagf (c2_cfloat_decl (z)) +{ + return c2_cfloat_imag (z); +} + +cfloat_versions (cimagf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/conjf.c b/REORG.TODO/sysdeps/alpha/fpu/conjf.c new file mode 100644 index 0000000000..802898a5cb --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/conjf.c @@ -0,0 +1,42 @@ +/* Return complex conjugate of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __conjf __conjf_not_defined +#define conjf conjf_not_defined + +#include <complex.h> +#include "cfloat-compat.h" + +#undef __conjf +#undef conjf + +c1_cfloat_rettype +__c1_conjf (c1_cfloat_decl (z)) +{ + _Complex float r = ~ c1_cfloat_value (z); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_conjf (c2_cfloat_decl (z)) +{ + _Complex float r = ~ c2_cfloat_value (z); + return c2_cfloat_return (r); +} + +cfloat_versions (conjf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/crealf.c b/REORG.TODO/sysdeps/alpha/fpu/crealf.c new file mode 100644 index 0000000000..fdaaf2e59e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/crealf.c @@ -0,0 +1,40 @@ +/* Return real part of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __crealf __crealf_not_defined +#define crealf crealf_not_defined + +#include <complex.h> +#include "cfloat-compat.h" + +#undef __crealf +#undef crealf + +float +__c1_crealf (c1_cfloat_decl (z)) +{ + return c1_cfloat_real (z); +} + +float +__c2_crealf (c2_cfloat_decl (z)) +{ + return c2_cfloat_real (z); +} + +cfloat_versions (crealf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/e_sqrt.c b/REORG.TODO/sysdeps/alpha/fpu/e_sqrt.c new file mode 100644 index 0000000000..ec9d0d12f2 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/e_sqrt.c @@ -0,0 +1,187 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by David Mosberger (davidm@cs.arizona.edu). + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <shlib-compat.h> + +#if !defined(_IEEE_FP_INEXACT) + +/* + * This version is much faster than generic sqrt implementation, but + * it doesn't handle the inexact flag. It doesn't handle exceptional + * values either, but will defer to the full ieee754_sqrt routine which + * can. + */ + +/* Careful with rearranging this without consulting the assembly below. */ +const static struct sqrt_data_struct { + unsigned long dn, up, half, almost_three_half; + unsigned long one_and_a_half, two_to_minus_30, one, nan; + const int T2[64]; +} sqrt_data __attribute__((used)) = { + 0x3fefffffffffffff, /* __dn = nextafter(1,-Inf) */ + 0x3ff0000000000001, /* __up = nextafter(1,+Inf) */ + 0x3fe0000000000000, /* half */ + 0x3ff7ffffffc00000, /* almost_three_half = 1.5-2^-30 */ + 0x3ff8000000000000, /* one_and_a_half */ + 0x3e10000000000000, /* two_to_minus_30 */ + 0x3ff0000000000000, /* one */ + 0xffffffffffffffff, /* nan */ + + { 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866, + 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f, + 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d, + 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0, + 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989, + 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd, + 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e, + 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd } +}; + +asm ("\ + /* Define offsets into the structure defined in C above. */ \n\ + $DN = 0*8 \n\ + $UP = 1*8 \n\ + $HALF = 2*8 \n\ + $ALMOST_THREE_HALF = 3*8 \n\ + $NAN = 7*8 \n\ + $T2 = 8*8 \n\ + \n\ + /* Stack variables. */ \n\ + $K = 0 \n\ + $Y = 8 \n\ + \n\ + .text \n\ + .align 5 \n\ + .globl __ieee754_sqrt \n\ + .ent __ieee754_sqrt \n\ +__ieee754_sqrt: \n\ + ldgp $29, 0($27) \n\ + subq $sp, 16, $sp \n\ + .frame $sp, 16, $26, 0\n" +#ifdef PROF +" lda $28, _mcount \n\ + jsr $28, ($28), _mcount\n" +#endif +" .prologue 1 \n\ + \n\ + .align 4 \n\ + stt $f16, $K($sp) # e0 : \n\ + mult $f31, $f31, $f31 # .. fm : \n\ + lda $4, sqrt_data # e0 : \n\ + fblt $f16, $fixup # .. fa : \n\ + \n\ + ldah $2, 0x5fe8 # e0 : \n\ + ldq $3, $K($sp) # .. e1 : \n\ + ldt $f12, $HALF($4) # e0 : \n\ + ldt $f18, $ALMOST_THREE_HALF($4) # .. e1 : \n\ + \n\ + sll $3, 52, $5 # e0 : \n\ + lda $6, 0x7fd # .. e1 : \n\ + fnop # .. fa : \n\ + fnop # .. fm : \n\ + \n\ + subq $5, 1, $5 # e1 : \n\ + srl $3, 33, $1 # .. e0 : \n\ + cmpule $5, $6, $5 # e0 : \n\ + beq $5, $fixup # .. e1 : \n\ + \n\ + mult $f16, $f12, $f11 # fm : $f11 = x * 0.5 \n\ + subl $2, $1, $2 # .. e0 : \n\ + addt $f12, $f12, $f17 # .. fa : $f17 = 1.0 \n\ + srl $2, 12, $1 # e0 : \n\ + \n\ + and $1, 0xfc, $1 # e0 : \n\ + addq $1, $4, $1 # e1 : \n\ + ldl $1, $T2($1) # e0 : \n\ + addt $f12, $f17, $f15 # .. fa : $f15 = 1.5 \n\ + \n\ + subl $2, $1, $2 # e0 : \n\ + ldt $f14, $DN($4) # .. e1 : \n\ + sll $2, 32, $2 # e0 : \n\ + stq $2, $Y($sp) # e0 : \n\ + \n\ + ldt $f13, $Y($sp) # e0 : \n\ + mult/su $f11, $f13, $f10 # fm 2: $f10 = (x * 0.5) * y \n\ + mult $f10, $f13, $f10 # fm 4: $f10 = ((x*0.5)*y)*y \n\ + subt $f15, $f10, $f1 # fa 4: $f1 = (1.5-0.5*x*y*y) \n\ + \n\ + mult $f13, $f1, $f13 # fm 4: yp = y*(1.5-0.5*x*y^2)\n\ + mult/su $f11, $f13, $f1 # fm 4: $f11 = x * 0.5 * yp \n\ + mult $f1, $f13, $f11 # fm 4: $f11 = (x*0.5*yp)*yp \n\ + subt $f18, $f11, $f1 # fa 4: $f1=(1.5-2^-30)-x/2*yp^2\n\ + \n\ + mult $f13, $f1, $f13 # fm 4: ypp = $f13 = yp*$f1 \n\ + subt $f15, $f12, $f1 # .. fa : $f1 = (1.5 - 0.5) \n\ + ldt $f15, $UP($4) # .. e0 : \n\ + mult/su $f16, $f13, $f10 # fm 4: z = $f10 = x * ypp \n\ + \n\ + mult $f10, $f13, $f11 # fm 4: $f11 = z*ypp \n\ + mult $f10, $f12, $f12 # fm : $f12 = z*0.5 \n\ + subt $f1, $f11, $f1 # fa 4: $f1 = 1 - z*ypp \n\ + mult $f12, $f1, $f12 # fm 4: $f12 = z/2*(1 - z*ypp)\n\ + \n\ + addt $f10, $f12, $f0 # fa 4: zp=res= z+z/2*(1-z*ypp)\n\ + mult/c $f0, $f14, $f12 # fm 4: zmi = zp * DN \n\ + mult/c $f0, $f15, $f11 # fm : zpl = zp * UP \n\ + mult/c $f0, $f12, $f1 # fm : $f1 = zp * zmi \n\ + \n\ + mult/c $f0, $f11, $f15 # fm : $f15 = zp * zpl \n\ + subt/su $f1, $f16, $f13 # .. fa : y1 = zp*zmi - x \n\ + subt/su $f15, $f16, $f14 # fa 4: y2 = zp*zpl - x \n\ + fcmovge $f13, $f12, $f0 # fa 3: res = (y1>=0)?zmi:res \n\ + \n\ + fcmovlt $f14, $f11, $f0 # fa 4: res = (y2<0)?zpl:res \n\ + addq $sp, 16, $sp # .. e0 : \n\ + ret # .. e1 : \n\ + \n\ + .align 4 \n\ +$fixup: \n\ + addq $sp, 16, $sp \n\ + br __full_ieee754_sqrt !samegp \n\ + \n\ + .end __ieee754_sqrt"); + +/* Avoid the __sqrt_finite alias that dbl-64/e_sqrt.c would give... */ +#undef strong_alias +#define strong_alias(a,b) + +/* ... defining our own. */ +#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +asm (".global __sqrt_finite1; __sqrt_finite1 = __ieee754_sqrt"); +#else +asm (".global __sqrt_finite; __sqrt_finite = __ieee754_sqrt"); +#endif + +static double __full_ieee754_sqrt(double) __attribute_used__; +#define __ieee754_sqrt __full_ieee754_sqrt + +#elif SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +# define __sqrt_finite __sqrt_finite1 +#endif /* _IEEE_FP_INEXACT */ + +#include <sysdeps/ieee754/dbl-64/e_sqrt.c> + +/* Work around forgotten symbol in alphaev6 build. */ +#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +# undef __sqrt_finite +# undef __ieee754_sqrt +compat_symbol (libm, __sqrt_finite1, __sqrt_finite, GLIBC_2_15); +versioned_symbol (libm, __ieee754_sqrt, __sqrt_finite, GLIBC_2_18); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/e_sqrtf.c b/REORG.TODO/sysdeps/alpha/fpu/e_sqrtf.c new file mode 100644 index 0000000000..ad523f5cf2 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/e_sqrtf.c @@ -0,0 +1,14 @@ +#include <shlib-compat.h> + +#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +# define __sqrtf_finite __sqrtf_finite1 +#endif + +#include <sysdeps/ieee754/flt-32/e_sqrtf.c> + +/* Work around forgotten symbol in alphaev6 build. */ +#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +# undef __sqrtf_finite +compat_symbol (libm, __sqrtf_finite1, __sqrtf_finite, GLIBC_2_15); +versioned_symbol (libm, __ieee754_sqrtf, __sqrtf_finite, GLIBC_2_18); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/fclrexcpt.c b/REORG.TODO/sysdeps/alpha/fpu/fclrexcpt.c new file mode 100644 index 0000000000..9e9be0b206 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fclrexcpt.c @@ -0,0 +1,47 @@ +/* Clear given exceptions in current floating-point environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__feclearexcept (int excepts) +{ + unsigned long int swcr; + + /* Get the current state. */ + swcr = __ieee_get_fp_control (); + + /* Clear the relevant bits. */ + swcr &= ~((unsigned long int) excepts & SWCR_STATUS_MASK); + + /* Put the new state in effect. */ + __ieee_set_fp_control (swcr); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feclearexcept, __old_feclearexcept) +compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1); +#endif + +libm_hidden_ver (__feclearexcept, feclearexcept) +versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/alpha/fpu/fedisblxcpt.c b/REORG.TODO/sysdeps/alpha/fpu/fedisblxcpt.c new file mode 100644 index 0000000000..029393e558 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fedisblxcpt.c @@ -0,0 +1,35 @@ +/* Disable floating-point exceptions. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fedisableexcept (int excepts) +{ + unsigned long int new_exc, old_exc; + + new_exc = __ieee_get_fp_control (); + + old_exc = (new_exc & SWCR_ENABLE_MASK) << SWCR_ENABLE_SHIFT; + new_exc &= ~((excepts >> SWCR_ENABLE_SHIFT) & SWCR_ENABLE_MASK); + + __ieee_set_fp_control (new_exc); + + return old_exc; +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/feenablxcpt.c b/REORG.TODO/sysdeps/alpha/fpu/feenablxcpt.c new file mode 100644 index 0000000000..8244f02cd3 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/feenablxcpt.c @@ -0,0 +1,35 @@ +/* Enable floating-point exceptions. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +feenableexcept (int excepts) +{ + unsigned long int new_exc, old_exc; + + new_exc = __ieee_get_fp_control (); + + old_exc = (new_exc & SWCR_ENABLE_MASK) << SWCR_ENABLE_SHIFT; + new_exc |= (excepts >> SWCR_ENABLE_SHIFT) & SWCR_ENABLE_MASK; + + __ieee_set_fp_control (new_exc); + + return old_exc; +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/fegetenv.c b/REORG.TODO/sysdeps/alpha/fpu/fegetenv.c new file mode 100644 index 0000000000..0b242b3c12 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fegetenv.c @@ -0,0 +1,48 @@ +/* Store current floating-point environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fegetenv (fenv_t *envp) +{ + unsigned long int fpcr; + unsigned long int swcr; + + /* Get status from software and hardware. Note that we don't need an + excb because the callsys is an implied trap barrier. */ + swcr = __ieee_get_fp_control (); + __asm__ __volatile__ ("mf_fpcr %0" : "=f" (fpcr)); + + /* Merge the two bits of information. */ + *envp = ((fpcr & FPCR_ROUND_MASK) | (swcr & SWCR_ALL_MASK)); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetenv, __old_fegetenv) +compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fegetenv) +versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2); +libm_hidden_ver(__fegetenv, fegetenv) diff --git a/REORG.TODO/sysdeps/alpha/fpu/fegetexcept.c b/REORG.TODO/sysdeps/alpha/fpu/fegetexcept.c new file mode 100644 index 0000000000..ccb207433e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fegetexcept.c @@ -0,0 +1,30 @@ +/* Get enabled floating-point exceptions. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fegetexcept (void) +{ + unsigned long int exc; + + exc = __ieee_get_fp_control (); + + return (exc & SWCR_ENABLE_MASK) << SWCR_ENABLE_SHIFT; +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/fegetmode.c b/REORG.TODO/sysdeps/alpha/fpu/fegetmode.c new file mode 100644 index 0000000000..18ab5d328a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fegetmode.c @@ -0,0 +1,33 @@ +/* Store current floating-point control modes. Alpha version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fegetmode (femode_t *modep) +{ + unsigned long int fpcr; + unsigned long int swcr; + + /* As in fegetenv. */ + swcr = __ieee_get_fp_control (); + __asm__ __volatile__ ("mf_fpcr %0" : "=f" (fpcr)); + *modep = ((fpcr & FPCR_ROUND_MASK) | (swcr & SWCR_ALL_MASK)); + + return 0; +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/fegetround.c b/REORG.TODO/sysdeps/alpha/fpu/fegetround.c new file mode 100644 index 0000000000..9befd175b7 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fegetround.c @@ -0,0 +1,33 @@ +/* Return current rounding direction. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fegetround (void) +{ + unsigned long fpcr; + + __asm__ __volatile__("excb; mf_fpcr %0" : "=f"(fpcr)); + + return (fpcr >> FPCR_ROUND_SHIFT) & 3; +} +libm_hidden_def (__fegetround) +weak_alias (__fegetround, fegetround) +libm_hidden_weak (fegetround) diff --git a/REORG.TODO/sysdeps/alpha/fpu/feholdexcpt.c b/REORG.TODO/sysdeps/alpha/fpu/feholdexcpt.c new file mode 100644 index 0000000000..7f4f487ad5 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/feholdexcpt.c @@ -0,0 +1,35 @@ +/* Store current floating-point environment and clear exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__feholdexcept (fenv_t *envp) +{ + /* Save the current state. */ + __fegetenv(envp); + + /* Clear all exception status bits and exception enable bits. */ + __ieee_set_fp_control(*envp & SWCR_MAP_MASK); + + return 0; +} +libm_hidden_def (__feholdexcept) +weak_alias (__feholdexcept, feholdexcept) +libm_hidden_weak (feholdexcept) diff --git a/REORG.TODO/sysdeps/alpha/fpu/fenv_libc.h b/REORG.TODO/sysdeps/alpha/fpu/fenv_libc.h new file mode 100644 index 0000000000..355d6f0659 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fenv_libc.h @@ -0,0 +1,39 @@ +/* Internal libc stuff for floating point environment routines. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FENV_LIBC_H +#define _FENV_LIBC_H 1 + +#include <fenv.h> + +#define FPCR_ROUND_MASK (3UL << 58) +#define FPCR_ROUND_SHIFT 58 + +#define SWCR_MAP_MASK (3UL << 12) +#define SWCR_ENABLE_SHIFT 16 +#define SWCR_ENABLE_MASK (FE_ALL_EXCEPT >> SWCR_ENABLE_SHIFT) +#define SWCR_STATUS_MASK (FE_ALL_EXCEPT) +#define SWCR_ALL_MASK (SWCR_ENABLE_MASK \ + | SWCR_MAP_MASK \ + | SWCR_STATUS_MASK) + +/* These are declared for public consumption in <bits/fenv.h>. */ +libc_hidden_proto(__ieee_set_fp_control) +libc_hidden_proto(__ieee_get_fp_control) + +#endif /* fenv_libc.h */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/fesetenv.c b/REORG.TODO/sysdeps/alpha/fpu/fesetenv.c new file mode 100644 index 0000000000..c115f13a87 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fesetenv.c @@ -0,0 +1,57 @@ +/* Install given floating-point environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fesetenv (const fenv_t *envp) +{ + unsigned long int fpcr; + fenv_t env; + + /* Magic encoding of default values: high bit set (never possible for a + user-space address) is not indirect. And we don't even have to get + rid of it since we mask things around just below. */ + if ((long int) envp >= 0) + env = *envp; + else + env = (unsigned long int) envp; + + /* Reset the rounding mode with the hardware fpcr. Note that the following + system call is an implied trap barrier for our modification. */ + __asm__ __volatile__ ("excb; mf_fpcr %0" : "=f" (fpcr)); + fpcr = (fpcr & ~FPCR_ROUND_MASK) | (env & FPCR_ROUND_MASK); + __asm__ __volatile__ ("mt_fpcr %0" : : "f" (fpcr)); + + /* Reset the exception status and mask with the kernel's FP code. */ + __ieee_set_fp_control (env & SWCR_ALL_MASK); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetenv, __old_fesetenv) +compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fesetenv) +libm_hidden_ver (__fesetenv, fesetenv) +versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/alpha/fpu/fesetexcept.c b/REORG.TODO/sysdeps/alpha/fpu/fesetexcept.c new file mode 100644 index 0000000000..c84a2dfe37 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fesetexcept.c @@ -0,0 +1,31 @@ +/* Set given exception flags. Alpha version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fesetexcept (int excepts) +{ + unsigned long int tmp; + + tmp = __ieee_get_fp_control (); + tmp |= excepts & SWCR_STATUS_MASK; + __ieee_set_fp_control (tmp); + + return 0; +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/fesetmode.c b/REORG.TODO/sysdeps/alpha/fpu/fesetmode.c new file mode 100644 index 0000000000..23a7be687b --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fesetmode.c @@ -0,0 +1,44 @@ +/* Install given floating-point control modes. Alpha version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fesetmode (const femode_t *modep) +{ + unsigned long int fpcr; + unsigned long int swcr; + femode_t mode; + + /* As in fesetenv. */ + if ((long int) modep >= 0) + mode = *modep; + else + mode = (unsigned long int) modep; + + __asm__ __volatile__ ("excb; mf_fpcr %0" : "=f" (fpcr)); + fpcr = (fpcr & ~FPCR_ROUND_MASK) | (mode & FPCR_ROUND_MASK); + __asm__ __volatile__ ("mt_fpcr %0" : : "f" (fpcr)); + + swcr = __ieee_get_fp_control (); + swcr = ((mode & SWCR_ALL_MASK & ~SWCR_STATUS_MASK) + | (swcr & SWCR_STATUS_MASK)); + __ieee_set_fp_control (swcr); + + return 0; +} diff --git a/REORG.TODO/sysdeps/alpha/fpu/fesetround.c b/REORG.TODO/sysdeps/alpha/fpu/fesetround.c new file mode 100644 index 0000000000..af2b695029 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fesetround.c @@ -0,0 +1,44 @@ +/* Set current rounding direction. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fesetround (int round) +{ + unsigned long fpcr; + + if (round & ~3) + return 1; + + /* Get the current state. */ + __asm__ __volatile__("excb; mf_fpcr %0" : "=f"(fpcr)); + + /* Set the relevant bits. */ + fpcr = ((fpcr & ~FPCR_ROUND_MASK) + | ((unsigned long)round << FPCR_ROUND_SHIFT)); + + /* Put the new state in effect. */ + __asm__ __volatile__("mt_fpcr %0; excb" : : "f"(fpcr)); + + return 0; +} +libm_hidden_def (__fesetround) +weak_alias (__fesetround, fesetround) +libm_hidden_weak (fesetround) diff --git a/REORG.TODO/sysdeps/alpha/fpu/feupdateenv.c b/REORG.TODO/sysdeps/alpha/fpu/feupdateenv.c new file mode 100644 index 0000000000..d77b276b40 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/feupdateenv.c @@ -0,0 +1,50 @@ +/* Install given floating-point environment and raise exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__feupdateenv (const fenv_t *envp) +{ + unsigned long int tmp; + + /* Get the current exception state. */ + tmp = __ieee_get_fp_control (); + + /* Install new environment. */ + __fesetenv (envp); + + /* Raise the saved exception. Incidently for us the implementation + defined format of the values in objects of type fexcept_t is the + same as the ones specified using the FE_* constants. */ + __feraiseexcept (tmp & SWCR_STATUS_MASK); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feupdateenv, __old_feupdateenv) +compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1); +#endif + +libm_hidden_def (__feupdateenv) +libm_hidden_ver (__feupdateenv, feupdateenv) +versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/alpha/fpu/fgetexcptflg.c b/REORG.TODO/sysdeps/alpha/fpu/fgetexcptflg.c new file mode 100644 index 0000000000..c69b0a1ce5 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fgetexcptflg.c @@ -0,0 +1,43 @@ +/* Store current representation for exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fegetexceptflag (fexcept_t *flagp, int excepts) +{ + unsigned long int tmp; + + /* Get the current state. */ + tmp = __ieee_get_fp_control(); + + /* Return that portion that corresponds to the requested exceptions. */ + *flagp = tmp & excepts & SWCR_STATUS_MASK; + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetexceptflag, __old_fegetexceptflag) +compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/alpha/fpu/fpu_control.h b/REORG.TODO/sysdeps/alpha/fpu/fpu_control.h new file mode 100644 index 0000000000..fbb55e5461 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fpu_control.h @@ -0,0 +1,105 @@ +/* FPU control word bits. Alpha-mapped-to-Intel version. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Olaf Flebbe. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _ALPHA_FPU_CONTROL_H +#define _ALPHA_FPU_CONTROL_H + +/* + * Since many programs seem to hardcode the values passed to __setfpucw() + * (rather than using the manifest constants) we emulate the x87 interface + * here (at least where this makes sense). + * + * 15-13 12 11-10 9-8 7-6 5 4 3 2 1 0 + * | reserved | IC | RC | PC | reserved | PM | UM | OM | ZM | DM | IM + * + * IM: Invalid operation mask + * DM: Denormalized operand mask + * ZM: Zero-divide mask + * OM: Overflow mask + * UM: Underflow mask + * PM: Precision (inexact result) mask + * + * Mask bit is 1 means no interrupt. + * + * PC: Precision control + * 11 - round to extended precision + * 10 - round to double precision + * 00 - round to single precision + * + * RC: Rounding control + * 00 - rounding to nearest + * 01 - rounding down (toward - infinity) + * 10 - rounding up (toward + infinity) + * 11 - rounding toward zero + * + * IC: Infinity control + * That is for 8087 and 80287 only. + * + * The hardware default is 0x037f. I choose 0x1372. + */ + +#include <features.h> + +/* masking of interrupts */ +#define _FPU_MASK_IM 0x01 +#define _FPU_MASK_DM 0x02 +#define _FPU_MASK_ZM 0x04 +#define _FPU_MASK_OM 0x08 +#define _FPU_MASK_UM 0x10 +#define _FPU_MASK_PM 0x20 + +/* precision control -- without effect on Alpha */ +#define _FPU_EXTENDED 0x300 /* RECOMMENDED */ +#define _FPU_DOUBLE 0x200 +#define _FPU_SINGLE 0x0 /* DO NOT USE */ + +/* + * rounding control---notice that on the Alpha this affects only + * instructions with the dynamic rounding mode qualifier (/d). + */ +#define _FPU_RC_NEAREST 0x000 /* RECOMMENDED */ +#define _FPU_RC_DOWN 0x400 +#define _FPU_RC_UP 0x800 +#define _FPU_RC_ZERO 0xC00 + +#define _FPU_RESERVED 0xF0C0 /* Reserved bits in cw */ + + +/* Now two recommended cw */ + +/* Linux default: + - extended precision + - rounding to positive infinity. There is no /p instruction + qualifier. By setting the dynamic rounding mode to +infinity, + one can use /d to get round to +infinity with no extra overhead + (so long as the default isn't changed, of course...) + - no exceptions enabled. */ + +#define _FPU_DEFAULT 0x137f + +/* IEEE: same as above. */ +#define _FPU_IEEE 0x137f + +/* Type of the control word. */ +typedef unsigned int fpu_control_t; + +/* Default control word set at startup. */ +extern fpu_control_t __fpu_control; + +#endif /* _ALPHA_FPU_CONTROL */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/fsetexcptflg.c b/REORG.TODO/sysdeps/alpha/fpu/fsetexcptflg.c new file mode 100644 index 0000000000..f39f6125c7 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/fsetexcptflg.c @@ -0,0 +1,46 @@ +/* Set floating-point environment exception handling. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fesetexceptflag (const fexcept_t *flagp, int excepts) +{ + unsigned long int tmp; + + /* Get the current exception state. */ + tmp = __ieee_get_fp_control (); + + /* Set all the bits that were called for. */ + tmp = (tmp & ~SWCR_STATUS_MASK) | (*flagp & excepts & SWCR_STATUS_MASK); + + /* And store it back. */ + __ieee_set_fp_control (tmp); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetexceptflag, __old_fesetexceptflag) +compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/alpha/fpu/ftestexcept.c b/REORG.TODO/sysdeps/alpha/fpu/ftestexcept.c new file mode 100644 index 0000000000..8a0cf74986 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/ftestexcept.c @@ -0,0 +1,32 @@ +/* Test exception in current environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fetestexcept (int excepts) +{ + unsigned long tmp; + + /* Get current exceptions. */ + tmp = __ieee_get_fp_control(); + + return tmp & excepts & SWCR_STATUS_MASK; +} +libm_hidden_def (fetestexcept) diff --git a/REORG.TODO/sysdeps/alpha/fpu/get-rounding-mode.h b/REORG.TODO/sysdeps/alpha/fpu/get-rounding-mode.h new file mode 100644 index 0000000000..866fb9926c --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/get-rounding-mode.h @@ -0,0 +1,35 @@ +/* Determine floating-point rounding mode within libc. Alpha version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef ALPHA_GET_ROUNDING_MODE_H +#define ALPHA_GET_ROUNDING_MODE_H 1 + +#include <fenv.h> +#include <fenv_libc.h> + +/* Return the floating-point rounding mode. */ + +static inline int +get_rounding_mode (void) +{ + unsigned long fpcr; + __asm__ __volatile__("excb; mf_fpcr %0" : "=f"(fpcr)); + return (fpcr >> FPCR_ROUND_SHIFT) & 3; +} + +#endif /* get-rounding-mode.h */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/libm-test-ulps b/REORG.TODO/sysdeps/alpha/fpu/libm-test-ulps new file mode 100644 index 0000000000..6ec37f72ac --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/libm-test-ulps @@ -0,0 +1,2262 @@ +# Begin of automatic generation + +# Maximal error of functions: +Function: "acos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acos_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acosh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "acosh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "acosh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "acosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "asin": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "asin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "asin_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "asinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "asinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "asinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "atan": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan2": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan2_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atan2_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "atan2_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "atanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "atanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "cabs": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_downward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_towardzero": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_upward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cacos": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cacos": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacos_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cacos_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "cacos_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cacos_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "cacos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cacos_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: Real part of "cacosh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cacosh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacosh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "cacosh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacosh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "cacosh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacosh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "cacosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "carg": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "carg_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "carg_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "carg_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "casin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "casin": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "casin_downward": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "casin_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "casin_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "casin_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "casin_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "casin_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: Real part of "casinh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "casinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "casinh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "casinh_downward": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "casinh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "casinh_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "casinh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "casinh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catan": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "catan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "catan_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "catan_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "catan_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catan_upward": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "catan_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catanh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "catanh": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "catanh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catanh_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "catanh_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catanh_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "catanh_upward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "catanh_upward": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt_upward": +double: 5 +float: 1 +idouble: 5 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "ccos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ccos_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ccos_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "ccosh_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ccosh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccosh_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ccosh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cexp": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "cexp": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cexp_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cexp_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cexp_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cexp_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "clog": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "clog10": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "clog10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog10_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog10_downward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog10_towardzero": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "clog10_towardzero": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog10_upward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "clog10_upward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog_downward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog_towardzero": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog_upward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "clog_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "cos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cos_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "cos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "cosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cosh_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 2 + +Function: "cosh_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 2 + +Function: "cosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 3 + +Function: Real part of "cpow": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "cpow": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cpow_downward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "cpow_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cpow_towardzero": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "cpow_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cpow_upward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cpow_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "csin": +ildouble: 1 +ldouble: 1 + +Function: Real part of "csin_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csin_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csin_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csinh": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "csinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "csinh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csinh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csinh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csinh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "csinh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csqrt_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "csqrt_downward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csqrt_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "csqrt_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csqrt_upward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "csqrt_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ctan": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctan_downward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "ctan_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Real part of "ctan_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "ctan_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Real part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "ctanh": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ctanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctanh_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "ctanh_downward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Real part of "ctanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "ctanh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctanh_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "ctanh_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "erf": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "erf_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erf_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "erf_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erfc": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "erfc_downward": +double: 5 +float: 6 +idouble: 5 +ifloat: 6 +ildouble: 5 +ldouble: 5 + +Function: "erfc_towardzero": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "erfc_upward": +double: 5 +float: 6 +idouble: 5 +ifloat: 6 +ildouble: 5 +ldouble: 5 + +Function: "exp": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp10": +double: 2 +idouble: 2 +ildouble: 2 +ldouble: 2 + +Function: "exp10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "exp10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "exp10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "exp2": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp_downward": +double: 1 +idouble: 1 + +Function: "exp_towardzero": +double: 1 +idouble: 1 + +Function: "exp_upward": +double: 1 +idouble: 1 + +Function: "expm1": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "expm1_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "expm1_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "expm1_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "gamma": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "gamma_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: "gamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "gamma_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: "hypot": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "hypot_downward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "hypot_towardzero": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "hypot_upward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "j0": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "j0_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "j0_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "j0_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "j1": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "j1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "j1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "j1_upward": +double: 3 +float: 5 +idouble: 3 +ifloat: 5 +ildouble: 3 +ldouble: 3 + +Function: "jn": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: "jn_downward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: "jn_towardzero": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: "jn_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 7 +ldouble: 7 + +Function: "lgamma": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "lgamma_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: "lgamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "lgamma_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: "log": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "log10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log10_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 1 +ldouble: 1 + +Function: "log10_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log10_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log1p": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "log1p_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "log1p_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "log1p_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log2": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "log2_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "log2_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log2_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 1 +ldouble: 1 + +Function: "log_downward": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log_towardzero": +float: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "pow": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "pow10": +double: 2 +idouble: 2 +ildouble: 2 +ldouble: 2 + +Function: "pow10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "pow10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "pow10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "pow_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "pow_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "pow_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "sin": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sin_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "sin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sincos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sincos_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sincos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "sincos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sinh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "sinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "sinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "tan": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "tan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "tan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "tan_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "tanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "tanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "tanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "tanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "tgamma": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "tgamma_downward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 5 +ldouble: 5 + +Function: "tgamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "tgamma_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "y0": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "y0_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "y0_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "y0_upward": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "y1": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "y1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "y1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "y1_upward": +double: 7 +float: 2 +idouble: 7 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "yn": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "yn_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "yn_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "yn_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +# end of automatic generation diff --git a/REORG.TODO/sysdeps/alpha/fpu/libm-test-ulps-name b/REORG.TODO/sysdeps/alpha/fpu/libm-test-ulps-name new file mode 100644 index 0000000000..5219734094 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/libm-test-ulps-name @@ -0,0 +1 @@ +Alpha diff --git a/REORG.TODO/sysdeps/alpha/fpu/math_private.h b/REORG.TODO/sysdeps/alpha/fpu/math_private.h new file mode 100644 index 0000000000..1e97c867c3 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/math_private.h @@ -0,0 +1,50 @@ +#ifndef ALPHA_MATH_PRIVATE_H +#define ALPHA_MATH_PRIVATE_H 1 + +/* In bits/mathinline.h we define __isnan et al. + In sysdeps/alpha/fpu/s_isnan.c we move the identifier out of the way + via macro hackery. In both cases, tell math/math_private.h that + we have a local copy of the function. */ + +#ifndef __isnan +# define __isnan __isnan +#endif +#ifndef __isnanf +# define __isnanf __isnanf +#endif + +/* Generic code forces values to memory; we don't need to do that. */ +#define math_opt_barrier(x) \ + ({ __typeof (x) __x = (x); __asm ("" : "+frm" (__x)); __x; }) +#define math_force_eval(x) \ + ({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "frm" (__x)); }) + +#include_next <math_private.h> + +#ifdef __alpha_fix__ +extern __always_inline double +__ieee754_sqrt (double d) +{ + double ret; +# ifdef _IEEE_FP_INEXACT + asm ("sqrtt/suid %1,%0" : "=&f"(ret) : "f"(d)); +# else + asm ("sqrtt/sud %1,%0" : "=&f"(ret) : "f"(d)); +# endif + return ret; +} + +extern __always_inline float +__ieee754_sqrtf (float d) +{ + float ret; +# ifdef _IEEE_FP_INEXACT + asm ("sqrts/suid %1,%0" : "=&f"(ret) : "f"(d)); +# else + asm ("sqrts/sud %1,%0" : "=&f"(ret) : "f"(d)); +# endif + return ret; +} +#endif /* FIX */ + +#endif /* ALPHA_MATH_PRIVATE_H */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_cacosf.c b/REORG.TODO/sysdeps/alpha/fpu/s_cacosf.c new file mode 100644 index 0000000000..e06b06305f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_cacosf.c @@ -0,0 +1,57 @@ +/* Return arc cosine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __cacosf __cacosf_not_defined +#define cacosf cacosf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __cacosf +#undef cacosf + +static _Complex float internal_cacosf (_Complex float x); + +#define M_DECL_FUNC(f) internal_cacosf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_cacos_template.c> + +#include "cfloat-compat.h" + +#undef __cacosf + +c1_cfloat_rettype +__c1_cacosf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_cacosf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_cacosf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_cacosf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (cacosf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_cacoshf.c b/REORG.TODO/sysdeps/alpha/fpu/s_cacoshf.c new file mode 100644 index 0000000000..d67cffb59e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_cacoshf.c @@ -0,0 +1,56 @@ +/* Return arc hyperbole cosine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __cacoshf __cacoshf_not_defined +#define cacoshf cacoshf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __cacoshf +#undef cacoshf + +static _Complex float internal_cacoshf (_Complex float x); + +#define M_DECL_FUNC(f) internal_cacoshf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_cacosh_template.c> +#include "cfloat-compat.h" + +#undef __cacoshf + +c1_cfloat_rettype +__c1_cacoshf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_cacoshf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_cacoshf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_cacoshf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (cacoshf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_casinf.c b/REORG.TODO/sysdeps/alpha/fpu/s_casinf.c new file mode 100644 index 0000000000..1baa1d4669 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_casinf.c @@ -0,0 +1,54 @@ +/* Return arc sine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __casinf __casinf_not_defined +#define casinf casinf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __casinf +#undef casinf + +static _Complex float internal_casinf (_Complex float x); + +#define M_DECL_FUNC(f) internal_casinf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_casin_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_casinf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_casinf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_casinf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_casinf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (casinf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_casinhf.c b/REORG.TODO/sysdeps/alpha/fpu/s_casinhf.c new file mode 100644 index 0000000000..4cb3a2fe33 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_casinhf.c @@ -0,0 +1,54 @@ +/* Return arc hyperbole sine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __casinhf __casinhf_not_defined +#define casinhf casinhf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __casinhf +#undef casinhf + +static _Complex float internal_casinhf (_Complex float x); + +#define M_DECL_FUNC(f) internal_casinhf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_casinh_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_casinhf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_casinhf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_casinhf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_casinhf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (casinhf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_catanf.c b/REORG.TODO/sysdeps/alpha/fpu/s_catanf.c new file mode 100644 index 0000000000..6d928e077d --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_catanf.c @@ -0,0 +1,54 @@ +/* Return arc tangent of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __catanf __catanf_not_defined +#define catanf catanf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __catanf +#undef catanf + +static _Complex float internal_catanf (_Complex float x); + +#define M_DECL_FUNC(f) internal_catanf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_catan_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_catanf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_catanf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_catanf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_catanf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (catanf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_catanhf.c b/REORG.TODO/sysdeps/alpha/fpu/s_catanhf.c new file mode 100644 index 0000000000..d8942a057e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_catanhf.c @@ -0,0 +1,54 @@ +/* Return arc hyperbole tangent of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __catanhf __catanhf_not_defined +#define catanhf catanhf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __catanhf +#undef catanhf + +static _Complex float internal_catanhf (_Complex float x); + +#define M_DECL_FUNC(f) internal_catanhf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_catanh_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_catanhf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_catanhf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_catanhf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_catanhf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (catanhf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_ccosf.c b/REORG.TODO/sysdeps/alpha/fpu/s_ccosf.c new file mode 100644 index 0000000000..abc7f10766 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_ccosf.c @@ -0,0 +1,54 @@ +/* Return cosine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __ccosf __ccosf_not_defined +#define ccosf ccosf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __ccosf +#undef ccosf + +static _Complex float internal_ccosf (_Complex float x); + +#define M_DECL_FUNC(f) internal_ccosf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_ccos_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_ccosf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_ccosf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_ccosf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_ccosf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (ccosf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_ccoshf.c b/REORG.TODO/sysdeps/alpha/fpu/s_ccoshf.c new file mode 100644 index 0000000000..65deabd9b2 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_ccoshf.c @@ -0,0 +1,54 @@ +/* Return hyperbole cosine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __ccoshf __ccoshf_not_defined +#define ccoshf ccoshf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __ccoshf +#undef ccoshf + +static _Complex float internal_ccoshf (_Complex float x); + +#define M_DECL_FUNC(f) internal_ccoshf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_ccosh_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_ccoshf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_ccoshf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_ccoshf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_ccoshf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (ccoshf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_ceil.c b/REORG.TODO/sysdeps/alpha/fpu/s_ceil.c new file mode 100644 index 0000000000..029ee09315 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_ceil.c @@ -0,0 +1,56 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + +/* Use the -inf rounding mode conversion instructions to implement + ceil, via something akin to -floor(-x). This is much faster than + playing with the fpcr to achieve +inf rounding mode. */ + +double +__ceil (double x) +{ + if (isnan (x)) + return x + x; + + if (isless (fabs (x), 9007199254740992.0)) /* 1 << DBL_MANT_DIG */ + { + double tmp1, new_x; + + new_x = -x; + __asm ( + "cvttq/svm %2,%1\n\t" + "cvtqt/m %1,%0\n\t" + : "=f"(new_x), "=&f"(tmp1) + : "f"(new_x)); + + /* Fix up the negation we did above, as well as handling -0 properly. */ + x = copysign(new_x, x); + } + return x; +} + +weak_alias (__ceil, ceil) +#ifdef NO_LONG_DOUBLE +strong_alias (__ceil, __ceill) +weak_alias (__ceil, ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_ceilf.c b/REORG.TODO/sysdeps/alpha/fpu/s_ceilf.c new file mode 100644 index 0000000000..8c76c65d7f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_ceilf.c @@ -0,0 +1,53 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +/* Use the -inf rounding mode conversion instructions to implement + ceil, via something akin to -floor(-x). This is much faster than + playing with the fpcr to achieve +inf rounding mode. */ + +float +__ceilf (float x) +{ + if (isnanf (x)) + return x + x; + + if (isless (fabsf (x), 16777216.0f)) /* 1 << FLT_MANT_DIG */ + { + /* Note that Alpha S_Floating is stored in registers in a + restricted T_Floating format, so we don't even need to + convert back to S_Floating in the end. The initial + conversion to T_Floating is needed to handle denormals. */ + + float tmp1, tmp2, new_x; + + new_x = -x; + __asm ("cvtst/s %3,%2\n\t" + "cvttq/svm %2,%1\n\t" + "cvtqt/m %1,%0\n\t" + : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2) + : "f"(new_x)); + + /* Fix up the negation we did above, as well as handling -0 properly. */ + x = copysignf(new_x, x); + } + return x; +} + +weak_alias (__ceilf, ceilf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_cexpf.c b/REORG.TODO/sysdeps/alpha/fpu/s_cexpf.c new file mode 100644 index 0000000000..64daf689d2 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_cexpf.c @@ -0,0 +1,54 @@ +/* Return exponent of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __cexpf __cexpf_not_defined +#define cexpf cexpf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __cexpf +#undef cexpf + +static _Complex float internal_cexpf (_Complex float x); + +#define M_DECL_FUNC(f) internal_cexpf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_cexp_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_cexpf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_cexpf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_cexpf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_cexpf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (cexpf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_clog10f.c b/REORG.TODO/sysdeps/alpha/fpu/s_clog10f.c new file mode 100644 index 0000000000..0646a09ca5 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_clog10f.c @@ -0,0 +1,64 @@ +/* Return base 10 logarithm of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __clog10f __clog10f_not_defined +#define clog10f clog10f_not_defined + +#include <complex.h> +#include <math.h> + +#undef __clog10f +#undef clog10f + +static _Complex float internal_clog10f (_Complex float x); + +#define M_DECL_FUNC(f) internal_clog10f +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_clog10_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_clog10f (c1_cfloat_decl (x)) +{ + _Complex float r = internal_clog10f (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_clog10f (c2_cfloat_decl (x)) +{ + _Complex float r = internal_clog10f (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +/* Ug. __clog10f was exported from GLIBC_2.1. This is the only + complex function whose double-underscore symbol was exported, + so we get to handle that specially. */ +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_3_4) +strong_alias (__c1_clog10f, __c1_clog10f_2); +compat_symbol (libm, __c1_clog10f, clog10f, GLIBC_2_1); +compat_symbol (libm, __c1_clog10f_2, __clog10f, GLIBC_2_1); +#endif +versioned_symbol (libm, __c2_clog10f, clog10f, GLIBC_2_3_4); +extern typeof(__c2_clog10f) __clog10f attribute_hidden; +strong_alias (__c2_clog10f, __clog10f) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_clogf.c b/REORG.TODO/sysdeps/alpha/fpu/s_clogf.c new file mode 100644 index 0000000000..1a7e234aa9 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_clogf.c @@ -0,0 +1,54 @@ +/* Return natural logarithm of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __clogf __clogf_not_defined +#define clogf clogf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __clogf +#undef clogf + +static _Complex float internal_clogf (_Complex float x); + +#define M_DECL_FUNC(f) internal_clogf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_clog_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_clogf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_clogf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_clogf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_clogf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (clogf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_copysign.c b/REORG.TODO/sysdeps/alpha/fpu/s_copysign.c new file mode 100644 index 0000000000..3bd3dd48df --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_copysign.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + +double +__copysign (double x, double y) +{ + return __builtin_copysign (x, y); +} + +weak_alias (__copysign, copysign) +#ifdef NO_LONG_DOUBLE +strong_alias (__copysign, __copysignl) +weak_alias (__copysign, copysignl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __copysign, copysignl, GLIBC_2_0); +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __copysign, copysignl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_copysignf.c b/REORG.TODO/sysdeps/alpha/fpu/s_copysignf.c new file mode 100644 index 0000000000..90b20124a6 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_copysignf.c @@ -0,0 +1,27 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +float +__copysignf (float x, float y) +{ + return __builtin_copysignf (x, y); +} + +weak_alias (__copysignf, copysignf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_cpowf.c b/REORG.TODO/sysdeps/alpha/fpu/s_cpowf.c new file mode 100644 index 0000000000..dacf0e12e0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_cpowf.c @@ -0,0 +1,54 @@ +/* Return power of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __cpowf __cpowf_not_defined +#define cpowf cpowf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __cpowf +#undef cpowf + +static _Complex float internal_cpowf (_Complex float x, _Complex float c); + +#define M_DECL_FUNC(f) internal_cpowf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_cpow_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_cpowf (c1_cfloat_decl (x), c1_cfloat_decl (c)) +{ + _Complex float r = internal_cpowf (c1_cfloat_value (x), c1_cfloat_value (c)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_cpowf (c2_cfloat_decl (x), c2_cfloat_decl (c)) +{ + _Complex float r = internal_cpowf (c2_cfloat_value (x), c2_cfloat_value (c)); + return c2_cfloat_return (r); +} + +cfloat_versions (cpowf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_cprojf.c b/REORG.TODO/sysdeps/alpha/fpu/s_cprojf.c new file mode 100644 index 0000000000..316cc1f551 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_cprojf.c @@ -0,0 +1,54 @@ +/* Return projection of complex float value to Riemann sphere. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __cprojf __cprojf_not_defined +#define cprojf cprojf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __cprojf +#undef cprojf + +static _Complex float internal_cprojf (_Complex float x); + +#define M_DECL_FUNC(f) internal_cprojf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_cproj_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_cprojf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_cprojf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_cprojf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_cprojf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (cprojf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_csinf.c b/REORG.TODO/sysdeps/alpha/fpu/s_csinf.c new file mode 100644 index 0000000000..f884d29fc9 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_csinf.c @@ -0,0 +1,54 @@ +/* Return sine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __csinf __csinf_not_defined +#define csinf csinf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __csinf +#undef csinf + +static _Complex float internal_csinf (_Complex float x); + +#define M_DECL_FUNC(f) internal_csinf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_csin_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_csinf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_csinf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_csinf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_csinf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (csinf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_csinhf.c b/REORG.TODO/sysdeps/alpha/fpu/s_csinhf.c new file mode 100644 index 0000000000..071ff1227b --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_csinhf.c @@ -0,0 +1,54 @@ +/* Return hyperbole sine of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __csinhf __csinhf_not_defined +#define csinhf csinhf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __csinhf +#undef csinhf + +static _Complex float internal_csinhf (_Complex float x); + +#define M_DECL_FUNC(f) internal_csinhf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_csinh_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_csinhf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_csinhf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_csinhf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_csinhf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (csinhf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_csqrtf.c b/REORG.TODO/sysdeps/alpha/fpu/s_csqrtf.c new file mode 100644 index 0000000000..0611f09465 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_csqrtf.c @@ -0,0 +1,54 @@ +/* Return square root of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __csqrtf __csinhf_not_defined +#define csqrtf csqrtf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __csqrtf +#undef csqrtf + +static _Complex float internal_csqrtf (_Complex float x); + +#define M_DECL_FUNC(f) internal_csqrtf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_csqrt_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_csqrtf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_csqrtf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_csqrtf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_csqrtf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (csqrtf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_ctanf.c b/REORG.TODO/sysdeps/alpha/fpu/s_ctanf.c new file mode 100644 index 0000000000..7288db23c0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_ctanf.c @@ -0,0 +1,54 @@ +/* Return tangent of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __ctanf __ctanf_not_defined +#define ctanf ctanf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __ctanf +#undef ctanf + +static _Complex float internal_ctanf (_Complex float x); + +#define M_DECL_FUNC(f) internal_ctanf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_ctan_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_ctanf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_ctanf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_ctanf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_ctanf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (ctanf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_ctanhf.c b/REORG.TODO/sysdeps/alpha/fpu/s_ctanhf.c new file mode 100644 index 0000000000..fc0a5f6837 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_ctanhf.c @@ -0,0 +1,54 @@ +/* Return hyperbole tangent of complex float value. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __ctanhf __ctanhf_not_defined +#define ctanhf ctanhf_not_defined + +#include <complex.h> +#include <math.h> + +#undef __ctanhf +#undef ctanhf + +static _Complex float internal_ctanhf (_Complex float x); + +#define M_DECL_FUNC(f) internal_ctanhf +#include <math-type-macros-float.h> + +/* Disable any aliasing from base template. */ +#undef declare_mgen_alias +#define declare_mgen_alias(__to, __from) + +#include <math/s_ctanh_template.c> +#include "cfloat-compat.h" + +c1_cfloat_rettype +__c1_ctanhf (c1_cfloat_decl (x)) +{ + _Complex float r = internal_ctanhf (c1_cfloat_value (x)); + return c1_cfloat_return (r); +} + +c2_cfloat_rettype +__c2_ctanhf (c2_cfloat_decl (x)) +{ + _Complex float r = internal_ctanhf (c2_cfloat_value (x)); + return c2_cfloat_return (r); +} + +cfloat_versions (ctanhf); diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_fabs.c b/REORG.TODO/sysdeps/alpha/fpu/s_fabs.c new file mode 100644 index 0000000000..abcc6e7c75 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_fabs.c @@ -0,0 +1,35 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + +double +__fabs (double x) +{ + return __builtin_fabs (x); +} + +weak_alias (__fabs, fabs) +#ifdef NO_LONG_DOUBLE +strong_alias (__fabs, __fabsl) +weak_alias (__fabs, fabsl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __fabs, fabsl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_fabsf.c b/REORG.TODO/sysdeps/alpha/fpu/s_fabsf.c new file mode 100644 index 0000000000..5b1105cb00 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_fabsf.c @@ -0,0 +1,27 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +float +__fabsf (float x) +{ + return __builtin_fabsf (x); +} + +weak_alias (__fabsf, fabsf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_floor.c b/REORG.TODO/sysdeps/alpha/fpu/s_floor.c new file mode 100644 index 0000000000..49a0c760a9 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_floor.c @@ -0,0 +1,57 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + + +/* Use the -inf rounding mode conversion instructions to implement + floor. We note when the exponent is large enough that the value + must be integral, as this avoids unpleasant integer overflows. */ + +double +__floor (double x) +{ + if (isnan (x)) + return x + x; + + if (isless (fabs (x), 9007199254740992.0)) /* 1 << DBL_MANT_DIG */ + { + double tmp1, new_x; + + __asm ( + "cvttq/svm %2,%1\n\t" + "cvtqt/m %1,%0\n\t" + : "=f"(new_x), "=&f"(tmp1) + : "f"(x)); + + /* floor(-0) == -0, and in general we'll always have the same + sign as our input. */ + x = copysign(new_x, x); + } + return x; +} + +weak_alias (__floor, floor) +#ifdef NO_LONG_DOUBLE +strong_alias (__floor, __floorl) +weak_alias (__floor, floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_floorf.c b/REORG.TODO/sysdeps/alpha/fpu/s_floorf.c new file mode 100644 index 0000000000..79cae27720 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_floorf.c @@ -0,0 +1,54 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + + +/* Use the -inf rounding mode conversion instructions to implement + floor. We note when the exponent is large enough that the value + must be integral, as this avoids unpleasant integer overflows. */ + +float +__floorf (float x) +{ + if (isnanf (x)) + return x + x; + + if (isless (fabsf (x), 16777216.0f)) /* 1 << FLT_MANT_DIG */ + { + /* Note that Alpha S_Floating is stored in registers in a + restricted T_Floating format, so we don't even need to + convert back to S_Floating in the end. The initial + conversion to T_Floating is needed to handle denormals. */ + + float tmp1, tmp2, new_x; + + __asm ("cvtst/s %3,%2\n\t" + "cvttq/svm %2,%1\n\t" + "cvtqt/m %1,%0\n\t" + : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2) + : "f"(x)); + + /* floor(-0) == -0, and in general we'll always have the same + sign as our input. */ + x = copysignf(new_x, x); + } + return x; +} + +weak_alias (__floorf, floorf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_fmax.S b/REORG.TODO/sysdeps/alpha/fpu/s_fmax.S new file mode 100644 index 0000000000..8a65ae4963 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_fmax.S @@ -0,0 +1,57 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .set noat + .set noreorder + + .text +ENTRY (__fmax) + .prologue 0 + + cmptun/su $f16, $f16, $f10 + cmptun/su $f17, $f17, $f11 + fmov $f17, $f0 + unop + + trapb + fbne $f10, $ret + fmov $f16, $f0 + fbne $f11, $ret + + cmptlt/su $f16, $f17, $f11 + trapb + fcmovne $f11, $f17, $f0 +$ret: ret + +END (__fmax) + +/* Given the in-register format of single-precision, this works there too. */ +strong_alias (__fmax, __fmaxf) +weak_alias (__fmaxf, fmaxf) + +weak_alias (__fmax, fmax) +#ifdef NO_LONG_DOUBLE +strong_alias (__fmax, __fmaxl) +weak_alias (__fmaxl, fmaxl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fmax, fmaxl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_fmaxf.S b/REORG.TODO/sysdeps/alpha/fpu/s_fmaxf.S new file mode 100644 index 0000000000..3c2d62bb81 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_fmaxf.S @@ -0,0 +1 @@ +/* __fmaxf is in s_fmax.c */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_fmin.S b/REORG.TODO/sysdeps/alpha/fpu/s_fmin.S new file mode 100644 index 0000000000..926bd32ec4 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_fmin.S @@ -0,0 +1,57 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .set noat + .set noreorder + + .text +ENTRY (__fmin) + .prologue 0 + + cmptun/su $f16, $f16, $f10 + cmptun/su $f17, $f17, $f11 + fmov $f17, $f0 + unop + + trapb + fbne $f10, $ret + fmov $f16, $f0 + fbne $f11, $ret + + cmptlt/su $f17, $f16, $f11 + trapb + fcmovne $f11, $f17, $f0 +$ret: ret + +END (__fmin) + +/* Given the in-register format of single-precision, this works there too. */ +strong_alias (__fmin, __fminf) +weak_alias (__fminf, fminf) + +weak_alias (__fmin, fmin) +#ifdef NO_LONG_DOUBLE +strong_alias (__fmin, __fminl) +weak_alias (__fminl, fminl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fmin, fminl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_fminf.S b/REORG.TODO/sysdeps/alpha/fpu/s_fminf.S new file mode 100644 index 0000000000..10ab7fe53c --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_fminf.S @@ -0,0 +1 @@ +/* __fminf is in s_fmin.c */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_isnan.c b/REORG.TODO/sysdeps/alpha/fpu/s_isnan.c new file mode 100644 index 0000000000..b56fdbe2c5 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_isnan.c @@ -0,0 +1,58 @@ +/* Return 1 if argument is a NaN, else 0. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Ugly kludge to avoid declarations. */ +#define __isnanf not___isnanf +#define isnanf not_isnanf +#define __GI___isnanf not__GI___isnanf + +#include <math.h> +#include <math_ldbl_opt.h> + +#undef __isnanf +#undef isnanf +#undef __GI___isnanf + +int +__isnan (double x) +{ + uint64_t ix; + EXTRACT_WORDS64 (ix, x); + return ix * 2 > 0xffe0000000000000ul; +} + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +weak_alias (__isnan, isnanf) + +/* ??? GCC 4.8 fails to look through chains of aliases with asm names + attached. Work around this for now. */ +hidden_ver (__isnan, __isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif +#if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_isnanf.c b/REORG.TODO/sysdeps/alpha/fpu/s_isnanf.c new file mode 100644 index 0000000000..af41e43850 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_isnanf.c @@ -0,0 +1 @@ +/* In s_isnan.c */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_llrint.c b/REORG.TODO/sysdeps/alpha/fpu/s_llrint.c new file mode 100644 index 0000000000..5db97be037 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_llrint.c @@ -0,0 +1 @@ +/* In s_lrint.c */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_llrintf.c b/REORG.TODO/sysdeps/alpha/fpu/s_llrintf.c new file mode 100644 index 0000000000..18f2885ef7 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_llrintf.c @@ -0,0 +1 @@ +/* In s_lrintf.c */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_llround.c b/REORG.TODO/sysdeps/alpha/fpu/s_llround.c new file mode 100644 index 0000000000..b212fbd8e5 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_llround.c @@ -0,0 +1 @@ +/* In s_lround.c. */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_llroundf.c b/REORG.TODO/sysdeps/alpha/fpu/s_llroundf.c new file mode 100644 index 0000000000..73bdf3103f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_llroundf.c @@ -0,0 +1 @@ +/* In s_lroundf.c. */ diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_lrint.c b/REORG.TODO/sysdeps/alpha/fpu/s_lrint.c new file mode 100644 index 0000000000..2a644c57df --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_lrint.c @@ -0,0 +1,47 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __llrint not___llrint +#define llrint not_llrint +#include <math.h> +#include <math_ldbl_opt.h> +#undef __llrint +#undef llrint + +long int +__lrint (double x) +{ + long ret; + + __asm ("cvttq/svd %1,%0" : "=&f"(ret) : "f"(x)); + + return ret; +} + +strong_alias (__lrint, __llrint) +weak_alias (__lrint, lrint) +weak_alias (__llrint, llrint) +#ifdef NO_LONG_DOUBLE +strong_alias (__lrint, __lrintl) +strong_alias (__lrint, __llrintl) +weak_alias (__lrintl, lrintl) +weak_alias (__llrintl, llrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1); +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_lrintf.c b/REORG.TODO/sysdeps/alpha/fpu/s_lrintf.c new file mode 100644 index 0000000000..cfcf35caae --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_lrintf.c @@ -0,0 +1,38 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __llrintf not___llrintf +#define llrintf not_llrintf +#include <math.h> +#undef __llrintf +#undef llrintf + +long int +__lrintf (float x) +{ + double tmp; + long ret; + + __asm ("cvtst/s %2,%1\n\tcvttq/svd %1,%0" + : "=&f"(ret), "=&f"(tmp) : "f"(x)); + + return ret; +} + +strong_alias (__lrintf, __llrintf) +weak_alias (__lrintf, lrintf) +weak_alias (__llrintf, llrintf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_lround.c b/REORG.TODO/sysdeps/alpha/fpu/s_lround.c new file mode 100644 index 0000000000..78a067daf1 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_lround.c @@ -0,0 +1,47 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __llround not___llround +#define llround not_llround +#include <math.h> +#include <math_ldbl_opt.h> +#undef __llround +#undef llround + +long int +__lround (double x) +{ + double adj, y; + + adj = copysign (0.5, x); + asm("addt/suc %1,%2,%0" : "=&f"(y) : "f"(x), "f"(adj)); + return y; +} + +strong_alias (__lround, __llround) +weak_alias (__lround, lround) +weak_alias (__llround, llround) +#ifdef NO_LONG_DOUBLE +strong_alias (__lround, __lroundl) +strong_alias (__lround, __llroundl) +weak_alias (__lroundl, lroundl) +weak_alias (__llroundl, llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1); +compat_symbol (libm, __llround, llroundl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_lroundf.c b/REORG.TODO/sysdeps/alpha/fpu/s_lroundf.c new file mode 100644 index 0000000000..37df944224 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_lroundf.c @@ -0,0 +1,37 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __llroundf not___llroundf +#define llroundf not_llroundf +#include <math.h> +#undef __llroundf +#undef llroundf + + +long int +__lroundf (float x) +{ + float adj, y; + + adj = copysignf (0.5f, x); + asm("adds/suc %1,%2,%0" : "=&f"(y) : "f"(x), "f"(adj)); + return y; +} + +strong_alias (__lroundf, __llroundf) +weak_alias (__lroundf, lroundf) +weak_alias (__llroundf, llroundf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_nearbyint.c b/REORG.TODO/sysdeps/alpha/fpu/s_nearbyint.c new file mode 100644 index 0000000000..c3f204fff1 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_nearbyint.c @@ -0,0 +1,25 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math_ldbl_opt.h> + +#include <sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c> + +#if LONG_DOUBLE_COMPAT (libm, GLIBC_2_1) +compat_symbol (libm, __nearbyint, nearbyintl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_rint.c b/REORG.TODO/sysdeps/alpha/fpu/s_rint.c new file mode 100644 index 0000000000..fca35cf961 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_rint.c @@ -0,0 +1,51 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + + +double +__rint (double x) +{ + if (isnan (x)) + return x + x; + + if (isless (fabs (x), 9007199254740992.0)) /* 1 << DBL_MANT_DIG */ + { + double tmp1, new_x; + __asm ("cvttq/svid %2,%1\n\t" + "cvtqt/d %1,%0\n\t" + : "=f"(new_x), "=&f"(tmp1) + : "f"(x)); + + /* rint(-0.1) == -0, and in general we'll always have the same + sign as our input. */ + x = copysign(new_x, x); + } + return x; +} + +weak_alias (__rint, rint) +#ifdef NO_LONG_DOUBLE +strong_alias (__rint, __rintl) +weak_alias (__rint, rintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __rint, rintl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_rintf.c b/REORG.TODO/sysdeps/alpha/fpu/s_rintf.c new file mode 100644 index 0000000000..b6e8d2dd07 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_rintf.c @@ -0,0 +1,50 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + + +float +__rintf (float x) +{ + if (isnanf (x)) + return x + x; + + if (isless (fabsf (x), 16777216.0f)) /* 1 << FLT_MANT_DIG */ + { + /* Note that Alpha S_Floating is stored in registers in a + restricted T_Floating format, so we don't even need to + convert back to S_Floating in the end. The initial + conversion to T_Floating is needed to handle denormals. */ + + float tmp1, tmp2, new_x; + + __asm ("cvtst/s %3,%2\n\t" + "cvttq/svid %2,%1\n\t" + "cvtqt/d %1,%0\n\t" + : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2) + : "f"(x)); + + /* rint(-0.1) == -0, and in general we'll always have the same + sign as our input. */ + x = copysignf(new_x, x); + } + return x; +} + +weak_alias (__rintf, rintf) diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_trunc.c b/REORG.TODO/sysdeps/alpha/fpu/s_trunc.c new file mode 100644 index 0000000000..68a013d222 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_trunc.c @@ -0,0 +1,51 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + + +/* Use the chopped rounding mode conversion instructions to implement trunc. */ + +double +__trunc (double x) +{ + double two52 = copysign (0x1.0p52, x); + double r, tmp; + + if (isgreaterequal (fabs (x), 0x1.0p52)) + return x; + + __asm ( + "addt/suc %2, %3, %1\n\tsubt/suc %1, %3, %0" + : "=&f"(r), "=&f"(tmp) + : "f"(x), "f"(two52)); + + /* trunc(-0) == -0, and in general we'll always have the same + sign as our input. */ + return copysign (r, x); +} + +weak_alias (__trunc, trunc) +#ifdef NO_LONG_DOUBLE +strong_alias (__trunc, __truncl) +weak_alias (__trunc, truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/alpha/fpu/s_truncf.c b/REORG.TODO/sysdeps/alpha/fpu/s_truncf.c new file mode 100644 index 0000000000..ca47fdc2b5 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/fpu/s_truncf.c @@ -0,0 +1,43 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + + +/* Use the chopped rounding mode conversion instructions to implement trunc. */ + +float +__truncf (float x) +{ + float two23 = copysignf (0x1.0p23, x); + float r, tmp; + + if (isgreaterequal (fabsf (x), 0x1.0p23)) + return x; + + __asm ( + "adds/suc %2, %3, %1\n\tsubs/suc %1, %3, %0" + : "=&f"(r), "=&f"(tmp) + : "f"(x), "f"(two23)); + + /* trunc(-0) == -0, and in general we'll always have the same + sign as our input. */ + return copysignf (r, x); +} + +weak_alias (__truncf, truncf) diff --git a/REORG.TODO/sysdeps/alpha/gccframe.h b/REORG.TODO/sysdeps/alpha/gccframe.h new file mode 100644 index 0000000000..769cf2da65 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/gccframe.h @@ -0,0 +1,21 @@ +/* Definition of object in frame unwind info. alpha version. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define FIRST_PSEUDO_REGISTER 64 + +#include <sysdeps/generic/gccframe.h> diff --git a/REORG.TODO/sysdeps/alpha/hp-timing.h b/REORG.TODO/sysdeps/alpha/hp-timing.h new file mode 100644 index 0000000000..84380f34a4 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/hp-timing.h @@ -0,0 +1,46 @@ +/* High precision, low overhead timing functions. Alpha version. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@redhat.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _HP_TIMING_H +#define _HP_TIMING_H 1 + +/* We always have the timestamp register, but it's got only a 4 second + range. Use it for ld.so profiling only. */ +#define HP_TIMING_AVAIL (0) +#define HP_SMALL_TIMING_AVAIL (1) + +/* We indeed have inlined functions. */ +#define HP_TIMING_INLINE (1) + +/* We use 32 bit values for the times. */ +typedef unsigned int hp_timing_t; + +/* The "rpcc" instruction returns a 32-bit counting half and a 32-bit + "virtual cycle counter displacement". Subtracting the two gives us + a virtual cycle count. */ +#define HP_TIMING_NOW(VAR) \ + do { \ + unsigned long int x_; \ + asm volatile ("rpcc %0" : "=r"(x_)); \ + (VAR) = (int) (x_) - (int) (x_ >> 32); \ + } while (0) + +#include <hp-timing-common.h> + +#endif /* hp-timing.h */ diff --git a/REORG.TODO/sysdeps/alpha/htonl.S b/REORG.TODO/sysdeps/alpha/htonl.S new file mode 100644 index 0000000000..17b25c3424 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/htonl.S @@ -0,0 +1,43 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(htonl) +#ifdef PROF + ldgp gp, 0(pv) + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at + .prologue 1 +#else + .prologue 0 +#endif + + inslh a0, 7, t0 # t0 = 0000000000AABBCC + inswl a0, 3, t1 # t1 = 000000CCDD000000 + or t1, t0, t1 # t1 = 000000CCDDAABBCC + srl t1, 16, t2 # t2 = 0000000000CCDDAA + zapnot t1, 0x0A, t0 # t0 = 00000000DD00BB00 + zapnot t2, 0x05, t3 # t3 = 0000000000CC00AA + addl t0, t3, v0 # v0 = ssssssssDDCCBBAA + ret + + END(htonl) + +weak_alias (htonl, ntohl) diff --git a/REORG.TODO/sysdeps/alpha/htons.S b/REORG.TODO/sysdeps/alpha/htons.S new file mode 100644 index 0000000000..e61322d003 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/htons.S @@ -0,0 +1,39 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(htons) +#ifdef PROF + ldgp gp, 0(pv) + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at + .prologue 1 +#else + .prologue 0 +#endif + + extwh a0, 7, t1 # t1 = bb00 + extbl a0, 1, v0 # v0 = 00aa + bis v0, t1, v0 # v0 = bbaa + ret + + END(htons) + +weak_alias (htons, ntohs) diff --git a/REORG.TODO/sysdeps/alpha/jmpbuf-offsets.h b/REORG.TODO/sysdeps/alpha/jmpbuf-offsets.h new file mode 100644 index 0000000000..a9e752999f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/jmpbuf-offsets.h @@ -0,0 +1,35 @@ +/* Private macros for accessing __jmp_buf contents. Alpha version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define JB_S0 0 +#define JB_S1 1 +#define JB_S2 2 +#define JB_S3 3 +#define JB_S4 4 +#define JB_S5 5 +#define JB_PC 6 +#define JB_FP 7 +#define JB_SP 8 +#define JB_F2 9 +#define JB_F3 10 +#define JB_F4 11 +#define JB_F5 12 +#define JB_F6 13 +#define JB_F7 14 +#define JB_F8 15 +#define JB_F9 16 diff --git a/REORG.TODO/sysdeps/alpha/jmpbuf-unwind.h b/REORG.TODO/sysdeps/alpha/jmpbuf-unwind.h new file mode 100644 index 0000000000..b90c81c481 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/jmpbuf-unwind.h @@ -0,0 +1,47 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <setjmp.h> +#include <jmpbuf-offsets.h> +#include <stdint.h> +#include <unwind.h> +#include <sysdep.h> + +/* Test if longjmp to JMPBUF would unwind the frame containing a local + variable at ADDRESS. */ +#define _JMPBUF_UNWINDS(_jmpbuf, _address, _demangle) \ + ((void *)(_address) < (void *) _demangle ((_jmpbuf)[JB_SP])) + +#define _JMPBUF_CFA_UNWINDS_ADJ(_jmpbuf, _context, _adj) \ + _JMPBUF_UNWINDS_ADJ (_jmpbuf, (void *) _Unwind_GetCFA (_context), _adj) + +static inline uintptr_t __attribute__ ((unused)) +_jmpbuf_sp (__jmp_buf regs) +{ + uintptr_t sp = regs[JB_SP]; +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (sp); +#endif + return sp; +} + +#define _JMPBUF_UNWINDS_ADJ(_jmpbuf, _address, _adj) \ + ((uintptr_t) (_address) - (_adj) < _jmpbuf_sp (_jmpbuf) - (_adj)) + +/* We use the normal longjmp for unwinding. */ +#define __libc_unwind_longjmp(buf, val) __libc_longjmp (buf, val) diff --git a/REORG.TODO/sysdeps/alpha/ldiv.S b/REORG.TODO/sysdeps/alpha/ldiv.S new file mode 100644 index 0000000000..a32264b143 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/ldiv.S @@ -0,0 +1,218 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + +#undef FRAME +#ifdef __alpha_fix__ +#define FRAME 0 +#else +#define FRAME 16 +#endif + +#undef X +#undef Y +#define X $17 +#define Y $18 + + .set noat + + .align 4 + .globl ldiv + .ent ldiv +ldiv: + .frame sp, FRAME, ra +#if FRAME > 0 + lda sp, -FRAME(sp) +#endif +#ifdef PROF + .set macro + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .set nomacro + .prologue 1 +#else + .prologue 0 +#endif + + beq Y, $divbyzero + excb + mf_fpcr $f10 + + _ITOFT2 X, $f0, 0, Y, $f1, 8 + + .align 4 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + unop + + /* Check to see if X fit in the double as an exact value. */ + sll X, (64-53), AT + sra AT, (64-53), AT + cmpeq X, AT, AT + beq AT, $x_big + + /* If we get here, we're expecting exact results from the division. + Do nothing else besides convert and clean up. */ + cvttq/c $f0, $f0 + excb + mt_fpcr $f10 + _FTOIT $f0, $0, 0 + +$egress: + mulq $0, Y, $1 + subq X, $1, $1 + + stq $0, 0($16) + stq $1, 8($16) + mov $16, $0 + +#if FRAME > 0 + lda sp, FRAME(sp) +#endif + ret + + .align 4 +$x_big: + /* If we get here, X is large enough that we don't expect exact + results, and neither X nor Y got mis-translated for the fp + division. Our task is to take the fp result, figure out how + far it's off from the correct result and compute a fixup. */ + +#define Q v0 /* quotient */ +#define R t0 /* remainder */ +#define SY t1 /* scaled Y */ +#define S t2 /* scalar */ +#define QY t3 /* Q*Y */ + + /* The fixup code below can only handle unsigned values. */ + or X, Y, AT + mov $31, t5 + blt AT, $fix_sign_in +$fix_sign_in_ret1: + cvttq/c $f0, $f0 + + _FTOIT $f0, Q, 8 +$fix_sign_in_ret2: + mulq Q, Y, QY + excb + mt_fpcr $f10 + + .align 4 + subq QY, X, R + mov Y, SY + mov 1, S + bgt R, $q_high + +$q_high_ret: + subq X, QY, R + mov Y, SY + mov 1, S + bgt R, $q_low + +$q_low_ret: + negq Q, t4 + cmovlbs t5, t4, Q + br $egress + + .align 4 + /* The quotient that we computed was too large. We need to reduce + it by S such that Y*S >= R. Obviously the closer we get to the + correct value the better, but overshooting high is ok, as we'll + fix that up later. */ +0: + addq SY, SY, SY + addq S, S, S +$q_high: + cmpult SY, R, AT + bne AT, 0b + + subq Q, S, Q + unop + subq QY, SY, QY + br $q_high_ret + + .align 4 + /* The quotient that we computed was too small. Divide Y by the + current remainder (R) and add that to the existing quotient (Q). + The expectation, of course, is that R is much smaller than X. */ + /* Begin with a shift-up loop. Compute S such that Y*S >= R. We + already have a copy of Y in SY and the value 1 in S. */ +0: + addq SY, SY, SY + addq S, S, S +$q_low: + cmpult SY, R, AT + bne AT, 0b + + /* Shift-down and subtract loop. Each iteration compares our scaled + Y (SY) with the remainder (R); if SY <= R then X is divisible by + Y's scalar (S) so add it to the quotient (Q). */ +2: addq Q, S, t3 + srl S, 1, S + cmpule SY, R, AT + subq R, SY, t4 + + cmovne AT, t3, Q + cmovne AT, t4, R + srl SY, 1, SY + bne S, 2b + + br $q_low_ret + + .align 4 +$fix_sign_in: + /* If we got here, then X|Y is negative. Need to adjust everything + such that we're doing unsigned division in the fixup loop. */ + /* T5 is true if result should be negative. */ + xor X, Y, AT + cmplt AT, 0, t5 + cmplt X, 0, AT + negq X, t0 + + cmovne AT, t0, X + cmplt Y, 0, AT + negq Y, t0 + + cmovne AT, t0, Y + blbc t5, $fix_sign_in_ret1 + + cvttq/c $f0, $f0 + _FTOIT $f0, Q, 8 + .align 3 + negq Q, Q + br $fix_sign_in_ret2 + +$divbyzero: + mov a0, v0 + lda a0, GEN_INTDIV + call_pal PAL_gentrap + stq zero, 0(v0) + stq zero, 8(v0) + +#if FRAME > 0 + lda sp, FRAME(sp) +#endif + ret + + .end ldiv + +weak_alias (ldiv, lldiv) +weak_alias (ldiv, imaxdiv) diff --git a/REORG.TODO/sysdeps/alpha/ldsodefs.h b/REORG.TODO/sysdeps/alpha/ldsodefs.h new file mode 100644 index 0000000000..09dfed764f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/ldsodefs.h @@ -0,0 +1,42 @@ +/* Run-time dynamic linker data structures for loaded ELF shared objects. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef __LDSODEFS_H + +#include <elf.h> + +struct La_alpha_regs; +struct La_alpha_retval; + +#define ARCH_PLTENTER_MEMBERS \ + Elf64_Addr (*alpha_gnu_pltenter) (Elf64_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + struct La_alpha_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); + +#define ARCH_PLTEXIT_MEMBERS \ + unsigned int (*alpha_gnu_pltexit) (Elf64_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + const struct La_alpha_regs *, \ + struct La_alpha_retval *, \ + const char *); + +#include_next <ldsodefs.h> + +#endif diff --git a/REORG.TODO/sysdeps/alpha/libc-tls.c b/REORG.TODO/sysdeps/alpha/libc-tls.c new file mode 100644 index 0000000000..392e6c7289 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/libc-tls.c @@ -0,0 +1,32 @@ +/* Thread-local storage handling in the ELF dynamic linker. Alpha version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <csu/libc-tls.c> +#include <dl-tls.h> + +/* On Alpha, linker optimizations are not required, so __tls_get_addr + can be called even in statically linked binaries. In this case module + must be always 1 and PT_TLS segment exist in the binary, otherwise it + would not link. */ + +void * +__tls_get_addr (tls_index *ti) +{ + dtv_t *dtv = THREAD_DTV (); + return (char *) dtv[1].pointer.val + ti->ti_offset; +} diff --git a/REORG.TODO/sysdeps/alpha/lldiv.S b/REORG.TODO/sysdeps/alpha/lldiv.S new file mode 100644 index 0000000000..80c450a3fc --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/lldiv.S @@ -0,0 +1 @@ +/* lldiv is the same as ldiv on the Alpha. */ diff --git a/REORG.TODO/sysdeps/alpha/lshift.S b/REORG.TODO/sysdeps/alpha/lshift.S new file mode 100644 index 0000000000..a9386e50c0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/lshift.S @@ -0,0 +1,107 @@ + # Alpha 21064 __mpn_lshift -- + + # Copyright (C) 1994-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr r16 + # s1_ptr r17 + # size r18 + # cnt r19 + + # This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling, + # it would take 4 cycles/limb. It should be possible to get down to 3 + # cycles/limb since both ldq and stq can be paired with the other used + # instructions. But there are many restrictions in the 21064 pipeline that + # makes it hard, if not impossible, to get down to 3 cycles/limb: + + # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay. + # 2. Only aligned instruction pairs can be paired. + # 3. The store buffer or silo might not be able to deal with the bandwidth. + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_lshift + .ent __mpn_lshift +__mpn_lshift: + .frame $30,0,$26,0 + + s8addq $18,$17,$17 # make r17 point at end of s1 + ldq $4,-8($17) # load first limb + subq $17,8,$17 + subq $31,$19,$7 + s8addq $18,$16,$16 # make r16 point at end of RES + subq $18,1,$18 + and $18,4-1,$20 # number of limbs in first loop + srl $4,$7,$0 # compute function result + + beq $20,.L0 + subq $18,$20,$18 + + .align 3 +.Loop0: + ldq $3,-8($17) + subq $16,8,$16 + subq $17,8,$17 + subq $20,1,$20 + sll $4,$19,$5 + srl $3,$7,$6 + bis $3,$3,$4 + bis $5,$6,$8 + stq $8,0($16) + bne $20,.Loop0 + +.L0: beq $18,.Lend + + .align 3 +.Loop: ldq $3,-8($17) + subq $16,32,$16 + subq $18,4,$18 + sll $4,$19,$5 + srl $3,$7,$6 + + ldq $4,-16($17) + sll $3,$19,$1 + bis $5,$6,$8 + stq $8,24($16) + srl $4,$7,$2 + + ldq $3,-24($17) + sll $4,$19,$5 + bis $1,$2,$8 + stq $8,16($16) + srl $3,$7,$6 + + ldq $4,-32($17) + sll $3,$19,$1 + bis $5,$6,$8 + stq $8,8($16) + srl $4,$7,$2 + + subq $17,32,$17 + bis $1,$2,$8 + stq $8,0($16) + + bgt $18,.Loop + +.Lend: sll $4,$19,$8 + stq $8,-8($16) + ret $31,($26),1 + .end __mpn_lshift diff --git a/REORG.TODO/sysdeps/alpha/machine-gmon.h b/REORG.TODO/sysdeps/alpha/machine-gmon.h new file mode 100644 index 0000000000..6937f6bb99 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/machine-gmon.h @@ -0,0 +1,25 @@ +/* Machine-specific calling sequence for `mcount' profiling function. alpha + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define _MCOUNT_DECL(from, self) \ + void __mcount (u_long from, u_long self) + +/* Call __mcount with our the return PC for our caller, and the return + PC our caller will return to. Empty since we use an assembly stub + instead. */ +#define MCOUNT diff --git a/REORG.TODO/sysdeps/alpha/memchr.c b/REORG.TODO/sysdeps/alpha/memchr.c new file mode 100644 index 0000000000..402088dc9e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/memchr.c @@ -0,0 +1,177 @@ +/* Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +typedef unsigned long word; + +static inline word +ldq_u(const void *s) +{ + return *(const word *)((word)s & -8); +} + +#define unlikely(X) __builtin_expect ((X), 0) +#define prefetch(X) __builtin_prefetch ((void *)(X), 0) + +#define cmpbeq0(X) __builtin_alpha_cmpbge(0, (X)) +#define find(X, Y) cmpbeq0 ((X) ^ (Y)) + +/* Search no more than N bytes of S for C. */ + +void * +__memchr (const void *s, int xc, size_t n) +{ + const word *s_align; + word t, current, found, mask, offset; + + if (unlikely (n == 0)) + return 0; + + current = ldq_u (s); + + /* Replicate low byte of XC into all bytes of C. */ + t = xc & 0xff; /* 0000000c */ + t = (t << 8) | t; /* 000000cc */ + t = (t << 16) | t; /* 0000cccc */ + const word c = (t << 32) | t; /* cccccccc */ + + /* Align the source, and decrement the count by the number + of bytes searched in the first word. */ + s_align = (const word *)((word)s & -8); + { + size_t inc = n + ((word)s & 7); + n = inc | -(inc < n); + } + + /* Deal with misalignment in the first word for the comparison. */ + mask = (1ul << ((word)s & 7)) - 1; + + /* If the entire string fits within one word, we may need masking + at both the front and the back of the string. */ + if (unlikely (n <= 8)) + { + mask |= -1ul << n; + goto last_quad; + } + + found = find (current, c) & ~mask; + if (unlikely (found)) + goto found_it; + + s_align++; + n -= 8; + + /* If the block is sufficiently large, align to cacheline and prefetch. */ + if (unlikely (n >= 256)) + { + /* Prefetch 3 cache lines beyond the one we're working on. */ + prefetch (s_align + 8); + prefetch (s_align + 16); + prefetch (s_align + 24); + + while ((word)s_align & 63) + { + current = *s_align; + found = find (current, c); + if (found) + goto found_it; + s_align++; + n -= 8; + } + + /* Within each cacheline, advance the load for the next word + before the test for the previous word is complete. This + allows us to hide the 3 cycle L1 cache load latency. We + only perform this advance load within a cacheline to prevent + reading across page boundary. */ +#define CACHELINE_LOOP \ + do { \ + word i, next = s_align[0]; \ + for (i = 0; i < 7; ++i) \ + { \ + current = next; \ + next = s_align[1]; \ + found = find (current, c); \ + if (unlikely (found)) \ + goto found_it; \ + s_align++; \ + } \ + current = next; \ + found = find (current, c); \ + if (unlikely (found)) \ + goto found_it; \ + s_align++; \ + n -= 64; \ + } while (0) + + /* While there's still lots more data to potentially be read, + continue issuing prefetches for the 4th cacheline out. */ + while (n >= 256) + { + prefetch (s_align + 24); + CACHELINE_LOOP; + } + + /* Up to 3 cache lines remaining. Continue issuing advanced + loads, but stop prefetching. */ + while (n >= 64) + CACHELINE_LOOP; + + /* We may have exhausted the buffer. */ + if (n == 0) + return NULL; + } + + /* Quadword aligned loop. */ + current = *s_align; + while (n > 8) + { + found = find (current, c); + if (unlikely (found)) + goto found_it; + current = *++s_align; + n -= 8; + } + + /* The last word may need masking at the tail of the compare. */ + mask = -1ul << n; + last_quad: + found = find (current, c) & ~mask; + if (found == 0) + return NULL; + + found_it: +#ifdef __alpha_cix__ + offset = __builtin_alpha_cttz (found); +#else + /* Extract LSB. */ + found &= -found; + + /* Binary search for the LSB. */ + offset = (found & 0x0f ? 0 : 4); + offset += (found & 0x33 ? 0 : 2); + offset += (found & 0x55 ? 0 : 1); +#endif + + return (void *)((word)s_align + offset); +} + +#ifdef weak_alias +weak_alias (__memchr, memchr) +#endif +libc_hidden_builtin_def (memchr) diff --git a/REORG.TODO/sysdeps/alpha/memset.S b/REORG.TODO/sysdeps/alpha/memset.S new file mode 100644 index 0000000000..2e061b90e6 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/memset.S @@ -0,0 +1,127 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Fill a block of memory with a character. Optimized for the Alpha + architecture: + + - memory accessed as aligned quadwords only + - destination memory not read unless needed for good cache behaviour + - basic blocks arranged to optimize branch prediction for full-quadword + aligned memory blocks. + - partial head and tail quadwords constructed with byte-mask instructions + + This is generally scheduled for the EV5 (got to look out for my own + interests :-), but with EV4 needs in mind. There *should* be no more + stalls for the EV4 than there are for the EV5. +*/ + + +#include <sysdep.h> + + .set noat + .set noreorder + + .text + .type memset, @function + .globl memset + .usepv memset, USEPV_PROF + + cfi_startproc + + /* On entry to this basic block: + t3 == loop counter + t4 == bytes in partial final word + a0 == possibly misaligned destination pointer + a1 == replicated source character */ + + .align 3 +memset_loop: + beq t3, $tail + blbc t3, 0f # skip single store if count even + + stq_u a1, 0(a0) # e0 : store one word + subq t3, 1, t3 # .. e1 : + addq a0, 8, a0 # e0 : + beq t3, $tail # .. e1 : + +0: stq_u a1, 0(a0) # e0 : store two words + subq t3, 2, t3 # .. e1 : + stq_u a1, 8(a0) # e0 : + addq a0, 16, a0 # .. e1 : + bne t3, 0b # e1 : + +$tail: bne t4, 1f # is there a tail to do? + ret # no + + .align 3 +1: ldq_u t0, 0(a0) # e1 : yes, load original data + mskql a1, t4, t1 # .. e0 : + mskqh t0, t4, t0 # e0 : + or t0, t1, t0 # e1 (stall) + stq_u t0, 0(a0) # e0 : + ret # .. e1 : + +memset: +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + + and a1, 0xff, a1 # e0 : zero extend input character + mov a0, v0 # .. e1 : move return value in place + sll a1, 8, t0 # e0 : begin replicating the char + beq a2, $done # .. e1 : early exit for zero-length store + or t0, a1, a1 # e0 : + and a0, 7, t1 # .. e1 : dest misalignment + sll a1, 16, t0 # e0 : + addq a2, t1, a2 # .. e1 : add dest misalignment to count + or t0, a1, a1 # e0 : + srl a2, 3, t3 # .. e1 : loop = count >> 3 + sll a1, 32, t0 # e0 : + and a2, 7, t4 # .. e1 : find number of bytes in tail + or t0, a1, a1 # e0 : character replication done + + beq t1, memset_loop # .. e1 : aligned head, jump right in + + ldq_u t0, 0(a0) # e1 : load original data to mask into + mskqh a1, a0, t1 # .. e0 : + + cmpult a2, 8, t2 # e0 : is this a sub-word set? + bne t2, $oneq # .. e1 (zdb) + + mskql t0, a0, t0 # e0 : we span words. finish this partial + subq t3, 1, t3 # .. e1 : + addq a0, 8, a0 # e0 : + or t0, t1, t0 # .. e1 : + stq_u t0, -8(a0) # e0 : + br memset_loop # .. e1 : + + .align 3 +$oneq: + mskql t1, a2, t1 # e0 : entire operation within one word + mskql t0, a0, t2 # e0 : + mskqh t0, a2, t3 # e0 : + or t1, t2, t0 # .. e1 : + or t0, t3, t0 # e1 : + stq_u t0, 0(a0) # e0 (stall) + +$done: ret + + cfi_endproc +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/alpha/memusage.h b/REORG.TODO/sysdeps/alpha/memusage.h new file mode 100644 index 0000000000..83a0ea531b --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/memusage.h @@ -0,0 +1,20 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define GETSP() ({ register uintptr_t stack_ptr asm ("$30"); stack_ptr; }) + +#include <sysdeps/generic/memusage.h> diff --git a/REORG.TODO/sysdeps/alpha/mul_1.S b/REORG.TODO/sysdeps/alpha/mul_1.S new file mode 100644 index 0000000000..099e22b441 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/mul_1.S @@ -0,0 +1,83 @@ + # Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store + # the result in a second limb vector. + + # Copyright (C) 1992-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr r16 + # s1_ptr r17 + # size r18 + # s2_limb r19 + + # This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5. + + # To improve performance for long multiplications, we would use + # 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use + # these instructions without slowing down the general code: 1. We can + # only have two prefetches in operation at any time in the Alpha + # architecture. 2. There will seldom be any special alignment + # between RES_PTR and S1_PTR. Maybe we can simply divide the current + # loop into an inner and outer loop, having the inner loop handle + # exactly one prefetch block? + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_mul_1 + .ent __mpn_mul_1 2 +__mpn_mul_1: + .frame $30,0,$26 + + ldq $2,0($17) # $2 = s1_limb + subq $18,1,$18 # size-- + mulq $2,$19,$3 # $3 = prod_low + bic $31,$31,$4 # clear cy_limb + umulh $2,$19,$0 # $0 = prod_high + beq $18,Lend1 # jump if size was == 1 + ldq $2,8($17) # $2 = s1_limb + subq $18,1,$18 # size-- + stq $3,0($16) + beq $18,Lend2 # jump if size was == 2 + + .align 3 +Loop: mulq $2,$19,$3 # $3 = prod_low + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + subq $18,1,$18 # size-- + umulh $2,$19,$4 # $4 = cy_limb + ldq $2,16($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + addq $3,$0,$3 # $3 = cy_limb + prod_low + stq $3,8($16) + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + addq $16,8,$16 # res_ptr++ + bne $18,Loop + +Lend2: mulq $2,$19,$3 # $3 = prod_low + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + umulh $2,$19,$4 # $4 = cy_limb + addq $3,$0,$3 # $3 = cy_limb + prod_low + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + stq $3,8($16) + addq $4,$0,$0 # cy_limb = prod_high + cy + ret $31,($26),1 +Lend1: stq $3,0($16) + ret $31,($26),1 + + .end __mpn_mul_1 diff --git a/REORG.TODO/sysdeps/alpha/nptl/Makefile b/REORG.TODO/sysdeps/alpha/nptl/Makefile new file mode 100644 index 0000000000..90f5fcf350 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/nptl/Makefile @@ -0,0 +1,20 @@ +# Copyright (C) 2003-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library. If not, see +# <http://www.gnu.org/licenses/>. + +ifeq ($(subdir),csu) +gen-as-const-headers += tcb-offsets.sym +endif diff --git a/REORG.TODO/sysdeps/alpha/nptl/bits/pthreadtypes-arch.h b/REORG.TODO/sysdeps/alpha/nptl/bits/pthreadtypes-arch.h new file mode 100644 index 0000000000..b6f6cb1347 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/nptl/bits/pthreadtypes-arch.h @@ -0,0 +1,59 @@ +/* Machine-specific pthread type layouts. Alpha version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _BITS_PTHREADTYPES_ARCH_H +#define _BITS_PTHREADTYPES_ARCH_H 1 + +#define __SIZEOF_PTHREAD_ATTR_T 56 +#define __SIZEOF_PTHREAD_MUTEX_T 40 +#define __SIZEOF_PTHREAD_MUTEXATTR_T 4 +#define __SIZEOF_PTHREAD_COND_T 48 +#define __SIZEOF_PTHREAD_CONDATTR_T 4 +#define __SIZEOF_PTHREAD_RWLOCK_T 56 +#define __SIZEOF_PTHREAD_RWLOCKATTR_T 8 +#define __SIZEOF_PTHREAD_BARRIER_T 32 +#define __SIZEOF_PTHREAD_BARRIERATTR_T 4 + +/* Definitions for internal mutex struct. */ +#define __PTHREAD_COMPAT_PADDING_MID +#define __PTHREAD_COMPAT_PADDING_END +#define __PTHREAD_MUTEX_LOCK_ELISION 0 + +#define __LOCK_ALIGNMENT +#define __ONCE_ALIGNMENT + +struct __pthread_rwlock_arch_t +{ + unsigned int __readers; + unsigned int __writers; + unsigned int __wrphase_futex; + unsigned int __writers_futex; + unsigned int __pad3; + unsigned int __pad4; + int __cur_writer; + int __shared; + unsigned long int __pad1; + unsigned long int __pad2; + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. */ + unsigned int __flags; +}; + +#define __PTHREAD_RWLOCK_ELISION_EXTRA 0 + +#endif /* bits/pthreadtypes.h */ diff --git a/REORG.TODO/sysdeps/alpha/nptl/pthread_spin_lock.S b/REORG.TODO/sysdeps/alpha/nptl/pthread_spin_lock.S new file mode 100644 index 0000000000..5a28bdce3a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/nptl/pthread_spin_lock.S @@ -0,0 +1,44 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@twiddle.net>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + + + .text + .align 4 + + .globl pthread_spin_lock + .ent pthread_spin_lock +pthread_spin_lock: + .frame $sp, 0, $26, 0 + .prologue 0 + +0: ldl_l $1, 0($16) + lda $2, 1 + lda $0, 0 + bne $1, 1f + + stl_c $2, 0($16) + beq $2, 1f + mb + ret + +1: ldl $1, 0($16) + bne $1, 1b + unop + br 0b + + .end pthread_spin_lock diff --git a/REORG.TODO/sysdeps/alpha/nptl/pthread_spin_trylock.S b/REORG.TODO/sysdeps/alpha/nptl/pthread_spin_trylock.S new file mode 100644 index 0000000000..a5eab1353d --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/nptl/pthread_spin_trylock.S @@ -0,0 +1,45 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@twiddle.net>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + + +#define _ERRNO_H 1 +#include <bits/errno.h> + + .text + .align 4 + + .globl pthread_spin_trylock + .ent pthread_spin_trylock +pthread_spin_trylock: + .frame $sp, 0, $26, 0 + .prologue 0 + +0: ldl_l $1, 0($16) + lda $2, 1 + lda $0, EBUSY + bne $1, 1f + + stl_c $2, 0($16) + beq $2, 2f + mb + lda $0, 0 + +1: ret +2: br 0b + + .end pthread_spin_trylock diff --git a/REORG.TODO/sysdeps/alpha/nptl/pthreaddef.h b/REORG.TODO/sysdeps/alpha/nptl/pthreaddef.h new file mode 100644 index 0000000000..581dd60a39 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/nptl/pthreaddef.h @@ -0,0 +1,31 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Default stack size. */ +#define ARCH_STACK_DEFAULT_SIZE (4 * 1024 * 1024) + +/* Required stack pointer alignment at beginning. The ABI requires 16. */ +#define STACK_ALIGN 16 + +/* Minimal stack size after allocating thread descriptor and guard size. */ +#define MINIMAL_REST_STACK 4096 + +/* Alignment requirement for TCB. */ +#define TCB_ALIGNMENT 16 + +/* Location of current stack frame. */ +#define CURRENT_STACK_FRAME __builtin_frame_address (0) diff --git a/REORG.TODO/sysdeps/alpha/nptl/tcb-offsets.sym b/REORG.TODO/sysdeps/alpha/nptl/tcb-offsets.sym new file mode 100644 index 0000000000..1005621b37 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/nptl/tcb-offsets.sym @@ -0,0 +1,13 @@ +#include <sysdep.h> +#include <tls.h> + +-- + +-- Abuse tls.h macros to derive offsets relative to the thread register. +-- # define __builtin_thread_pointer() ((void *) 0) +-- # define thread_offsetof(mem) ((void *) &THREAD_SELF->mem - (void *) 0) +-- Ho hum, this doesn't work in gcc4, so Know Things about THREAD_SELF +#define thread_offsetof(mem) (long)(offsetof(struct pthread, mem) - sizeof(struct pthread)) + +MULTIPLE_THREADS_OFFSET thread_offsetof (header.multiple_threads) +TID_OFFSET thread_offsetof (tid) diff --git a/REORG.TODO/sysdeps/alpha/nptl/tls.h b/REORG.TODO/sysdeps/alpha/nptl/tls.h new file mode 100644 index 0000000000..c16f5da04a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/nptl/tls.h @@ -0,0 +1,132 @@ +/* Definition for thread-local data handling. NPTL/Alpha version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _TLS_H +#define _TLS_H 1 + +# include <dl-sysdep.h> + +#ifndef __ASSEMBLER__ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <dl-dtv.h> + +/* Get system call information. */ +# include <sysdep.h> + +/* The TP points to the start of the thread blocks. */ +# define TLS_DTV_AT_TP 1 +# define TLS_TCB_AT_TP 0 + +/* Get the thread descriptor definition. */ +# include <nptl/descr.h> + +typedef struct +{ + dtv_t *dtv; + void *__private; +} tcbhead_t; + +/* This is the size of the initial TCB. */ +# define TLS_INIT_TCB_SIZE sizeof (tcbhead_t) + +/* Alignment requirements for the initial TCB. */ +# define TLS_INIT_TCB_ALIGN 16 + +/* This is the size of the TCB. */ +# define TLS_TCB_SIZE sizeof (tcbhead_t) + +/* This is the size we need before TCB. */ +# define TLS_PRE_TCB_SIZE sizeof (struct pthread) + +/* Alignment requirements for the TCB. */ +# define TLS_TCB_ALIGN 16 + +/* Install the dtv pointer. The pointer passed is to the element with + index -1 which contain the length. */ +# define INSTALL_DTV(tcbp, dtvp) \ + (((tcbhead_t *) (tcbp))->dtv = (dtvp) + 1) + +/* Install new dtv for current thread. */ +# define INSTALL_NEW_DTV(dtv) \ + (THREAD_DTV() = (dtv)) + +/* Return dtv of given thread descriptor. */ +# define GET_DTV(tcbp) \ + (((tcbhead_t *) (tcbp))->dtv) + +/* Code to initially initialize the thread pointer. This might need + special attention since 'errno' is not yet available and if the + operation can cause a failure 'errno' must not be touched. */ +# define TLS_INIT_TP(tcbp) \ + (__builtin_set_thread_pointer ((void *)(tcbp)), NULL) + +/* Value passed to 'clone' for initialization of the thread register. */ +# define TLS_DEFINE_INIT_TP(tp, pd) void *tp = (pd) + 1 + +/* Return the address of the dtv for the current thread. */ +# define THREAD_DTV() \ + (((tcbhead_t *) __builtin_thread_pointer ())->dtv) + +/* Return the thread descriptor for the current thread. */ +# define THREAD_SELF \ + ((struct pthread *)__builtin_thread_pointer () - 1) + +/* Magic for libthread_db to know how to do THREAD_SELF. */ +# define DB_THREAD_SELF \ + REGISTER (64, 64, 32 * 8, -sizeof (struct pthread)) + +/* Access to data in the thread descriptor is easy. */ +#define THREAD_GETMEM(descr, member) \ + descr->member +#define THREAD_GETMEM_NC(descr, member, idx) \ + descr->member[idx] +#define THREAD_SETMEM(descr, member, value) \ + descr->member = (value) +#define THREAD_SETMEM_NC(descr, member, idx, value) \ + descr->member[idx] = (value) + +/* Get and set the global scope generation counter in struct pthread. */ +#define THREAD_GSCOPE_FLAG_UNUSED 0 +#define THREAD_GSCOPE_FLAG_USED 1 +#define THREAD_GSCOPE_FLAG_WAIT 2 +#define THREAD_GSCOPE_RESET_FLAG() \ + do \ + { int __res \ + = atomic_exchange_rel (&THREAD_SELF->header.gscope_flag, \ + THREAD_GSCOPE_FLAG_UNUSED); \ + if (__res == THREAD_GSCOPE_FLAG_WAIT) \ + lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \ + } \ + while (0) +#define THREAD_GSCOPE_SET_FLAG() \ + do \ + { \ + THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED; \ + atomic_write_barrier (); \ + } \ + while (0) +#define THREAD_GSCOPE_WAIT() \ + GL(dl_wait_lookup_done) () + +#else /* __ASSEMBLER__ */ +# include <tcb-offsets.h> +#endif /* __ASSEMBLER__ */ + +#endif /* tls.h */ diff --git a/REORG.TODO/sysdeps/alpha/nscd-types.h b/REORG.TODO/sysdeps/alpha/nscd-types.h new file mode 100644 index 0000000000..c222ef1204 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/nscd-types.h @@ -0,0 +1,21 @@ +/* Types for the NSCD implementation. Alpha version. + Copyright (c) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +typedef int64_t nscd_ssize_t; diff --git a/REORG.TODO/sysdeps/alpha/preconfigure b/REORG.TODO/sysdeps/alpha/preconfigure new file mode 100644 index 0000000000..cb02cffc3a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/preconfigure @@ -0,0 +1,3 @@ +case "$machine" in +alpha*) base_machine=alpha machine=alpha/$machine +esac diff --git a/REORG.TODO/sysdeps/alpha/rawmemchr.S b/REORG.TODO/sysdeps/alpha/rawmemchr.S new file mode 100644 index 0000000000..d32d037bfa --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/rawmemchr.S @@ -0,0 +1,89 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Return pointer to first occurrence of CH in STR. */ + +#include <sysdep.h> + + .set noreorder + .set noat + +ENTRY(__rawmemchr) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + zapnot a1, 1, a1 # e0 : zero extend the search character + ldq_u t0, 0(a0) # .. e1 : load first quadword + sll a1, 8, t5 # e0 : replicate the search character + andnot a0, 7, v0 # .. e1 : align our loop pointer + + or t5, a1, a1 # e0 : + lda t4, -1 # .. e1 : build garbage mask + sll a1, 16, t5 # e0 : + unop # : + + mskqh t4, a0, t4 # e0 : + or t5, a1, a1 # .. e1 : + sll a1, 32, t5 # e0 : + cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage + + or t5, a1, a1 # e0 : + xor t0, a1, t1 # .. e1 : make bytes == c zero + cmpbge zero, t1, t3 # e0 : bits set iff byte == c + unop # : + + andnot t3, t4, t0 # e0 : clear garbage bits + fnop # .. fa : + unop # : + bne t0, $found # .. e1 (zdb) + + .align 4 +$loop: + ldq t0, 8(v0) # e0 : + addq v0, 8, v0 # .. e1 : + nop # e0 : + xor t0, a1, t1 # .. e1 (ev5 data stall) + + cmpbge zero, t1, t0 # e0 : bits set iff byte == c + beq t0, $loop # .. e1 (zdb) + +$found: + negq t0, t1 # e0 : clear all but least set bit + and t0, t1, t0 # e1 (stall) + and t0, 0xf0, t2 # e0 : binary search for that set bit + and t0, 0xcc, t3 # .. e1 : + + and t0, 0xaa, t4 # e0 : + cmovne t2, 4, t2 # .. e1 : + cmovne t3, 2, t3 # e0 : + cmovne t4, 1, t4 # .. e1 : + + addq t2, t3, t2 # e0 : + addq v0, t4, v0 # .. e1 : + addq v0, t2, v0 # e0 : + ret # .. e1 : + + END(__rawmemchr) + +libc_hidden_def (__rawmemchr) +weak_alias (__rawmemchr, rawmemchr) diff --git a/REORG.TODO/sysdeps/alpha/reml.S b/REORG.TODO/sysdeps/alpha/reml.S new file mode 100644 index 0000000000..60fc6b8cad --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/reml.S @@ -0,0 +1,86 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@twiddle.net> + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + +/* 32-bit signed int remainder. This is not a normal C function. Argument + registers are t10 and t11, the result goes in t12. Only t12 and AT may + be clobbered. + + The FPU can handle the division for all input values except zero. + All we have to do is compute the remainder via multiply-and-subtract. + + The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE + for cvttq/c even without /sui being set. It will not, however, properly + raise the exception, so we don't have to worry about FPCR_INED being clear + and so dying by SIGFPE. */ + +#ifndef EXTEND +#define EXTEND(S,D) sextl S, D +#endif + + .text + .align 4 + .globl __reml + .type __reml, @funcnoplt + .usepv __reml, no + + cfi_startproc + cfi_return_column (RA) +__reml: + lda sp, -FRAME(sp) + cfi_def_cfa_offset (FRAME) + CALL_MCOUNT + stt $f0, 0(sp) + excb + beq Y, DIVBYZERO + + stt $f1, 8(sp) + stt $f2, 16(sp) + cfi_rel_offset ($f0, 0) + cfi_rel_offset ($f1, 8) + cfi_rel_offset ($f2, 16) + mf_fpcr $f2 + + EXTEND (X, RV) + EXTEND (Y, AT) + _ITOFT2 RV, $f0, 24, AT, $f1, 32 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + cvttq/c $f0, $f0 + excb + mt_fpcr $f2 + _FTOIT $f0, RV, 24 + + ldt $f0, 0(sp) + mull RV, Y, RV + ldt $f1, 8(sp) + ldt $f2, 16(sp) + lda sp, FRAME(sp) + cfi_restore ($f0) + cfi_restore ($f1) + cfi_restore ($f2) + cfi_def_cfa_offset (0) + subl X, RV, RV + ret $31, (RA), 1 + + cfi_endproc + .size __reml, .-__reml + + DO_DIVBYZERO diff --git a/REORG.TODO/sysdeps/alpha/remlu.S b/REORG.TODO/sysdeps/alpha/remlu.S new file mode 100644 index 0000000000..f8691e19a4 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/remlu.S @@ -0,0 +1,4 @@ +#define UNSIGNED +#define EXTEND(S,D) zapnot S, 15, D +#define __reml __remlu +#include <reml.S> diff --git a/REORG.TODO/sysdeps/alpha/remq.S b/REORG.TODO/sysdeps/alpha/remq.S new file mode 100644 index 0000000000..cfc82aeb8b --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/remq.S @@ -0,0 +1,268 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + + +/* 64-bit signed long remainder. These are not normal C functions. Argument + registers are t10 and t11, the result goes in t12. Only t12 and AT may + be clobbered. + + Theory of operation here is that we can use the FPU divider for virtually + all operands that we see: all dividend values between -2**53 and 2**53-1 + can be computed directly. Note that divisor values need not be checked + against that range because the rounded fp value will be close enough such + that the quotient is < 1, which will properly be truncated to zero when we + convert back to integer. + + When the dividend is outside the range for which we can compute exact + results, we use the fp quotent as an estimate from which we begin refining + an exact integral value. This reduces the number of iterations in the + shift-and-subtract loop significantly. + + The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE + for cvttq/c even without /sui being set. It will not, however, properly + raise the exception, so we don't have to worry about FPCR_INED being clear + and so dying by SIGFPE. */ + + .text + .align 4 + .globl __remq + .type __remq, @funcnoplt + .usepv __remq, no + + cfi_startproc + cfi_return_column (RA) +__remq: + lda sp, -FRAME(sp) + cfi_def_cfa_offset (FRAME) + CALL_MCOUNT + + /* Get the fp divide insn issued as quickly as possible. After + that's done, we have at least 22 cycles until its results are + ready -- all the time in the world to figure out how we're + going to use the results. */ + stt $f0, 0(sp) + excb + beq Y, DIVBYZERO + + stt $f1, 8(sp) + stt $f3, 48(sp) + cfi_rel_offset ($f0, 0) + cfi_rel_offset ($f1, 8) + cfi_rel_offset ($f3, 48) + mf_fpcr $f3 + + _ITOFT2 X, $f0, 16, Y, $f1, 24 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + divt/c $f0, $f1, $f0 + + /* Check to see if X fit in the double as an exact value. */ + sll X, (64-53), AT + ldt $f1, 8(sp) + sra AT, (64-53), AT + cmpeq X, AT, AT + beq AT, $x_big + + /* If we get here, we're expecting exact results from the division. + Do nothing else besides convert, compute remainder, clean up. */ + cvttq/c $f0, $f0 + excb + mt_fpcr $f3 + _FTOIT $f0, AT, 16 + mulq AT, Y, AT + ldt $f0, 0(sp) + ldt $f3, 48(sp) + cfi_restore ($f1) + cfi_remember_state + cfi_restore ($f0) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + lda sp, FRAME(sp) + subq X, AT, RV + ret $31, (RA), 1 + + .align 4 + cfi_restore_state +$x_big: + /* If we get here, X is large enough that we don't expect exact + results, and neither X nor Y got mis-translated for the fp + division. Our task is to take the fp result, figure out how + far it's off from the correct result and compute a fixup. */ + stq t0, 16(sp) + stq t1, 24(sp) + stq t2, 32(sp) + stq t5, 40(sp) + cfi_rel_offset (t0, 16) + cfi_rel_offset (t1, 24) + cfi_rel_offset (t2, 32) + cfi_rel_offset (t5, 40) + +#define Q t0 /* quotient */ +#define R RV /* remainder */ +#define SY t1 /* scaled Y */ +#define S t2 /* scalar */ +#define QY t3 /* Q*Y */ + + /* The fixup code below can only handle unsigned values. */ + or X, Y, AT + mov $31, t5 + blt AT, $fix_sign_in +$fix_sign_in_ret1: + cvttq/c $f0, $f0 + + _FTOIT $f0, Q, 8 + .align 3 +$fix_sign_in_ret2: + ldt $f0, 0(sp) + stq t3, 0(sp) + cfi_restore ($f0) + cfi_rel_offset (t3, 0) + + mulq Q, Y, QY + excb + stq t4, 8(sp) + mt_fpcr $f3 + cfi_rel_offset (t4, 8) + + subq QY, X, R + mov Y, SY + mov 1, S + bgt R, $q_high + +$q_high_ret: + subq X, QY, R + mov Y, SY + mov 1, S + bgt R, $q_low + +$q_low_ret: + ldq t0, 16(sp) + ldq t1, 24(sp) + ldq t2, 32(sp) + bne t5, $fix_sign_out + +$fix_sign_out_ret: + ldq t3, 0(sp) + ldq t4, 8(sp) + ldq t5, 40(sp) + ldt $f3, 48(sp) + lda sp, FRAME(sp) + cfi_remember_state + cfi_restore (t0) + cfi_restore (t1) + cfi_restore (t2) + cfi_restore (t3) + cfi_restore (t4) + cfi_restore (t5) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + ret $31, (RA), 1 + + .align 4 + cfi_restore_state + /* The quotient that we computed was too large. We need to reduce + it by S such that Y*S >= R. Obviously the closer we get to the + correct value the better, but overshooting high is ok, as we'll + fix that up later. */ +0: + addq SY, SY, SY + addq S, S, S +$q_high: + cmpult SY, R, AT + bne AT, 0b + + subq Q, S, Q + unop + subq QY, SY, QY + br $q_high_ret + + .align 4 + /* The quotient that we computed was too small. Divide Y by the + current remainder (R) and add that to the existing quotient (Q). + The expectation, of course, is that R is much smaller than X. */ + /* Begin with a shift-up loop. Compute S such that Y*S >= R. We + already have a copy of Y in SY and the value 1 in S. */ +0: + addq SY, SY, SY + addq S, S, S +$q_low: + cmpult SY, R, AT + bne AT, 0b + + /* Shift-down and subtract loop. Each iteration compares our scaled + Y (SY) with the remainder (R); if SY <= R then X is divisible by + Y's scalar (S) so add it to the quotient (Q). */ +2: addq Q, S, t3 + srl S, 1, S + cmpule SY, R, AT + subq R, SY, t4 + + cmovne AT, t3, Q + cmovne AT, t4, R + srl SY, 1, SY + bne S, 2b + + br $q_low_ret + + .align 4 +$fix_sign_in: + /* If we got here, then X|Y is negative. Need to adjust everything + such that we're doing unsigned division in the fixup loop. */ + /* T5 records the changes we had to make: + bit 0: set if X was negated. Note that the sign of the + remainder follows the sign of the divisor. + bit 2: set if Y was negated. + */ + xor X, Y, t1 + cmplt X, 0, t5 + negq X, t0 + cmovne t5, t0, X + + cmplt Y, 0, AT + negq Y, t0 + s4addq AT, t5, t5 + cmovne AT, t0, Y + + bge t1, $fix_sign_in_ret1 + cvttq/c $f0, $f0 + _FTOIT $f0, Q, 8 + .align 3 + negq Q, Q + br $fix_sign_in_ret2 + + .align 4 +$fix_sign_out: + /* Now we get to undo what we did above. */ + /* ??? Is this really faster than just increasing the size of + the stack frame and storing X and Y in memory? */ + and t5, 4, AT + negq Y, t4 + cmovne AT, t4, Y + + negq X, t4 + cmovlbs t5, t4, X + negq RV, t4 + cmovlbs t5, t4, RV + + br $fix_sign_out_ret + + cfi_endproc + .size __remq, .-__remq + + DO_DIVBYZERO diff --git a/REORG.TODO/sysdeps/alpha/remqu.S b/REORG.TODO/sysdeps/alpha/remqu.S new file mode 100644 index 0000000000..732a350ea9 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/remqu.S @@ -0,0 +1,271 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "div_libc.h" + + +/* 64-bit unsigned long remainder. These are not normal C functions. Argument + registers are t10 and t11, the result goes in t12. Only t12 and AT may be + clobbered. + + Theory of operation here is that we can use the FPU divider for virtually + all operands that we see: all dividend values between -2**53 and 2**53-1 + can be computed directly. Note that divisor values need not be checked + against that range because the rounded fp value will be close enough such + that the quotient is < 1, which will properly be truncated to zero when we + convert back to integer. + + When the dividend is outside the range for which we can compute exact + results, we use the fp quotent as an estimate from which we begin refining + an exact integral value. This reduces the number of iterations in the + shift-and-subtract loop significantly. + + The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE + for cvttq/c even without /sui being set. It will not, however, properly + raise the exception, so we don't have to worry about FPCR_INED being clear + and so dying by SIGFPE. */ + + .text + .align 4 + .globl __remqu + .type __remqu, @funcnoplt + .usepv __remqu, no + + cfi_startproc + cfi_return_column (RA) +__remqu: + lda sp, -FRAME(sp) + cfi_def_cfa_offset (FRAME) + CALL_MCOUNT + + /* Get the fp divide insn issued as quickly as possible. After + that's done, we have at least 22 cycles until its results are + ready -- all the time in the world to figure out how we're + going to use the results. */ + subq Y, 1, AT + stt $f0, 0(sp) + and Y, AT, AT + + stt $f1, 8(sp) + excb + stt $f3, 48(sp) + beq AT, $powerof2 + cfi_rel_offset ($f0, 0) + cfi_rel_offset ($f1, 8) + cfi_rel_offset ($f3, 48) + + _ITOFT2 X, $f0, 16, Y, $f1, 24 + mf_fpcr $f3 + cvtqt $f0, $f0 + cvtqt $f1, $f1 + + blt X, $x_is_neg + divt/c $f0, $f1, $f0 + + /* Check to see if Y was mis-converted as signed value. */ + ldt $f1, 8(sp) + blt Y, $y_is_neg + + /* Check to see if X fit in the double as an exact value. */ + srl X, 53, AT + bne AT, $x_big + + /* If we get here, we're expecting exact results from the division. + Do nothing else besides convert, compute remainder, clean up. */ + cvttq/c $f0, $f0 + excb + mt_fpcr $f3 + _FTOIT $f0, AT, 16 + + mulq AT, Y, AT + ldt $f0, 0(sp) + ldt $f3, 48(sp) + lda sp, FRAME(sp) + cfi_remember_state + cfi_restore ($f0) + cfi_restore ($f1) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + + .align 4 + subq X, AT, RV + ret $31, (RA), 1 + + .align 4 + cfi_restore_state +$x_is_neg: + /* If we get here, X is so big that bit 63 is set, which made the + conversion come out negative. Fix it up lest we not even get + a good estimate. */ + ldah AT, 0x5f80 /* 2**64 as float. */ + stt $f2, 24(sp) + cfi_rel_offset ($f2, 24) + _ITOFS AT, $f2, 16 + + addt $f0, $f2, $f0 + divt/c $f0, $f1, $f0 + + /* Ok, we've now the divide issued. Continue with other checks. */ + .align 4 + ldt $f1, 8(sp) + unop + ldt $f2, 24(sp) + blt Y, $y_is_neg + cfi_restore ($f1) + cfi_restore ($f2) + cfi_remember_state /* for y_is_neg */ + + .align 4 +$x_big: + /* If we get here, X is large enough that we don't expect exact + results, and neither X nor Y got mis-translated for the fp + division. Our task is to take the fp result, figure out how + far it's off from the correct result and compute a fixup. */ + stq t0, 16(sp) + stq t1, 24(sp) + stq t2, 32(sp) + stq t3, 40(sp) + cfi_rel_offset (t0, 16) + cfi_rel_offset (t1, 24) + cfi_rel_offset (t2, 32) + cfi_rel_offset (t3, 40) + +#define Q t0 /* quotient */ +#define R RV /* remainder */ +#define SY t1 /* scaled Y */ +#define S t2 /* scalar */ +#define QY t3 /* Q*Y */ + + cvttq/c $f0, $f0 + _FTOIT $f0, Q, 8 + mulq Q, Y, QY + + .align 4 + stq t4, 8(sp) + excb + ldt $f0, 0(sp) + mt_fpcr $f3 + cfi_rel_offset (t4, 8) + cfi_restore ($f0) + + subq QY, X, R + mov Y, SY + mov 1, S + bgt R, $q_high + +$q_high_ret: + subq X, QY, R + mov Y, SY + mov 1, S + bgt R, $q_low + +$q_low_ret: + ldq t4, 8(sp) + ldq t0, 16(sp) + ldq t1, 24(sp) + ldq t2, 32(sp) + + ldq t3, 40(sp) + ldt $f3, 48(sp) + lda sp, FRAME(sp) + cfi_remember_state + cfi_restore (t0) + cfi_restore (t1) + cfi_restore (t2) + cfi_restore (t3) + cfi_restore (t4) + cfi_restore ($f3) + cfi_def_cfa_offset (0) + ret $31, (RA), 1 + + .align 4 + cfi_restore_state + /* The quotient that we computed was too large. We need to reduce + it by S such that Y*S >= R. Obviously the closer we get to the + correct value the better, but overshooting high is ok, as we'll + fix that up later. */ +0: + addq SY, SY, SY + addq S, S, S +$q_high: + cmpult SY, R, AT + bne AT, 0b + + subq Q, S, Q + unop + subq QY, SY, QY + br $q_high_ret + + .align 4 + /* The quotient that we computed was too small. Divide Y by the + current remainder (R) and add that to the existing quotient (Q). + The expectation, of course, is that R is much smaller than X. */ + /* Begin with a shift-up loop. Compute S such that Y*S >= R. We + already have a copy of Y in SY and the value 1 in S. */ +0: + addq SY, SY, SY + addq S, S, S +$q_low: + cmpult SY, R, AT + bne AT, 0b + + /* Shift-down and subtract loop. Each iteration compares our scaled + Y (SY) with the remainder (R); if SY <= R then X is divisible by + Y's scalar (S) so add it to the quotient (Q). */ +2: addq Q, S, t3 + srl S, 1, S + cmpule SY, R, AT + subq R, SY, t4 + + cmovne AT, t3, Q + cmovne AT, t4, R + srl SY, 1, SY + bne S, 2b + + br $q_low_ret + + .align 4 + cfi_restore_state +$y_is_neg: + /* If we get here, Y is so big that bit 63 is set. The results + from the divide will be completely wrong. Fortunately, the + quotient must be either 0 or 1, so the remainder must be X + or X-Y, so just compute it directly. */ + cmpule Y, X, AT + subq X, Y, RV + ldt $f0, 0(sp) + cmoveq AT, X, RV + + lda sp, FRAME(sp) + cfi_restore ($f0) + cfi_def_cfa_offset (0) + ret $31, (RA), 1 + + .align 4 + cfi_def_cfa_offset (FRAME) +$powerof2: + subq Y, 1, AT + beq Y, DIVBYZERO + and X, AT, RV + lda sp, FRAME(sp) + cfi_def_cfa_offset (0) + ret $31, (RA), 1 + + cfi_endproc + .size __remqu, .-__remqu + + DO_DIVBYZERO diff --git a/REORG.TODO/sysdeps/alpha/rshift.S b/REORG.TODO/sysdeps/alpha/rshift.S new file mode 100644 index 0000000000..65db71c2c9 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/rshift.S @@ -0,0 +1,105 @@ + # Alpha 21064 __mpn_rshift -- + + # Copyright (C) 1994-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr r16 + # s1_ptr r17 + # size r18 + # cnt r19 + + # This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling, + # it would take 4 cycles/limb. It should be possible to get down to 3 + # cycles/limb since both ldq and stq can be paired with the other used + # instructions. But there are many restrictions in the 21064 pipeline that + # makes it hard, if not impossible, to get down to 3 cycles/limb: + + # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay. + # 2. Only aligned instruction pairs can be paired. + # 3. The store buffer or silo might not be able to deal with the bandwidth. + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_rshift + .ent __mpn_rshift +__mpn_rshift: + .frame $30,0,$26,0 + + ldq $4,0($17) # load first limb + addq $17,8,$17 + subq $31,$19,$7 + subq $18,1,$18 + and $18,4-1,$20 # number of limbs in first loop + sll $4,$7,$0 # compute function result + + beq $20,.L0 + subq $18,$20,$18 + + .align 3 +.Loop0: + ldq $3,0($17) + addq $16,8,$16 + addq $17,8,$17 + subq $20,1,$20 + srl $4,$19,$5 + sll $3,$7,$6 + bis $3,$3,$4 + bis $5,$6,$8 + stq $8,-8($16) + bne $20,.Loop0 + +.L0: beq $18,.Lend + + .align 3 +.Loop: ldq $3,0($17) + addq $16,32,$16 + subq $18,4,$18 + srl $4,$19,$5 + sll $3,$7,$6 + + ldq $4,8($17) + srl $3,$19,$1 + bis $5,$6,$8 + stq $8,-32($16) + sll $4,$7,$2 + + ldq $3,16($17) + srl $4,$19,$5 + bis $1,$2,$8 + stq $8,-24($16) + sll $3,$7,$6 + + ldq $4,24($17) + srl $3,$19,$1 + bis $5,$6,$8 + stq $8,-16($16) + sll $4,$7,$2 + + addq $17,32,$17 + bis $1,$2,$8 + stq $8,-8($16) + + bgt $18,.Loop + +.Lend: srl $4,$19,$8 + stq $8,0($16) + ret $31,($26),1 + .end __mpn_rshift diff --git a/REORG.TODO/sysdeps/alpha/setjmp.S b/REORG.TODO/sysdeps/alpha/setjmp.S new file mode 100644 index 0000000000..a6758646f1 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/setjmp.S @@ -0,0 +1,120 @@ +/* Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define __ASSEMBLY__ + +#include <sysdep.h> +#include <jmpbuf-offsets.h> + + .ent __sigsetjmp + .global __sigsetjmp +__sigsetjmp: + ldgp gp, 0(pv) + +$sigsetjmp_local: +#ifndef PIC +#define FRAME 16 + subq sp, FRAME, sp + .frame sp, FRAME, ra, 0 + stq ra, 0(sp) + .mask 0x04000000, -FRAME +#else +#define FRAME 0 + .frame sp, FRAME, ra, 0 +#endif +#ifdef PROF + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at +#endif + .prologue 1 + + stq s0, JB_S0*8(a0) + stq s1, JB_S1*8(a0) + stq s2, JB_S2*8(a0) + stq s3, JB_S3*8(a0) + stq s4, JB_S4*8(a0) + stq s5, JB_S5*8(a0) +#ifdef PTR_MANGLE + PTR_MANGLE(t1, ra, t0) + stq t1, JB_PC*8(a0) +#else + stq ra, JB_PC*8(a0) +#endif +#if defined(PTR_MANGLE) && FRAME == 0 + PTR_MANGLE2(t1, sp, t0) +#else + addq sp, FRAME, t1 +# ifdef PTR_MANGLE + PTR_MANGLE2(t1, t1, t0) +# endif +#endif + stq t1, JB_SP*8(a0) +#ifdef PTR_MANGLE + PTR_MANGLE2(t1, fp, t0) + stq t1, JB_FP*8(a0) +#else + stq fp, JB_FP*8(a0) +#endif + stt $f2, JB_F2*8(a0) + stt $f3, JB_F3*8(a0) + stt $f4, JB_F4*8(a0) + stt $f5, JB_F5*8(a0) + stt $f6, JB_F6*8(a0) + stt $f7, JB_F7*8(a0) + stt $f8, JB_F8*8(a0) + stt $f9, JB_F9*8(a0) + +#ifndef PIC + /* Call to C to (potentially) save our signal mask. */ + jsr ra, __sigjmp_save + ldq ra, 0(sp) + addq sp, 16, sp + ret +#elif IS_IN (rtld) + /* In ld.so we never save the signal mask. */ + mov 0, v0 + ret +#else + /* Tailcall to save the signal mask. */ + br $31, __sigjmp_save !samegp +#endif + +END(__sigsetjmp) +hidden_def (__sigsetjmp) + +/* Put these traditional entry points in the same file so that we can + elide much of the nonsense in trying to jmp to the real function. */ + +ENTRY(_setjmp) + ldgp gp, 0(pv) + .prologue 1 + mov 0, a1 + br $sigsetjmp_local +END(_setjmp) +libc_hidden_def (_setjmp) + +ENTRY(setjmp) + ldgp gp, 0(pv) + .prologue 1 + mov 1, a1 + br $sigsetjmp_local +END(setjmp) + +weak_extern(_setjmp) +weak_extern(setjmp) diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/Makefile b/REORG.TODO/sysdeps/alpha/soft-fp/Makefile new file mode 100644 index 0000000000..83baa7c49d --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/Makefile @@ -0,0 +1,11 @@ +# Software floating-point emulation. + +ifeq ($(subdir),soft-fp) +sysdep_routines += ots_add ots_sub ots_mul ots_div ots_cmp ots_cmpe \ + ots_cvtxq ots_cvtqx ots_cvtqux ots_cvttx ots_cvtxt ots_nintxq \ + fraiseexcpt +endif + +ifeq ($(subdir),math) +CPPFLAGS += -I../soft-fp +endif diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/Versions b/REORG.TODO/sysdeps/alpha/soft-fp/Versions new file mode 100644 index 0000000000..3901287115 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/Versions @@ -0,0 +1,8 @@ +libc { + GLIBC_2.3.4 { + _OtsAddX; _OtsSubX; _OtsMulX; _OtsDivX; + _OtsEqlX; _OtsNeqX; _OtsLssX; _OtsLeqX; _OtsGtrX; _OtsGeqX; + _OtsCvtQX; _OtsCvtQUX; _OtsCvtXQ; _OtsNintXQ; + _OtsConvertFloatTX; _OtsConvertFloatXT; + } +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/e_sqrtl.c b/REORG.TODO/sysdeps/alpha/soft-fp/e_sqrtl.c new file mode 100644 index 0000000000..8b30bcbc52 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/e_sqrtl.c @@ -0,0 +1,49 @@ +/* long double square root in software floating-point emulation. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdlib.h> +#include <soft-fp.h> +#include <quad.h> +#include <shlib-compat.h> + +long double +__ieee754_sqrtl (const long double a) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(C); + long double c; + long _round = 4; /* dynamic rounding */ + + FP_INIT_ROUNDMODE; + FP_UNPACK_Q(A, a); + FP_SQRT_Q(C, A); + FP_PACK_Q(c, C); + FP_HANDLE_EXCEPTIONS; + return c; +} + +/* ??? We forgot to add this symbol in 2.15. Getting this into 2.18 isn't as + straight-forward as just adding the alias, since a generic Versions file + includes the 2.15 version and the linker uses the first one it sees. */ +#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18) +versioned_symbol (libm, __ieee754_sqrtl, __sqrtl_finite, GLIBC_2_18); +#else +strong_alias(__ieee754_sqrtl, __sqrtl_finite) +#endif diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/local-soft-fp.h b/REORG.TODO/sysdeps/alpha/soft-fp/local-soft-fp.h new file mode 100644 index 0000000000..d562e0829e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/local-soft-fp.h @@ -0,0 +1,55 @@ +#include <stdlib.h> +#include <soft-fp.h> +#include <quad.h> + +/* Helpers for the Ots functions which receive long double arguments + in two integer registers, and return values in $16+$17. */ + +#define AXP_UNPACK_RAW_Q(X, val) \ + do { \ + union _FP_UNION_Q _flo; \ + _flo.longs.a = val##l; \ + _flo.longs.b = val##h; \ + FP_UNPACK_RAW_QP(X, &_flo); \ + } while (0) + +#define AXP_UNPACK_SEMIRAW_Q(X, val) \ + do { \ + union _FP_UNION_Q _flo; \ + _flo.longs.a = val##l; \ + _flo.longs.b = val##h; \ + FP_UNPACK_SEMIRAW_QP(X, &_flo); \ + } while (0) + +#define AXP_UNPACK_Q(X, val) \ + do { \ + AXP_UNPACK_RAW_Q(X, val); \ + _FP_UNPACK_CANONICAL(Q, 2, X); \ + } while (0) + +#define AXP_PACK_RAW_Q(val, X) FP_PACK_RAW_QP(&val##_flo, X) + +#define AXP_PACK_SEMIRAW_Q(val, X) \ + do { \ + _FP_PACK_SEMIRAW(Q, 2, X); \ + AXP_PACK_RAW_Q(val, X); \ + } while (0) + +#define AXP_PACK_Q(val, X) \ + do { \ + _FP_PACK_CANONICAL(Q, 2, X); \ + AXP_PACK_RAW_Q(val, X); \ + } while (0) + +#define AXP_DECL_RETURN_Q(X) union _FP_UNION_Q X##_flo + +/* ??? We don't have a real way to tell the compiler that we're wanting + to return values in $16+$17. Instead use a volatile asm to make sure + that the values are live, and just hope that nothing kills the values + in between here and the end of the function. */ +#define AXP_RETURN_Q(X) \ + do { \ + register long r16 __asm__("16") = X##_flo.longs.a; \ + register long r17 __asm__("17") = X##_flo.longs.b; \ + asm volatile ("" : : "r"(r16), "r"(r17)); \ + } while (0) diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_add.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_add.c new file mode 100644 index 0000000000..7291a730e7 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_add.c @@ -0,0 +1,38 @@ +/* Software floating-point emulation: addition. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +void +_OtsAddX(long al, long ah, long bl, long bh, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_INIT_ROUNDMODE; + AXP_UNPACK_SEMIRAW_Q(A, a); + AXP_UNPACK_SEMIRAW_Q(B, b); + FP_ADD_Q(C, A, B); + AXP_PACK_SEMIRAW_Q(c, C); + FP_HANDLE_EXCEPTIONS; + + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cmp.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cmp.c new file mode 100644 index 0000000000..84498f8059 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cmp.c @@ -0,0 +1,63 @@ +/* Software floating-point emulation: comparison. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +static long +internal_equality (long al, long ah, long bl, long bh, long neq) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); + long r; + + AXP_UNPACK_RAW_Q(A, a); + AXP_UNPACK_RAW_Q(B, b); + + if ((A_e == _FP_EXPMAX_Q && !_FP_FRAC_ZEROP_2(A)) + || (B_e == _FP_EXPMAX_Q && !_FP_FRAC_ZEROP_2(B))) + { + /* EQ and NE signal invalid operation only if either operand is SNaN. */ + if (FP_ISSIGNAN_Q(A) || FP_ISSIGNAN_Q(B)) + { + FP_SET_EXCEPTION(FP_EX_INVALID); + FP_HANDLE_EXCEPTIONS; + } + return -1; + } + + r = (A_e == B_e + && _FP_FRAC_EQ_2 (A, B) + && (A_s == B_s || (!A_e && _FP_FRAC_ZEROP_2(A)))); + r ^= neq; + + return r; +} + +long +_OtsEqlX (long al, long ah, long bl, long bh) +{ + return internal_equality (al, ah, bl, bh, 0); +} + +long +_OtsNeqX (long al, long ah, long bl, long bh) +{ + return internal_equality (al, ah, bl, bh, 1); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cmpe.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cmpe.c new file mode 100644 index 0000000000..d1e950d991 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cmpe.c @@ -0,0 +1,77 @@ +/* Software floating-point emulation: comparison. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +static long +internal_compare (long al, long ah, long bl, long bh) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); + long r; + + AXP_UNPACK_RAW_Q(A, a); + AXP_UNPACK_RAW_Q(B, b); + FP_CMP_Q (r, A, B, 2, 2); + + FP_HANDLE_EXCEPTIONS; + + return r; +} + +long +_OtsLssX (long al, long ah, long bl, long bh) +{ + long r = internal_compare (al, ah, bl, bh); + if (r == 2) + return -1; + else + return r < 0; +} + +long +_OtsLeqX (long al, long ah, long bl, long bh) +{ + long r = internal_compare (al, ah, bl, bh); + if (r == 2) + return -1; + else + return r <= 0; +} + +long +_OtsGtrX (long al, long ah, long bl, long bh) +{ + long r = internal_compare (al, ah, bl, bh); + if (r == 2) + return -1; + else + return r > 0; +} + +long +_OtsGeqX (long al, long ah, long bl, long bh) +{ + long r = internal_compare (al, ah, bl, bh); + if (r == 2) + return -1; + else + return r >= 0; +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtqux.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtqux.c new file mode 100644 index 0000000000..bb6fac00a3 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtqux.c @@ -0,0 +1,39 @@ +/* Software floating-point emulation: unsigned integer to float conversion. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +/* Should never actually be used, since we've more bits of precision + than the incomming long, but needed for linkage. */ +#undef FP_ROUNDMODE +#define FP_ROUNDMODE FP_RND_ZERO + +void +_OtsCvtQUX (unsigned long a) +{ + FP_DECL_EX; + FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_FROM_INT_Q(C, a, 64, unsigned long); + AXP_PACK_RAW_Q(c, C); + + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtqx.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtqx.c new file mode 100644 index 0000000000..e1d8a7a4ae --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtqx.c @@ -0,0 +1,38 @@ +/* Software floating-point emulation: signed integer to float conversion. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +/* Should never actually be used, since we've more bits of precision + than the incomming long, but needed for linkage. */ +#undef FP_ROUNDMODE +#define FP_ROUNDMODE FP_RND_ZERO + +void +_OtsCvtQX (long a) +{ + FP_DECL_EX; + FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_FROM_INT_Q(C, a, 64, unsigned long); + AXP_PACK_RAW_Q(c, C); + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvttx.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvttx.c new file mode 100644 index 0000000000..00e13f3396 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvttx.c @@ -0,0 +1,47 @@ +/* Software floating-point emulation: floating point extension. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" +#include "double.h" + +/* Should never actually be used, since we're extending, but needed + for linkage. */ +#undef FP_ROUNDMODE +#define FP_ROUNDMODE FP_RND_ZERO + +void +_OtsConvertFloatTX(double a) +{ + FP_DECL_EX; + FP_DECL_D(A); + FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_UNPACK_RAW_D(A, a); +#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q + FP_EXTEND(Q,D,4,2,C,A); +#else + FP_EXTEND(Q,D,2,1,C,A); +#endif + AXP_PACK_RAW_Q(c, C); + FP_HANDLE_EXCEPTIONS; + + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtxq.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtxq.c new file mode 100644 index 0000000000..eda0074ef0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtxq.c @@ -0,0 +1,41 @@ +/* Software floating-point emulation: float to integer conversion. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +long +_OtsCvtXQ (long al, long ah, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); + unsigned long r; + long s; + + /* If bit 3 is set, then integer overflow detection is requested. */ + s = _round & 8 ? 1 : -1; + _round = _round & 3; + + FP_INIT_ROUNDMODE; + AXP_UNPACK_RAW_Q(A, a); + FP_TO_INT_Q(r, A, 64, s); + FP_HANDLE_EXCEPTIONS; + + return r; +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtxt.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtxt.c new file mode 100644 index 0000000000..59be37e394 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_cvtxt.c @@ -0,0 +1,43 @@ +/* Software floating-point emulation: floating point truncation. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" +#include "double.h" + +double +_OtsConvertFloatXT (long al, long ah, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); + FP_DECL_D(R); + double r; + + FP_INIT_ROUNDMODE; + AXP_UNPACK_SEMIRAW_Q(A, a); +#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q + FP_TRUNC(D,Q,2,4,R,A); +#else + FP_TRUNC(D,Q,1,2,R,A); +#endif + FP_PACK_SEMIRAW_D(r, R); + FP_HANDLE_EXCEPTIONS; + + return r; +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_div.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_div.c new file mode 100644 index 0000000000..1ce38ced05 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_div.c @@ -0,0 +1,38 @@ +/* Software floating-point emulation: division. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +void +_OtsDivX(long al, long ah, long bl, long bh, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_INIT_ROUNDMODE; + AXP_UNPACK_Q(A, a); + AXP_UNPACK_Q(B, b); + FP_DIV_Q(C, A, B); + AXP_PACK_Q(c, C); + FP_HANDLE_EXCEPTIONS; + + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_mul.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_mul.c new file mode 100644 index 0000000000..937c24246d --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_mul.c @@ -0,0 +1,38 @@ +/* Software floating-point emulation: multiplication. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +void +_OtsMulX(long al, long ah, long bl, long bh, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_INIT_ROUNDMODE; + AXP_UNPACK_Q(A, a); + AXP_UNPACK_Q(B, b); + FP_MUL_Q(C, A, B); + AXP_PACK_Q(c, C); + FP_HANDLE_EXCEPTIONS; + + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_nintxq.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_nintxq.c new file mode 100644 index 0000000000..ce3be4fd62 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_nintxq.c @@ -0,0 +1,51 @@ +/* Software floating-point emulation: convert to fortran nearest. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +long +_OtsNintXQ (long al, long ah, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + unsigned long r; + long s; + + /* If bit 3 is set, then integer overflow detection is requested. */ + s = _round & 8 ? 1 : -1; + _round = _round & 3; + + FP_INIT_ROUNDMODE; + AXP_UNPACK_SEMIRAW_Q(A, a); + + /* Build 0.5 * sign(A) */ + B_e = _FP_EXPBIAS_Q; + __FP_FRAC_SET_2 (B, 0, 0); + B_s = A_s; + + FP_ADD_Q(C, A, B); + _FP_FRAC_SRL_2(C, _FP_WORKBITS); + _FP_FRAC_HIGH_RAW_Q(C) &= ~(_FP_W_TYPE)_FP_IMPLBIT_Q; + FP_TO_INT_Q(r, C, 64, s); + if (s > 0 && (_fex &= FP_EX_INVALID)) + FP_HANDLE_EXCEPTIONS; + + return r; +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/ots_sub.c b/REORG.TODO/sysdeps/alpha/soft-fp/ots_sub.c new file mode 100644 index 0000000000..69893f1ea8 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/ots_sub.c @@ -0,0 +1,38 @@ +/* Software floating-point emulation: subtraction. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "local-soft-fp.h" + +void +_OtsSubX(long al, long ah, long bl, long bh, long _round) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + AXP_DECL_RETURN_Q(c); + + FP_INIT_ROUNDMODE; + AXP_UNPACK_SEMIRAW_Q(A, a); + AXP_UNPACK_SEMIRAW_Q(B, b); + FP_SUB_Q(C, A, B); + AXP_PACK_SEMIRAW_Q(c, C); + FP_HANDLE_EXCEPTIONS; + + AXP_RETURN_Q(c); +} diff --git a/REORG.TODO/sysdeps/alpha/soft-fp/sfp-machine.h b/REORG.TODO/sysdeps/alpha/soft-fp/sfp-machine.h new file mode 100644 index 0000000000..7935b540db --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/soft-fp/sfp-machine.h @@ -0,0 +1,99 @@ +/* Machine-dependent software floating-point definitions. + Alpha userland IEEE 128-bit version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com), + Jakub Jelinek (jj@ultra.linux.cz) and + David S. Miller (davem@redhat.com). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +#define _FP_W_TYPE_SIZE 64 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1) +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 +#define _FP_QNANNEGATEDP 0 + +/* Alpha Architecture Handbook, 4.7.10.4 sez that we should prefer any + type of NaN in Fb, then Fa. */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + R##_c = FP_CLS_NAN; \ + } while (0) + +/* Rounding mode settings. */ +#define FP_RND_NEAREST FE_TONEAREST +#define FP_RND_ZERO FE_TOWARDZERO +#define FP_RND_PINF FE_UPWARD +#define FP_RND_MINF FE_DOWNWARD + +/* Obtain the current rounding mode. It's given as an argument to + all the Ots functions, with 4 meaning "dynamic". */ +#define FP_ROUNDMODE _round + +/* Exception flags. */ +#define FP_EX_INVALID FE_INVALID +#define FP_EX_OVERFLOW FE_OVERFLOW +#define FP_EX_UNDERFLOW FE_UNDERFLOW +#define FP_EX_DIVZERO FE_DIVBYZERO +#define FP_EX_INEXACT FE_INEXACT + +#define _FP_TININESS_AFTER_ROUNDING 1 + +#define FP_INIT_ROUNDMODE \ +do { \ + if (__builtin_expect (_round == 4, 0)) \ + { \ + unsigned long t; \ + __asm__ __volatile__("excb; mf_fpcr %0" : "=f"(t)); \ + _round = (t >> FPCR_ROUND_SHIFT) & 3; \ + } \ +} while (0) + +/* We copy the libm function into libc for soft-fp. */ +extern int __feraiseexcept (int __excepts) attribute_hidden; + +#define FP_HANDLE_EXCEPTIONS \ +do { \ + if (__builtin_expect (_fex, 0)) \ + __feraiseexcept (_fex); \ +} while (0) + +#define FP_TRAPPING_EXCEPTIONS \ + ((__ieee_get_fp_control () & SWCR_ENABLE_MASK) << SWCR_ENABLE_SHIFT) diff --git a/REORG.TODO/sysdeps/alpha/sotruss-lib.c b/REORG.TODO/sysdeps/alpha/sotruss-lib.c new file mode 100644 index 0000000000..01ded0131a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/sotruss-lib.c @@ -0,0 +1,50 @@ +/* Override generic sotruss-lib.c to define actual functions for Alpha. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define HAVE_ARCH_PLTENTER +#define HAVE_ARCH_PLTEXIT + +#include <elf/sotruss-lib.c> + +ElfW(Addr) +la_alpha_gnu_pltenter (ElfW(Sym) *sym __attribute__ ((unused)), + unsigned int ndx __attribute__ ((unused)), + uintptr_t *refcook, uintptr_t *defcook, + La_alpha_regs *regs, unsigned int *flags, + const char *symname, long int *framesizep) +{ + print_enter (refcook, defcook, symname, + regs->lr_r16, regs->lr_r17, regs->lr_r18, *flags); + + /* No need to copy anything, we will not need the parameters in any case. */ + *framesizep = 0; + + return sym->st_value; +} + +unsigned int +la_alpha_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, + const struct La_alpha_regs *inregs, + struct La_alpha_retval *outregs, const char *symname) +{ + print_exit (refcook, defcook, symname, outregs->lrv_r0); + + return 0; +} diff --git a/REORG.TODO/sysdeps/alpha/stackinfo.h b/REORG.TODO/sysdeps/alpha/stackinfo.h new file mode 100644 index 0000000000..0796dc8e4c --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/stackinfo.h @@ -0,0 +1,33 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains a bit of information about the stack allocation + of the processor. */ + +#ifndef _STACKINFO_H +#define _STACKINFO_H 1 + +#include <elf.h> + +/* On Alpha the stack grows down. */ +#define _STACK_GROWS_DOWN 1 + +/* Default to an executable stack. PF_X can be overridden if PT_GNU_STACK is + * present, but it is presumed absent. */ +#define DEFAULT_STACK_PERMS (PF_R|PF_W|PF_X) + +#endif /* stackinfo.h */ diff --git a/REORG.TODO/sysdeps/alpha/start.S b/REORG.TODO/sysdeps/alpha/start.S new file mode 100644 index 0000000000..b149b1fcac --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/start.S @@ -0,0 +1,85 @@ +/* Startup code for Alpha/ELF. + Copyright (C) 1993-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu> + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .text + .align 3 + .globl _start + .ent _start, 0 + .type _start,@function +_start: + .frame $15, 0, $15 + br gp, 1f +1: ldgp gp, 0(gp) + subq sp, 16, sp + mov 0, $15 + .prologue 0 + + /* Load address of the user's main function. */ + lda a0, main + + ldl a1, 16(sp) /* get argc */ + lda a2, 24(sp) /* get argv */ + + /* Load address of our own entry points to .fini and .init. */ + lda a3, __libc_csu_init + lda a4, __libc_csu_fini + + /* Store address of the shared library termination function. */ + mov v0, a5 + + /* Provide the highest stack address to the user code. */ + stq sp, 0(sp) + + /* Call the user's main function, and exit with its value. + But let the libc call main. */ + jsr ra, __libc_start_main + + /* Die very horribly if exit returns. Call_pal hlt is callable from + kernel mode only; this will result in an illegal instruction trap. */ + call_pal 0 + .end _start + +/* For ECOFF backwards compatibility. */ +weak_alias (_start, __start) + +/* Define a symbol for the first piece of initialized data. */ + .data + .globl __data_start +__data_start: + .weak data_start + data_start = __data_start diff --git a/REORG.TODO/sysdeps/alpha/stpcpy.S b/REORG.TODO/sysdeps/alpha/stpcpy.S new file mode 100644 index 0000000000..62fd0d9e36 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/stpcpy.S @@ -0,0 +1,55 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy a null-terminated string from SRC to DST. Return a pointer + to the null-terminator in the source. */ + +#include <sysdep.h> + + .text + +ENTRY(__stpcpy) + ldgp gp, 0(pv) +#ifdef PROF + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at +#endif + .prologue 1 + + jsr t9, __stxcpy # do the work of the copy + + and t8, 0xf0, t2 # binary search for byte offset of the + and t8, 0xcc, t1 # last byte written. + and t8, 0xaa, t0 + andnot a0, 7, a0 + cmovne t2, 4, t2 + cmovne t1, 2, t1 + cmovne t0, 1, t0 + addq a0, t2, v0 + addq t0, t1, t0 + addq v0, t0, v0 + + ret + + END(__stpcpy) + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) diff --git a/REORG.TODO/sysdeps/alpha/stpncpy.S b/REORG.TODO/sysdeps/alpha/stpncpy.S new file mode 100644 index 0000000000..62f6a8f2fd --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/stpncpy.S @@ -0,0 +1,106 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@tamu.edu) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy no more than COUNT bytes of the null-terminated string from + SRC to DST. If SRC does not cover all of COUNT, the balance is + zeroed. Return the address of the terminating null in DEST, if + any, else DEST + COUNT. */ + +#include <sysdep.h> + + .set noat + .set noreorder + + .text + +ENTRY(__stpncpy) + ldgp gp, 0(pv) +#ifdef PROF + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + .prologue 1 + + beq a2, $zerocount + jsr t9, __stxncpy # do the work of the copy + + and t8, 0xf0, t3 # binary search for byte offset of the + and t8, 0xcc, t2 # last byte written. + and t8, 0xaa, t1 + andnot a0, 7, v0 + cmovne t3, 4, t3 + cmovne t2, 2, t2 + cmovne t1, 1, t1 + addq v0, t3, v0 + addq t1, t2, t1 + addq v0, t1, v0 + + bne a2, $multiword # do we have full words left? + + .align 3 + zapnot t0, t8, t4 # e0 : was last byte a null? + subq t8, 1, t2 # .. e1 : + addq v0, 1, t5 # e0 : + subq t10, 1, t3 # .. e1 : + or t2, t8, t2 # e0 : clear the bits between the last + or t3, t10, t3 # .. e1 : written byte and the last byte in + andnot t3, t2, t3 # e0 : COUNT + cmovne t4, t5, v0 # .. e1 : if last written wasnt null, inc v0 + zap t0, t3, t0 # e0 : + stq_u t0, 0(a0) # e1 : + ret # .. e1 : + + .align 3 +$multiword: + subq t8, 1, t7 # e0 : clear the final bits in the prev + or t7, t8, t7 # e1 : word + zapnot t0, t7, t0 # e0 : + subq a2, 1, a2 # .. e1 : + stq_u t0, 0(a0) # e0 : + addq a0, 8, a0 # .. e1 : + + beq a2, 1f # e1 : + blbc a2, 0f # e1 : + + stq_u zero, 0(a0) # e0 : zero one word + subq a2, 1, a2 # .. e1 : + addq a0, 8, a0 # e0 : + beq a2, 1f # .. e1 : + +0: stq_u zero, 0(a0) # e0 : zero two words + subq a2, 2, a2 # .. e1 : + stq_u zero, 8(a0) # e0 : + addq a0, 16, a0 # .. e1 : + bne a2, 0b # e1 : + unop + +1: ldq_u t0, 0(a0) # e0 : clear the leading bits in the final + subq t10, 1, t7 # .. e1 : word + or t7, t10, t7 # e0 : + zap t0, t7, t0 # e1 (stall) + stq_u t0, 0(a0) # e0 : + ret # .. e1 : + +$zerocount: + mov a0, v0 + ret + + END(__stpncpy) + +libc_hidden_def (__stpncpy) +weak_alias (__stpncpy, stpncpy) diff --git a/REORG.TODO/sysdeps/alpha/strcat.S b/REORG.TODO/sysdeps/alpha/strcat.S new file mode 100644 index 0000000000..bffd3e8ee0 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strcat.S @@ -0,0 +1,71 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Append a null-terminated string from SRC to DST. */ + +#include <sysdep.h> + + .text + +ENTRY(strcat) + ldgp gp, 0(pv) +#ifdef PROF + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at +#endif + .prologue 1 + + mov a0, v0 # set up return value + + /* Find the end of the string. */ + + ldq_u t0, 0(a0) # load first quadword (a0 may be misaligned) + lda t1, -1(zero) + insqh t1, a0, t1 + andnot a0, 7, a0 + or t1, t0, t0 + cmpbge zero, t0, t1 # t1 <- bitmask: bit i == 1 <==> i-th byte == 0 + bne t1, $found + +$loop: ldq t0, 8(a0) + addq a0, 8, a0 # addr += 8 + cmpbge zero, t0, t1 + beq t1, $loop + +$found: negq t1, t2 # clear all but least set bit + and t1, t2, t1 + + and t1, 0xf0, t2 # binary search for that set bit + and t1, 0xcc, t3 + and t1, 0xaa, t4 + cmovne t2, 4, t2 + cmovne t3, 2, t3 + cmovne t4, 1, t4 + addq t2, t3, t2 + addq a0, t4, a0 + addq a0, t2, a0 + + /* Now do the append. */ + + mov ra, t9 + jmp $31, __stxcpy + + END(strcat) +libc_hidden_builtin_def (strcat) diff --git a/REORG.TODO/sysdeps/alpha/strchr.S b/REORG.TODO/sysdeps/alpha/strchr.S new file mode 100644 index 0000000000..e3cf75f39f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strchr.S @@ -0,0 +1,94 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@tamu.edu) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Return the address of a given character within a null-terminated + string, or null if it is not found. + + This is generally scheduled for the EV5 (got to look out for my own + interests :-), but with EV4 needs in mind. There *should* be no more + stalls for the EV4 than there are for the EV5. +*/ + +#include <sysdep.h> + + .set noreorder + .set noat + +ENTRY(strchr) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + zapnot a1, 1, a1 # e0 : zero extend the search character + ldq_u t0, 0(a0) # .. e1 : load first quadword + sll a1, 8, t5 # e0 : replicate the search character + andnot a0, 7, v0 # .. e1 : align our loop pointer + or t5, a1, a1 # e0 : + lda t4, -1 # .. e1 : build garbage mask + sll a1, 16, t5 # e0 : + cmpbge zero, t0, t2 # .. e1 : bits set iff byte == zero + mskqh t4, a0, t4 # e0 : + or t5, a1, a1 # .. e1 : + sll a1, 32, t5 # e0 : + cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage + or t5, a1, a1 # e0 : + xor t0, a1, t1 # .. e1 : make bytes == c zero + cmpbge zero, t1, t3 # e0 : bits set iff byte == c + or t2, t3, t0 # e1 : bits set iff char match or zero match + andnot t0, t4, t0 # e0 : clear garbage bits + bne t0, $found # .. e1 (zdb) + +$loop: ldq t0, 8(v0) # e0 : + addq v0, 8, v0 # .. e1 : + nop # e0 : + xor t0, a1, t1 # .. e1 (ev5 data stall) + cmpbge zero, t0, t2 # e0 : bits set iff byte == 0 + cmpbge zero, t1, t3 # .. e1 : bits set iff byte == c + or t2, t3, t0 # e0 : + beq t0, $loop # .. e1 (zdb) + +$found: negq t0, t1 # e0 : clear all but least set bit + and t0, t1, t0 # e1 (stall) + + and t0, t3, t1 # e0 : bit set iff byte was the char + beq t1, $retnull # .. e1 (zdb) + + and t0, 0xf0, t2 # e0 : binary search for that set bit + and t0, 0xcc, t3 # .. e1 : + and t0, 0xaa, t4 # e0 : + cmovne t2, 4, t2 # .. e1 : + cmovne t3, 2, t3 # e0 : + cmovne t4, 1, t4 # .. e1 : + addq t2, t3, t2 # e0 : + addq v0, t4, v0 # .. e1 : + addq v0, t2, v0 # e0 : + ret # .. e1 : + +$retnull: + mov zero, v0 # e0 : + ret # .. e1 : + + END(strchr) + +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/alpha/strcmp.S b/REORG.TODO/sysdeps/alpha/strcmp.S new file mode 100644 index 0000000000..ae211bb7ae --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strcmp.S @@ -0,0 +1,194 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Bytewise compare two null-terminated strings. */ + +#include <sysdep.h> + + .set noat + .set noreorder + + .text + +ENTRY(strcmp) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jmp AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + ldq_u t0, 0(a0) # e0 : give cache time to catch up + xor a0, a1, t2 # .. e1 : are s1 and s2 co-aligned? + ldq_u t1, 0(a1) # e0 : + and t2, 7, t2 # .. e1 : + lda t3, -1 # e0 : + bne t2, $unaligned # .. e1 : + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. + t3 == -1. */ + +$aligned: + mskqh t3, a0, t3 # e0 : + nop # .. e1 : + ornot t1, t3, t1 # e0 : + ornot t0, t3, t0 # .. e1 : + cmpbge zero, t1, t7 # e0 : bits set iff null found + bne t7, $eos # e1 (zdb) + + /* Aligned compare main loop. + On entry to this basic block: + t0 == an s1 word. + t1 == an s2 word not containing a null. */ + +$a_loop: + xor t0, t1, t2 # e0 : + bne t2, $wordcmp # .. e1 (zdb) + ldq_u t1, 8(a1) # e0 : + ldq_u t0, 8(a0) # .. e1 : + addq a1, 8, a1 # e0 : + addq a0, 8, a0 # .. e1 : + cmpbge zero, t1, t7 # e0 : + beq t7, $a_loop # .. e1 (zdb) + br $eos # e1 : + + /* The two strings are not co-aligned. Align s1 and cope. */ + +$unaligned: + and a0, 7, t4 # e0 : find s1 misalignment + and a1, 7, t5 # .. e1 : find s2 misalignment + subq a1, t4, a1 # e0 : + + /* If s2 misalignment is larger than s2 misalignment, we need + extra startup checks to avoid SEGV. */ + + cmplt t4, t5, t8 # .. e1 : + beq t8, $u_head # e1 : + + mskqh t3, t5, t3 # e0 : + ornot t1, t3, t3 # e0 : + cmpbge zero, t3, t7 # e1 : is there a zero? + beq t7, $u_head # e1 : + + /* We've found a zero in the first partial word of s2. Align + our current s1 and s2 words and compare what we've got. */ + + extql t1, t5, t1 # e0 : + extql t0, a0, t0 # e0 : + cmpbge zero, t1, t7 # .. e1 : find that zero again + br $eos # e1 : and finish up + + .align 3 +$u_head: + /* We know just enough now to be able to assemble the first + full word of s2. We can still find a zero at the end of it. + + On entry to this basic block: + t0 == first word of s1 + t1 == first partial word of s2. */ + + ldq_u t2, 8(a1) # e0 : load second partial s2 word + lda t3, -1 # .. e1 : create leading garbage mask + extql t1, a1, t1 # e0 : create first s2 word + mskqh t3, a0, t3 # e0 : + extqh t2, a1, t4 # e0 : + ornot t0, t3, t0 # .. e1 : kill s1 garbage + or t1, t4, t1 # e0 : s2 word now complete + cmpbge zero, t0, t7 # .. e1 : find zero in first s1 word + ornot t1, t3, t1 # e0 : kill s2 garbage + lda t3, -1 # .. e1 : + mskql t3, a1, t3 # e0 : mask for s2[1] bits we have seen + bne t7, $eos # .. e1 : + xor t0, t1, t4 # e0 : compare aligned words + bne t4, $wordcmp # .. e1 (zdb) + or t2, t3, t3 # e0 : + cmpbge zero, t3, t7 # e1 : + bne t7, $u_final # e1 : + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned words from s2. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t2 == the unshifted low-bits from the next s2 word. */ + + .align 3 +$u_loop: + extql t2, a1, t3 # e0 : + ldq_u t2, 16(a1) # .. e1 : load next s2 high bits + ldq_u t0, 8(a0) # e0 : load next s1 word + addq a1, 8, a1 # .. e1 : + addq a0, 8, a0 # e0 : + nop # .. e1 : + extqh t2, a1, t1 # e0 : + cmpbge zero, t0, t7 # .. e1 : find zero in current s1 word + or t1, t3, t1 # e0 : + bne t7, $eos # .. e1 : + xor t0, t1, t4 # e0 : compare the words + bne t4, $wordcmp # .. e1 (zdb) + cmpbge zero, t2, t4 # e0 : find zero in next low bits + beq t4, $u_loop # .. e1 (zdb) + + /* We've found a zero in the low bits of the last s2 word. Get + the next s1 word and align them. */ +$u_final: + ldq_u t0, 8(a0) # e1 : + extql t2, a1, t1 # .. e0 : + cmpbge zero, t1, t7 # e0 : + + /* We've found a zero somewhere in a word we just read. + On entry to this basic block: + t0 == s1 word + t1 == s2 word + t7 == cmpbge mask containing the zero. */ + + .align 3 +$eos: + negq t7, t6 # e0 : create bytemask of valid data + and t6, t7, t8 # e1 : + subq t8, 1, t6 # e0 : + or t6, t8, t7 # e1 : + zapnot t0, t7, t0 # e0 : kill the garbage + zapnot t1, t7, t1 # .. e1 : + xor t0, t1, v0 # e0 : and compare + beq v0, $done # .. e1 : + + /* Here we have two differing co-aligned words in t0 & t1. + Bytewise compare them and return (t0 > t1 ? 1 : -1). */ +$wordcmp: + cmpbge t0, t1, t2 # e0 : comparison yields bit mask of ge + cmpbge t1, t0, t3 # .. e1 : + xor t2, t3, t0 # e0 : bits set iff t0/t1 bytes differ + negq t0, t1 # e1 : clear all but least bit + and t0, t1, t0 # e0 : + lda v0, -1 # .. e1 : + and t0, t2, t1 # e0 : was bit set in t0 > t1? + cmovne t1, 1, v0 # .. e1 (zdb) + +$done: + ret # e1 : + + END(strcmp) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/alpha/strcpy.S b/REORG.TODO/sysdeps/alpha/strcpy.S new file mode 100644 index 0000000000..4726630aab --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strcpy.S @@ -0,0 +1,41 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy a null-terminated string from SRC to DST. Return a pointer + to the null-terminator in the source. */ + +#include <sysdep.h> + + .text + +ENTRY(strcpy) + ldgp gp, 0(pv) +#ifdef PROF + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at +#endif + .prologue 1 + + mov a0, v0 # set up return value + mov ra, t9 + jmp $31, __stxcpy # do the copy + + END(strcpy) +libc_hidden_builtin_def (strcpy) diff --git a/REORG.TODO/sysdeps/alpha/strlen.S b/REORG.TODO/sysdeps/alpha/strlen.S new file mode 100644 index 0000000000..72aef91e0e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strlen.S @@ -0,0 +1,76 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by David Mosberger (davidm@cs.arizona.edu). + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Finds length of a 0-terminated string. Optimized for the Alpha + architecture: + + - memory accessed as aligned quadwords only + - uses cmpbge to compare 8 bytes in parallel + - does binary search to find 0 byte in last quadword (HAKMEM + needed 12 instructions to do this instead of the 8 instructions + that the binary search needs). +*/ + +#include <sysdep.h> + + .set noreorder + .set noat + +ENTRY(strlen) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + ldq_u t0, 0(a0) # load first quadword (a0 may be misaligned) + lda t1, -1(zero) + insqh t1, a0, t1 + andnot a0, 7, v0 + or t1, t0, t0 + nop # dual issue the next two on ev5 + cmpbge zero, t0, t1 # t1 <- bitmask: bit i == 1 <==> i-th byte == 0 + bne t1, $found + +$loop: ldq t0, 8(v0) + addq v0, 8, v0 # addr += 8 + cmpbge zero, t0, t1 + beq t1, $loop + +$found: negq t1, t2 # clear all but least set bit + and t1, t2, t1 + + and t1, 0xf0, t2 # binary search for that set bit + and t1, 0xcc, t3 + and t1, 0xaa, t4 + cmovne t2, 4, t2 + cmovne t3, 2, t3 + cmovne t4, 1, t4 + addq t2, t3, t2 + addq v0, t4, v0 + addq v0, t2, v0 + nop # dual issue next two on ev4 and ev5 + + subq v0, a0, v0 + ret + + END(strlen) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/alpha/strncat.S b/REORG.TODO/sysdeps/alpha/strncat.S new file mode 100644 index 0000000000..61c4445da3 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strncat.S @@ -0,0 +1,94 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <rth@tamu.edu>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Append no more than COUNT characters from the null-terminated string SRC + to the null-terminated string DST. Always null-terminate the new DST. */ + +#include <sysdep.h> + + .text + +ENTRY(strncat) + ldgp gp, 0(pv) +#ifdef PROF + .set noat + lda AT, _mcount + jsr AT, (AT), _mcount + .set at +#endif + .prologue 1 + + mov a0, v0 # set up return value + beq a2, $zerocount + + /* Find the end of the string. */ + + ldq_u t0, 0(a0) # load first quadword (a0 may be misaligned) + lda t1, -1(zero) + insqh t1, a0, t1 + andnot a0, 7, a0 + or t1, t0, t0 + cmpbge zero, t0, t1 # t1 <- bitmask: bit i == 1 <==> i-th byte == 0 + bne t1, $found + +$loop: ldq t0, 8(a0) + addq a0, 8, a0 # addr += 8 + cmpbge zero, t0, t1 + beq t1, $loop + +$found: negq t1, t2 # clear all but least set bit + and t1, t2, t1 + + and t1, 0xf0, t2 # binary search for that set bit + and t1, 0xcc, t3 + and t1, 0xaa, t4 + cmovne t2, 4, t2 + cmovne t3, 2, t3 + cmovne t4, 1, t4 + addq t2, t3, t2 + addq a0, t4, a0 + addq a0, t2, a0 + + /* Now do the append. */ + + jsr t9, __stxncpy + + /* Worry about the null termination. */ + + zapnot t0, t8, t1 # was last byte a null? + bne t1, 0f + ret + +0: and t10, 0x80, t1 + bne t1, 1f + + /* Here there are bytes left in the current word. Clear one. */ + addq t10, t10, t10 # end-of-count bit <<= 1 + zap t0, t10, t0 + stq_u t0, 0(a0) + ret + +1: /* Here we must read the next DST word and clear the first byte. */ + ldq_u t0, 8(a0) + zap t0, 1, t0 + stq_u t0, 8(a0) + +$zerocount: + ret + + END(strncat) diff --git a/REORG.TODO/sysdeps/alpha/strncmp.S b/REORG.TODO/sysdeps/alpha/strncmp.S new file mode 100644 index 0000000000..e5a6e0a832 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strncmp.S @@ -0,0 +1,277 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Bytewise compare two null-terminated strings of length no longer than N. */ + +#include <sysdep.h> + + .set noat + .set noreorder + +/* EV6 only predicts one branch per octaword. We'll use these to push + subsequent branches back to the next bundle. This will generally add + a fetch+decode cycle to older machines, so skip in that case. */ +#ifdef __alpha_fix__ +# define ev6_unop unop +#else +# define ev6_unop +#endif + + .text + +ENTRY(strncmp) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + xor a0, a1, t2 # are s1 and s2 co-aligned? + beq a2, $zerolength + ldq_u t0, 0(a0) # load asap to give cache time to catch up + ldq_u t1, 0(a1) + lda t3, -1 + and t2, 7, t2 + srl t3, 1, t6 + and a0, 7, t4 # find s1 misalignment + and a1, 7, t5 # find s2 misalignment + cmovlt a2, t6, a2 # bound neg count to LONG_MAX + addq a1, a2, a3 # s2+count + addq a2, t4, a2 # bias count by s1 misalignment + and a2, 7, t10 # ofs of last byte in s1 last word + srl a2, 3, a2 # remaining full words in s1 count + bne t2, $unaligned + + /* On entry to this basic block: + t0 == the first word of s1. + t1 == the first word of s2. + t3 == -1. */ +$aligned: + mskqh t3, a1, t8 # mask off leading garbage + ornot t1, t8, t1 + ornot t0, t8, t0 + cmpbge zero, t1, t7 # bits set iff null found + beq a2, $eoc # check end of count + bne t7, $eos + beq t10, $ant_loop + + /* Aligned compare main loop. + On entry to this basic block: + t0 == an s1 word. + t1 == an s2 word not containing a null. */ + + .align 4 +$a_loop: + xor t0, t1, t2 # e0 : + bne t2, $wordcmp # .. e1 (zdb) + ldq_u t1, 8(a1) # e0 : + ldq_u t0, 8(a0) # .. e1 : + + subq a2, 1, a2 # e0 : + addq a1, 8, a1 # .. e1 : + addq a0, 8, a0 # e0 : + beq a2, $eoc # .. e1 : + + cmpbge zero, t1, t7 # e0 : + beq t7, $a_loop # .. e1 : + + br $eos + + /* Alternate aligned compare loop, for when there's no trailing + bytes on the count. We have to avoid reading too much data. */ + .align 4 +$ant_loop: + xor t0, t1, t2 # e0 : + ev6_unop + ev6_unop + bne t2, $wordcmp # .. e1 (zdb) + + subq a2, 1, a2 # e0 : + beq a2, $zerolength # .. e1 : + ldq_u t1, 8(a1) # e0 : + ldq_u t0, 8(a0) # .. e1 : + + addq a1, 8, a1 # e0 : + addq a0, 8, a0 # .. e1 : + cmpbge zero, t1, t7 # e0 : + beq t7, $ant_loop # .. e1 : + + br $eos + + /* The two strings are not co-aligned. Align s1 and cope. */ + /* On entry to this basic block: + t0 == the first word of s1. + t1 == the first word of s2. + t3 == -1. + t4 == misalignment of s1. + t5 == misalignment of s2. + t10 == misalignment of s1 end. */ + .align 4 +$unaligned: + /* If s1 misalignment is larger than s2 misalignment, we need + extra startup checks to avoid SEGV. */ + subq a1, t4, a1 # adjust s2 for s1 misalignment + cmpult t4, t5, t9 + subq a3, 1, a3 # last byte of s2 + bic a1, 7, t8 + mskqh t3, t5, t7 # mask garbage in s2 + subq a3, t8, a3 + ornot t1, t7, t7 + srl a3, 3, a3 # remaining full words in s2 count + beq t9, $u_head + + /* Failing that, we need to look for both eos and eoc within the + first word of s2. If we find either, we can continue by + pretending that the next word of s2 is all zeros. */ + lda t2, 0 # next = zero + cmpeq a3, 0, t8 # eoc in the first word of s2? + cmpbge zero, t7, t7 # eos in the first word of s2? + or t7, t8, t8 + bne t8, $u_head_nl + + /* We know just enough now to be able to assemble the first + full word of s2. We can still find a zero at the end of it. + + On entry to this basic block: + t0 == first word of s1 + t1 == first partial word of s2. + t3 == -1. + t10 == ofs of last byte in s1 last word. + t11 == ofs of last byte in s2 last word. */ +$u_head: + ldq_u t2, 8(a1) # load second partial s2 word + subq a3, 1, a3 +$u_head_nl: + extql t1, a1, t1 # create first s2 word + mskqh t3, a0, t8 + extqh t2, a1, t4 + ornot t0, t8, t0 # kill s1 garbage + or t1, t4, t1 # s2 word now complete + cmpbge zero, t0, t7 # find eos in first s1 word + ornot t1, t8, t1 # kill s2 garbage + beq a2, $eoc + subq a2, 1, a2 + bne t7, $eos + mskql t3, a1, t8 # mask out s2[1] bits we have seen + xor t0, t1, t4 # compare aligned words + or t2, t8, t8 + bne t4, $wordcmp + cmpbge zero, t8, t7 # eos in high bits of s2[1]? + cmpeq a3, 0, t8 # eoc in s2[1]? + or t7, t8, t7 + bne t7, $u_final + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned words from s2. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t2 == the unshifted low-bits from the next s2 word. + t10 == ofs of last byte in s1 last word. + t11 == ofs of last byte in s2 last word. */ + .align 4 +$u_loop: + extql t2, a1, t3 # e0 : + ldq_u t2, 16(a1) # .. e1 : load next s2 high bits + ldq_u t0, 8(a0) # e0 : load next s1 word + addq a1, 8, a1 # .. e1 : + + addq a0, 8, a0 # e0 : + subq a3, 1, a3 # .. e1 : + extqh t2, a1, t1 # e0 : + cmpbge zero, t0, t7 # .. e1 : eos in current s1 word + + or t1, t3, t1 # e0 : + beq a2, $eoc # .. e1 : eoc in current s1 word + subq a2, 1, a2 # e0 : + cmpbge zero, t2, t4 # .. e1 : eos in s2[1] + + xor t0, t1, t3 # e0 : compare the words + ev6_unop + ev6_unop + bne t7, $eos # .. e1 : + + cmpeq a3, 0, t5 # e0 : eoc in s2[1] + ev6_unop + ev6_unop + bne t3, $wordcmp # .. e1 : + + or t4, t5, t4 # e0 : eos or eoc in s2[1]. + beq t4, $u_loop # .. e1 (zdb) + + /* We've found a zero in the low bits of the last s2 word. Get + the next s1 word and align them. */ + .align 3 +$u_final: + ldq_u t0, 8(a0) + extql t2, a1, t1 + cmpbge zero, t1, t7 + bne a2, $eos + + /* We've hit end of count. Zero everything after the count + and compare whats left. */ + .align 3 +$eoc: + mskql t0, t10, t0 + mskql t1, t10, t1 + cmpbge zero, t1, t7 + + /* We've found a zero somewhere in a word we just read. + On entry to this basic block: + t0 == s1 word + t1 == s2 word + t7 == cmpbge mask containing the zero. */ + .align 3 +$eos: + negq t7, t6 # create bytemask of valid data + and t6, t7, t8 + subq t8, 1, t6 + or t6, t8, t7 + zapnot t0, t7, t0 # kill the garbage + zapnot t1, t7, t1 + xor t0, t1, v0 # ... and compare + beq v0, $done + + /* Here we have two differing co-aligned words in t0 & t1. + Bytewise compare them and return (t0 > t1 ? 1 : -1). */ + .align 3 +$wordcmp: + cmpbge t0, t1, t2 # comparison yields bit mask of ge + cmpbge t1, t0, t3 + xor t2, t3, t0 # bits set iff t0/t1 bytes differ + negq t0, t1 # clear all but least bit + and t0, t1, t0 + lda v0, -1 + and t0, t2, t1 # was bit set in t0 > t1? + cmovne t1, 1, v0 +$done: + ret + + .align 3 +$zerolength: + clr v0 + ret + + END(strncmp) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/alpha/strncpy.S b/REORG.TODO/sysdeps/alpha/strncpy.S new file mode 100644 index 0000000000..96f3973ff7 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strncpy.S @@ -0,0 +1,87 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy no more than COUNT bytes of the null-terminated string from + SRC to DST. If SRC does not cover all of COUNT, the balance is + zeroed. */ + +#include <sysdep.h> + + .set noat + .set noreorder + + .text + +ENTRY(strncpy) + ldgp gp, 0(pv) +#ifdef PROF + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + .prologue 1 + + mov a0, v0 # set return value now + beq a2, $zerocount + jsr t9, __stxncpy # do the work of the copy + + bne a2, $multiword # do we have full words left? + + .align 3 + subq t8, 1, t2 # e0 : guess not + subq t10, 1, t3 # .. e1 : + or t2, t8, t2 # e0 : clear the bits between the last + or t3, t10, t3 # .. e1 : written byte and the last byte in + andnot t3, t2, t3 # e0 : COUNT + zap t0, t3, t0 # e1 : + stq_u t0, 0(a0) # e0 : + ret # .. e1 : + +$multiword: + subq t8, 1, t7 # e0 : clear the final bits in the prev + or t7, t8, t7 # e1 : word + zapnot t0, t7, t0 # e0 : + subq a2, 1, a2 # .. e1 : + stq_u t0, 0(a0) # e0 : + addq a0, 8, a0 # .. e1 : + + beq a2, 1f # e1 : + blbc a2, 0f # e1 : + + stq_u zero, 0(a0) # e0 : zero one word + subq a2, 1, a2 # .. e1 : + addq a0, 8, a0 # e0 : + beq a2, 1f # .. e1 : + +0: stq_u zero, 0(a0) # e0 : zero two words + subq a2, 2, a2 # .. e1 : + stq_u zero, 8(a0) # e0 : + addq a0, 16, a0 # .. e1 : + bne a2, 0b # e1 : + unop + +1: ldq_u t0, 0(a0) # e0 : clear the leading bits in the final + subq t10, 1, t7 # .. e1 : word + or t7, t10, t7 # e0 : + zap t0, t7, t0 # e1 (stall) + stq_u t0, 0(a0) # e0 : + +$zerocount: + ret # .. e1 : + + END(strncpy) +libc_hidden_builtin_def (strncpy) diff --git a/REORG.TODO/sysdeps/alpha/strrchr.S b/REORG.TODO/sysdeps/alpha/strrchr.S new file mode 100644 index 0000000000..6b169ab86c --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/strrchr.S @@ -0,0 +1,110 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Return the address of the last occurrence of a given character + within a null-terminated string, or null if it is not found. + + This is generally scheduled for the EV5 (got to look out for my own + interests :-), but with EV4 needs in mind. There are, in fact, fewer + stalls on the EV4 than there are on the EV5. +*/ + +#include <sysdep.h> + + .set noreorder + .set noat + +ENTRY(strrchr) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + + and a1, 0xff, a1 # e0 : zero extend our test character + mov zero, t6 # .. e1 : t6 is last match aligned addr + sll a1, 8, t5 # e0 : replicate our test character + mov zero, t7 # .. e1 : t7 is last match byte compare mask + or t5, a1, a1 # e0 : + ldq_u t0, 0(a0) # .. e1 : load first quadword + sll a1, 16, t5 # e0 : + andnot a0, 7, v0 # .. e1 : align source addr + or t5, a1, a1 # e0 : + lda t4, -1 # .. e1 : build garbage mask + sll a1, 32, t5 # e0 : + cmpbge zero, t0, t1 # .. e1 : bits set iff byte == zero + mskqh t4, a0, t4 # e0 : + or t5, a1, a1 # .. e1 : character replication complete + xor t0, a1, t2 # e0 : make bytes == c zero + cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage + cmpbge zero, t2, t3 # e0 : bits set iff byte == c + andnot t1, t4, t1 # .. e1 : clear garbage from null test + andnot t3, t4, t3 # e0 : clear garbage from char test + bne t1, $eos # .. e1 : did we already hit the terminator? + + /* Character search main loop */ +$loop: + ldq t0, 8(v0) # e0 : load next quadword + cmovne t3, v0, t6 # .. e1 : save previous comparisons match + cmovne t3, t3, t7 # e0 : + addq v0, 8, v0 # .. e1 : + xor t0, a1, t2 # e0 : + cmpbge zero, t0, t1 # .. e1 : bits set iff byte == zero + cmpbge zero, t2, t3 # e0 : bits set iff byte == c + beq t1, $loop # .. e1 : if we havnt seen a null, loop + + /* Mask out character matches after terminator */ +$eos: + negq t1, t4 # e0 : isolate first null byte match + and t1, t4, t4 # e1 : + subq t4, 1, t5 # e0 : build a mask of the bytes upto... + or t4, t5, t4 # e1 : ... and including the null + + and t3, t4, t3 # e0 : mask out char matches after null + cmovne t3, t3, t7 # .. e1 : save it, if match found + cmovne t3, v0, t6 # e0 : + + /* Locate the address of the last matched character */ + + /* Retain the early exit for the ev4 -- the ev5 mispredict penalty + is 5 cycles -- the same as just falling through. */ + beq t7, $retnull # .. e1 : + + and t7, 0xf0, t2 # e0 : binary search for the high bit set + cmovne t2, t2, t7 # .. e1 (zdb) + cmovne t2, 4, t2 # e0 : + and t7, 0xcc, t1 # .. e1 : + cmovne t1, t1, t7 # e0 : + cmovne t1, 2, t1 # .. e1 : + and t7, 0xaa, t0 # e0 : + cmovne t0, 1, t0 # .. e1 (zdb) + addq t2, t1, t1 # e0 : + addq t6, t0, v0 # .. e1 : add our aligned base ptr to the mix + addq v0, t1, v0 # e0 : + ret # .. e1 : + +$retnull: + mov zero, v0 # e0 : + ret # .. e1 : + + END(strrchr) + +weak_alias (strrchr, rindex) +libc_hidden_builtin_def (strrchr) diff --git a/REORG.TODO/sysdeps/alpha/stxcpy.S b/REORG.TODO/sysdeps/alpha/stxcpy.S new file mode 100644 index 0000000000..a3efd9ce48 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/stxcpy.S @@ -0,0 +1,294 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy a null-terminated string from SRC to DST. + + This is an internal routine used by strcpy, stpcpy, and strcat. + As such, it uses special linkage conventions to make implementation + of these public functions more efficient. + + On input: + t9 = return address + a0 = DST + a1 = SRC + + On output: + t8 = bitmask (with one bit set) indicating the last byte written + a0 = unaligned address of the last *word* written + + Furthermore, v0, a3-a5, t11, and t12 are untouched. +*/ + +/* This is generally scheduled for the EV5, but should still be pretty + good for the EV4 too. */ + +#include <sysdep.h> + + .set noat + .set noreorder + + .text + .type __stxcpy, @function + .globl __stxcpy + .usepv __stxcpy, no + + cfi_startproc + cfi_return_column (t9) + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + .align 3 +stxcpy_aligned: + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # e1 : build a mask against false zero + mskqh t2, a1, t2 # e0 : detection in the src word + mskqh t1, a1, t3 # e0 : + ornot t1, t2, t2 # .. e1 : + mskql t0, a1, t0 # e0 : assemble the first output word + cmpbge zero, t2, t7 # .. e1 : bits set iff null found + or t0, t3, t1 # e0 : + bne t7, $a_eos # .. e1 : + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == a source word not containing a null. */ +$a_loop: + stq_u t1, 0(a0) # e0 : + addq a0, 8, a0 # .. e1 : + ldq_u t1, 0(a1) # e0 : + addq a1, 8, a1 # .. e1 : + cmpbge zero, t1, t7 # e0 (stall) + beq t7, $a_loop # .. e1 (zdb) + + /* Take care of the final (partial) word store. + On entry to this basic block we have: + t1 == the source word containing the null + t7 == the cmpbge mask that found it. */ +$a_eos: + negq t7, t6 # e0 : find low bit set + and t7, t6, t8 # e1 (stall) + + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t8, 0x80, t6 # e0 : + bne t6, 1f # .. e1 (zdb) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t0, 0(a0) # e0 : + subq t8, 1, t6 # .. e1 : + zapnot t1, t6, t1 # e0 : clear src bytes >= null + or t8, t6, t7 # .. e1 : + zap t0, t7, t0 # e0 : clear dst bytes <= null + or t0, t1, t1 # e1 : + +1: stq_u t1, 0(a0) # e0 : + ret (t9) # .. e1 : + + .align 3 +__stxcpy: + /* Are source and destination co-aligned? */ + xor a0, a1, t0 # e0 : + unop # : + and t0, 7, t0 # e0 : + bne t0, $unaligned # .. e1 : + + /* We are co-aligned; take care of a partial first word. */ + ldq_u t1, 0(a1) # e0 : load first src word + and a0, 7, t0 # .. e1 : take care not to load a word ... + addq a1, 8, a1 # e0 : + beq t0, stxcpy_aligned # .. e1 : ... if we wont need it + ldq_u t0, 0(a0) # e0 : + br stxcpy_aligned # .. e1 : + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 3 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, for masking back in, if needed else 0 + t1 == the low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # e0 : + addq a1, 8, a1 # .. e1 : + + extql t1, a1, t1 # e0 : + extqh t2, a1, t4 # e0 : + mskql t0, a0, t0 # e0 : + or t1, t4, t1 # .. e1 : + mskqh t1, a0, t1 # e0 : + or t0, t1, t1 # e1 : + + or t1, t6, t6 # e0 : + cmpbge zero, t6, t7 # .. e1 : + lda t6, -1 # e0 : for masking just below + bne t7, $u_final # .. e1 : + + mskql t6, a1, t6 # e0 : mask out the bits we have + or t6, t2, t2 # e1 : already extracted before + cmpbge zero, t2, t7 # e0 : testing eos + bne t7, $u_late_head_exit # .. e1 (zdb) + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + + stq_u t1, 0(a0) # e0 : store first output word + addq a0, 8, a0 # .. e1 : + extql t2, a1, t0 # e0 : position ho-bits of lo word + ldq_u t2, 8(a1) # .. e1 : read next high-order source word + addq a1, 8, a1 # e0 : + cmpbge zero, t2, t7 # .. e1 : + nop # e0 : + bne t7, $u_eos # .. e1 : + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. */ + + .align 3 +$u_loop: + extqh t2, a1, t1 # e0 : extract high bits for current word + addq a1, 8, a1 # .. e1 : + extql t2, a1, t3 # e0 : extract low bits for next time + addq a0, 8, a0 # .. e1 : + or t0, t1, t1 # e0 : current dst word now complete + ldq_u t2, 0(a1) # .. e1 : load high word for next time + stq_u t1, -8(a0) # e0 : save the current word + mov t3, t0 # .. e1 : + cmpbge zero, t2, t7 # e0 : test new word for eos + beq t7, $u_loop # .. e1 : + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + extqh t2, a1, t1 # e0 : + or t0, t1, t1 # e1 : first (partial) source word complete + + cmpbge zero, t1, t7 # e0 : is the null in this first bit? + bne t7, $u_final # .. e1 (zdb) + +$u_late_head_exit: + stq_u t1, 0(a0) # e0 : the null was in the high-order bits + addq a0, 8, a0 # .. e1 : + extql t2, a1, t1 # e0 : + cmpbge zero, t1, t7 # .. e1 : + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t1 == assembled source word + t7 == cmpbge mask that found the null. */ +$u_final: + negq t7, t6 # e0 : isolate low bit set + and t6, t7, t8 # e1 : + + and t8, 0x80, t6 # e0 : avoid dest word load if we can + bne t6, 1f # .. e1 (zdb) + + ldq_u t0, 0(a0) # e0 : + subq t8, 1, t6 # .. e1 : + or t6, t8, t7 # e0 : + zapnot t1, t6, t1 # .. e1 : kill source bytes >= null + zap t0, t7, t0 # e0 : kill dest bytes <= null + or t0, t1, t1 # e1 : + +1: stq_u t1, 0(a0) # e0 : + ret (t9) # .. e1 : + + /* Unaligned copy entry point. */ + .align 3 +$unaligned: + + ldq_u t1, 0(a1) # e0 : load first source word + + and a0, 7, t4 # .. e1 : find dest misalignment + and a1, 7, t5 # e0 : find src misalignment + + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + + mov zero, t0 # .. e1 : + mov zero, t6 # e0 : + beq t4, 1f # .. e1 : + ldq_u t0, 0(a0) # e0 : + lda t6, -1 # .. e1 : + mskql t6, a0, t6 # e0 : +1: + subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr + + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + + cmplt t4, t5, t8 # e0 : + beq t8, $u_head # .. e1 (zdb) + + lda t2, -1 # e1 : mask out leading garbage in source + mskqh t2, t5, t2 # e0 : + nop # e0 : + ornot t1, t2, t3 # .. e1 : + cmpbge zero, t3, t7 # e0 : is there a zero? + beq t7, $u_head # .. e1 (zdb) + + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + + ldq_u t0, 0(a0) # e0 : + + negq t7, t6 # .. e1 : build bitmask of bytes <= zero + and t6, t7, t8 # e0 : + and a1, 7, t5 # .. e1 : + subq t8, 1, t6 # e0 : + or t6, t8, t7 # e1 : + srl t8, t5, t8 # e0 : adjust final null return value + + zapnot t2, t7, t2 # .. e1 : prepare source word; mirror changes + and t1, t2, t1 # e1 : to source validity mask + extql t2, a1, t2 # .. e0 : + extql t1, a1, t1 # e0 : + + andnot t0, t2, t0 # .. e1 : zero place for source to reside + or t0, t1, t1 # e1 : and put it there + stq_u t1, 0(a0) # .. e0 : + ret (t9) + + cfi_endproc diff --git a/REORG.TODO/sysdeps/alpha/stxncpy.S b/REORG.TODO/sysdeps/alpha/stxncpy.S new file mode 100644 index 0000000000..718a37ad0a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/stxncpy.S @@ -0,0 +1,352 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy no more than COUNT bytes of the null-terminated string from + SRC to DST. + + This is an internal routine used by strncpy, stpncpy, and strncat. + As such, it uses special linkage conventions to make implementation + of these public functions more efficient. + + On input: + t9 = return address + a0 = DST + a1 = SRC + a2 = COUNT + + Furthermore, COUNT may not be zero. + + On output: + t0 = last word written + t8 = bitmask (with one bit set) indicating the last byte written + t10 = bitmask (with one bit set) indicating the byte position of + the end of the range specified by COUNT + a0 = unaligned address of the last *word* written + a2 = the number of full words left in COUNT + + Furthermore, v0, a3-a5, t11, and t12 are untouched. +*/ + + +/* This is generally scheduled for the EV5, but should still be pretty + good for the EV4 too. */ + +#include <sysdep.h> + + .set noat + .set noreorder + + .text + .type __stxncpy, @function + .globl __stxncpy + .usepv __stxncpy, no + + cfi_startproc + cfi_return_column (t9) + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + .align 3 +stxncpy_aligned: + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # e1 : build a mask against false zero + mskqh t2, a1, t2 # e0 : detection in the src word + mskqh t1, a1, t3 # e0 : + ornot t1, t2, t2 # .. e1 : + mskql t0, a1, t0 # e0 : assemble the first output word + cmpbge zero, t2, t7 # .. e1 : bits set iff null found + or t0, t3, t0 # e0 : + beq a2, $a_eoc # .. e1 : + bne t7, $a_eos # .. e1 : + + /* On entry to this basic block: + t0 == a source word not containing a null. */ +$a_loop: + stq_u t0, 0(a0) # e0 : + addq a0, 8, a0 # .. e1 : + ldq_u t0, 0(a1) # e0 : + addq a1, 8, a1 # .. e1 : + subq a2, 1, a2 # e0 : + cmpbge zero, t0, t7 # .. e1 (stall) + beq a2, $a_eoc # e1 : + beq t7, $a_loop # e1 : + + /* Take care of the final (partial) word store. At this point + the end-of-count bit is set in t7 iff it applies. + + On entry to this basic block we have: + t0 == the source word containing the null + t7 == the cmpbge mask that found it. */ +$a_eos: + negq t7, t8 # e0 : find low bit set + and t7, t8, t8 # e1 (stall) + + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t8, 0x80, t6 # e0 : + bne t6, 1f # .. e1 (zdb) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t1, 0(a0) # e0 : + subq t8, 1, t6 # .. e1 : + or t8, t6, t7 # e0 : + unop # + zapnot t0, t7, t0 # e0 : clear src bytes > null + zap t1, t7, t1 # .. e1 : clear dst bytes <= null + or t0, t1, t0 # e1 : + +1: stq_u t0, 0(a0) # e0 : + ret (t9) # e1 : + + /* Add the end-of-count bit to the eos detection bitmask. */ +$a_eoc: + or t10, t7, t7 + br $a_eos + + .align 3 +__stxncpy: + /* Are source and destination co-aligned? */ + lda t2, -1 + xor a0, a1, t1 + srl t2, 1, t2 + and a0, 7, t0 # find dest misalignment + cmovlt a2, t2, a2 # bound neg count to LONG_MAX + and t1, 7, t1 + addq a2, t0, a2 # bias count by dest misalignment + subq a2, 1, a2 + and a2, 7, t2 + srl a2, 3, a2 # a2 = loop counter = (count - 1)/8 + addq zero, 1, t10 + sll t10, t2, t10 # t10 = bitmask of last count byte + bne t1, $unaligned + + /* We are co-aligned; take care of a partial first word. */ + + ldq_u t1, 0(a1) # e0 : load first src word + addq a1, 8, a1 # .. e1 : + + beq t0, stxncpy_aligned # avoid loading dest word if not needed + ldq_u t0, 0(a0) # e0 : + br stxncpy_aligned # .. e1 : + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 3 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, unmasked + t1 == the shifted low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # e0 : load second src word + addq a1, 8, a1 # .. e1 : + mskql t0, a0, t0 # e0 : mask trailing garbage in dst + extqh t2, a1, t4 # e0 : + or t1, t4, t1 # e1 : first aligned src word complete + mskqh t1, a0, t1 # e0 : mask leading garbage in src + or t0, t1, t0 # e0 : first output word complete + or t0, t6, t6 # e1 : mask original data for zero test + cmpbge zero, t6, t7 # e0 : + beq a2, $u_eocfin # .. e1 : + lda t6, -1 # e0 : + bne t7, $u_final # .. e1 : + + mskql t6, a1, t6 # e0 : mask out bits already seen + nop # .. e1 : + stq_u t0, 0(a0) # e0 : store first output word + or t6, t2, t2 # .. e1 : + cmpbge zero, t2, t7 # e0 : find nulls in second partial + addq a0, 8, a0 # .. e1 : + subq a2, 1, a2 # e0 : + bne t7, $u_late_head_exit # .. e1 : + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + + extql t2, a1, t1 # e0 : position hi-bits of lo word + beq a2, $u_eoc # .. e1 : + ldq_u t2, 8(a1) # e0 : read next high-order source word + addq a1, 8, a1 # .. e1 : + extqh t2, a1, t0 # e0 : position lo-bits of hi word + cmpbge zero, t2, t7 # .. e1 : test new word for eos + nop # e0 : + bne t7, $u_eos # .. e1 : + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. */ + + .align 3 +$u_loop: + or t0, t1, t0 # e0 : current dst word now complete + subq a2, 1, a2 # .. e1 : decrement word count + stq_u t0, 0(a0) # e0 : save the current word + addq a0, 8, a0 # .. e1 : + extql t2, a1, t1 # e0 : extract high bits for next time + beq a2, $u_eoc # .. e1 : + ldq_u t2, 8(a1) # e0 : load high word for next time + addq a1, 8, a1 # .. e1 : + nop # e0 : + cmpbge zero, t2, t7 # .. e1 : test new word for eos + extqh t2, a1, t0 # e0 : extract low bits for current word + beq t7, $u_loop # .. e1 : + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + or t0, t1, t0 # e0 : first (partial) source word complete + cmpbge zero, t0, t7 # e0 : is the null in this first bit? + bne t7, $u_final # .. e1 (zdb) + + stq_u t0, 0(a0) # e0 : the null was in the high-order bits + addq a0, 8, a0 # .. e1 : + subq a2, 1, a2 # e0 : + +$u_late_head_exit: + extql t2, a1, t0 # e0 : + cmpbge zero, t0, t7 # e0 : + or t7, t10, t6 # e1 : + cmoveq a2, t6, t7 # e0 : + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t0 == assembled source word + t7 == cmpbge mask that found the null. */ +$u_final: + negq t7, t6 # e0 : isolate low bit set + and t6, t7, t8 # e1 : + + and t8, 0x80, t6 # e0 : avoid dest word load if we can + bne t6, 1f # .. e1 (zdb) + + ldq_u t1, 0(a0) # e0 : + subq t8, 1, t6 # .. e1 : + or t6, t8, t7 # e0 : + zapnot t0, t7, t0 # .. e1 : kill source bytes > null + zap t1, t7, t1 # e0 : kill dest bytes <= null + or t0, t1, t0 # e1 : + +1: stq_u t0, 0(a0) # e0 : + ret (t9) # .. e1 : + + /* Got to end-of-count before end of string. + On entry to this basic block: + t1 == the shifted high-order bits from the previous source word */ +$u_eoc: + and a1, 7, t6 # e1 : + sll t10, t6, t6 # e0 : + and t6, 0xff, t6 # e0 : + bne t6, 1f # e1 : avoid src word load if we can + + ldq_u t2, 8(a1) # e0 : load final src word + nop # .. e1 : + extqh t2, a1, t0 # e0 : extract high bits for last word + or t1, t0, t1 # e1 : + +1: cmpbge zero, t1, t7 + mov t1, t0 + +$u_eocfin: # end-of-count, final word + or t10, t7, t7 + br $u_final + + /* Unaligned copy entry point. */ + .align 3 +$unaligned: + + ldq_u t1, 0(a1) # e0 : load first source word + + and a0, 7, t4 # .. e1 : find dest misalignment + and a1, 7, t5 # e0 : find src misalignment + + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + + mov zero, t0 # .. e1 : + mov zero, t6 # e0 : + beq t4, 1f # .. e1 : + ldq_u t0, 0(a0) # e0 : + lda t6, -1 # .. e1 : + mskql t6, a0, t6 # e0 : +1: + subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr + + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + + cmplt t4, t5, t8 # e1 : + extql t1, a1, t1 # .. e0 : shift src into place + lda t2, -1 # e0 : for creating masks later + beq t8, $u_head # e1 : + + mskqh t2, t5, t2 # e0 : begin src byte validity mask + cmpbge zero, t1, t7 # .. e1 : is there a zero? + extql t2, a1, t2 # e0 : + or t7, t10, t5 # .. e1 : test for end-of-count too + cmpbge zero, t2, t3 # e0 : + cmoveq a2, t5, t7 # .. e1 : + andnot t7, t3, t7 # e0 : + beq t7, $u_head # .. e1 (zdb) + + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + + ldq_u t0, 0(a0) # e0 : + negq t7, t6 # .. e1 : build bitmask of bytes <= zero + mskqh t1, t4, t1 # e0 : + and t6, t7, t8 # .. e1 : + subq t8, 1, t6 # e0 : + or t6, t8, t7 # e1 : + + zapnot t2, t7, t2 # e0 : prepare source word; mirror changes + zapnot t1, t7, t1 # .. e1 : to source validity mask + + andnot t0, t2, t0 # e0 : zero place for source to reside + or t0, t1, t0 # e1 : and put it there + stq_u t0, 0(a0) # e0 : + ret (t9) # .. e1 : + + cfi_endproc diff --git a/REORG.TODO/sysdeps/alpha/sub_n.S b/REORG.TODO/sysdeps/alpha/sub_n.S new file mode 100644 index 0000000000..bc529e490e --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/sub_n.S @@ -0,0 +1,118 @@ + # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and + # store difference in a third limb vector. + + # Copyright (C) 1995-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr $16 + # s1_ptr $17 + # s2_ptr $18 + # size $19 + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_sub_n + .ent __mpn_sub_n +__mpn_sub_n: + .frame $30,0,$26,0 + + ldq $3,0($17) + ldq $4,0($18) + + subq $19,1,$19 + and $19,4-1,$2 # number of limbs in first loop + bis $31,$31,$0 + beq $2,.L0 # if multiple of 4 limbs, skip first loop + + subq $19,$2,$19 + +.Loop0: subq $2,1,$2 + ldq $5,8($17) + addq $4,$0,$4 + ldq $6,8($18) + cmpult $4,$0,$1 + subq $3,$4,$4 + cmpult $3,$4,$0 + stq $4,0($16) + or $0,$1,$0 + + addq $17,8,$17 + addq $18,8,$18 + bis $5,$5,$3 + bis $6,$6,$4 + addq $16,8,$16 + bne $2,.Loop0 + +.L0: beq $19,.Lend + + .align 3 +.Loop: subq $19,4,$19 + + ldq $5,8($17) + addq $4,$0,$4 + ldq $6,8($18) + cmpult $4,$0,$1 + subq $3,$4,$4 + cmpult $3,$4,$0 + stq $4,0($16) + or $0,$1,$0 + + ldq $3,16($17) + addq $6,$0,$6 + ldq $4,16($18) + cmpult $6,$0,$1 + subq $5,$6,$6 + cmpult $5,$6,$0 + stq $6,8($16) + or $0,$1,$0 + + ldq $5,24($17) + addq $4,$0,$4 + ldq $6,24($18) + cmpult $4,$0,$1 + subq $3,$4,$4 + cmpult $3,$4,$0 + stq $4,16($16) + or $0,$1,$0 + + ldq $3,32($17) + addq $6,$0,$6 + ldq $4,32($18) + cmpult $6,$0,$1 + subq $5,$6,$6 + cmpult $5,$6,$0 + stq $6,24($16) + or $0,$1,$0 + + addq $17,32,$17 + addq $18,32,$18 + addq $16,32,$16 + bne $19,.Loop + +.Lend: addq $4,$0,$4 + cmpult $4,$0,$1 + subq $3,$4,$4 + cmpult $3,$4,$0 + stq $4,0($16) + or $0,$1,$0 + ret $31,($26),1 + + .end __mpn_sub_n diff --git a/REORG.TODO/sysdeps/alpha/submul_1.S b/REORG.TODO/sysdeps/alpha/submul_1.S new file mode 100644 index 0000000000..020866733a --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/submul_1.S @@ -0,0 +1,90 @@ + # Alpha 21064 __mpn_submul_1 -- Multiply a limb vector with a limb and + # subtract the result from a second limb vector. + + # Copyright (C) 1992-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + + + # INPUT PARAMETERS + # res_ptr r16 + # s1_ptr r17 + # size r18 + # s2_limb r19 + + # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5. + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_submul_1 + .ent __mpn_submul_1 2 +__mpn_submul_1: + .frame $30,0,$26 + + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + subq $18,1,$18 # size-- + mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + umulh $2,$19,$0 # $0 = prod_high + beq $18,.Lend1 # jump if size was == 1 + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + subq $18,1,$18 # size-- + subq $5,$3,$3 + cmpult $5,$3,$4 + stq $3,0($16) + addq $16,8,$16 # res_ptr++ + beq $18,.Lend2 # jump if size was == 2 + + .align 3 +.Loop: mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + subq $18,1,$18 # size-- + umulh $2,$19,$4 # $4 = cy_limb + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + addq $3,$0,$3 # $3 = cy_limb + prod_low + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + subq $5,$3,$3 + cmpult $5,$3,$5 + stq $3,0($16) + addq $16,8,$16 # res_ptr++ + addq $5,$0,$0 # combine carries + bne $18,.Loop + +.Lend2: mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + umulh $2,$19,$4 # $4 = cy_limb + addq $3,$0,$3 # $3 = cy_limb + prod_low + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + subq $5,$3,$3 + cmpult $5,$3,$5 + stq $3,0($16) + addq $5,$0,$0 # combine carries + addq $4,$0,$0 # cy_limb = prod_high + cy + ret $31,($26),1 +.Lend1: subq $5,$3,$3 + cmpult $5,$3,$5 + stq $3,0($16) + addq $0,$5,$0 + ret $31,($26),1 + + .end __mpn_submul_1 diff --git a/REORG.TODO/sysdeps/alpha/tininess.h b/REORG.TODO/sysdeps/alpha/tininess.h new file mode 100644 index 0000000000..1db37790f8 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/tininess.h @@ -0,0 +1 @@ +#define TININESS_AFTER_ROUNDING 1 diff --git a/REORG.TODO/sysdeps/alpha/tls-macros.h b/REORG.TODO/sysdeps/alpha/tls-macros.h new file mode 100644 index 0000000000..00489c289f --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/tls-macros.h @@ -0,0 +1,25 @@ +/* Macros to support TLS testing in times of missing compiler support. */ + +extern void *__tls_get_addr (void *); + +# define TLS_GD(x) \ + ({ register void *__gp asm ("$29"); void *__result; \ + asm ("lda %0, " #x "($gp) !tlsgd" : "=r" (__result) : "r"(__gp)); \ + __tls_get_addr (__result); }) + +# define TLS_LD(x) \ + ({ register void *__gp asm ("$29"); void *__result; \ + asm ("lda %0, " #x "($gp) !tlsldm" : "=r" (__result) : "r"(__gp)); \ + __result = __tls_get_addr (__result); \ + asm ("lda %0, " #x "(%0) !dtprel" : "+r" (__result)); \ + __result; }) + +# define TLS_IE(x) \ + ({ register void *__gp asm ("$29"); long ofs; \ + asm ("ldq %0, " #x "($gp) !gottprel" : "=r"(ofs) : "r"(__gp)); \ + __builtin_thread_pointer () + ofs; }) + +# define TLS_LE(x) \ + ({ void *__result = __builtin_thread_pointer (); \ + asm ("lda %0, " #x "(%0) !tprel" : "+r" (__result)); \ + __result; }) diff --git a/REORG.TODO/sysdeps/alpha/tst-audit.h b/REORG.TODO/sysdeps/alpha/tst-audit.h new file mode 100644 index 0000000000..042a8c7718 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/tst-audit.h @@ -0,0 +1,24 @@ +/* Definitions for testing PLT entry/exit auditing. Alpha version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define pltenter la_alpha_gnu_pltenter +#define pltexit la_alpha_gnu_pltexit +#define La_regs La_alpha_regs +#define La_retval La_alpha_retval +#define int_retval lrv_r0 diff --git a/REORG.TODO/sysdeps/alpha/udiv_qrnnd.S b/REORG.TODO/sysdeps/alpha/udiv_qrnnd.S new file mode 100644 index 0000000000..899b445641 --- /dev/null +++ b/REORG.TODO/sysdeps/alpha/udiv_qrnnd.S @@ -0,0 +1,159 @@ + # Alpha 21064 __udiv_qrnnd + + # Copyright (C) 1992-2017 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + + .set noreorder + .set noat + + .text + +LEAF(__udiv_qrnnd, 0) +#ifdef PROF + ldgp gp, 0(pv) + lda AT, _mcount + jsr AT, (AT), _mcount + .prologue 1 +#else + .prologue 0 +#endif + +#define cnt $2 +#define tmp $3 +#define rem_ptr $16 +#define n1 $17 +#define n0 $18 +#define d $19 +#define qb $20 + + ldiq cnt,16 + blt d,$largedivisor + +$loop1: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,$loop1 + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$largedivisor: + and n0,1,$4 + + srl n0,1,n0 + sll n1,63,tmp + or tmp,n0,n0 + srl n1,1,n1 + + and d,1,$6 + srl d,1,$5 + addq $5,$6,$5 + +$loop2: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,$loop2 + + addq n1,n1,n1 + addq $4,n1,n1 + bne $6,$Odd + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$Odd: + /* q' in n0. r' in n1 */ + addq n1,n0,n1 + + cmpult n1,n0,tmp # tmp := carry from addq + subq n1,d,AT + addq n0,tmp,n0 + cmovne tmp,AT,n1 + + cmpult n1,d,tmp + addq n0,1,AT + cmoveq tmp,AT,n0 + subq n1,d,AT + cmoveq tmp,AT,n1 + + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + + .end __udiv_qrnnd |