diff options
Diffstat (limited to 'REORG.TODO/sysdeps/aarch64')
131 files changed, 10834 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/aarch64/Implies b/REORG.TODO/sysdeps/aarch64/Implies new file mode 100644 index 0000000000..e5adf4d63c --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/Implies @@ -0,0 +1,6 @@ +wordsize-64 +ieee754/ldbl-128 +ieee754/dbl-64/wordsize-64 +ieee754/dbl-64 +ieee754/flt-32 +aarch64/soft-fp diff --git a/REORG.TODO/sysdeps/aarch64/Makefile b/REORG.TODO/sysdeps/aarch64/Makefile new file mode 100644 index 0000000000..562c1373ae --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/Makefile @@ -0,0 +1,18 @@ +long-double-fcts = yes + +ifeq ($(subdir),debug) +CFLAGS-backtrace.c += -funwind-tables +endif + +ifeq ($(subdir),elf) +sysdep-dl-routines += tlsdesc dl-tlsdesc +gen-as-const-headers += dl-link.sym +endif + +ifeq ($(subdir),csu) +gen-as-const-headers += tlsdesc.sym +endif + +ifeq ($(subdir),gmon) +CFLAGS-mcount.c += -mgeneral-regs-only +endif diff --git a/REORG.TODO/sysdeps/aarch64/Versions b/REORG.TODO/sysdeps/aarch64/Versions new file mode 100644 index 0000000000..e1aa44f33d --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/Versions @@ -0,0 +1,5 @@ +libc { + GLIBC_2.18 { + _mcount; + } +} diff --git a/REORG.TODO/sysdeps/aarch64/__longjmp.S b/REORG.TODO/sysdeps/aarch64/__longjmp.S new file mode 100644 index 0000000000..c9056e63e7 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/__longjmp.S @@ -0,0 +1,118 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <jmpbuf-offsets.h> +#include <stap-probe.h> + +/* __longjmp(jmpbuf, val) */ + +ENTRY (__longjmp) + cfi_def_cfa(x0, 0) + cfi_offset(x19, JB_X19<<3) + cfi_offset(x20, JB_X20<<3) + cfi_offset(x21, JB_X21<<3) + cfi_offset(x22, JB_X22<<3) + cfi_offset(x23, JB_X23<<3) + cfi_offset(x24, JB_X24<<3) + cfi_offset(x25, JB_X25<<3) + cfi_offset(x26, JB_X26<<3) + cfi_offset(x27, JB_X27<<3) + cfi_offset(x28, JB_X28<<3) + cfi_offset(x29, JB_X29<<3) + cfi_offset(x30, JB_LR<<3) + + cfi_offset( d8, JB_D8<<3) + cfi_offset( d9, JB_D9<<3) + cfi_offset(d10, JB_D10<<3) + cfi_offset(d11, JB_D11<<3) + cfi_offset(d12, JB_D12<<3) + cfi_offset(d13, JB_D13<<3) + cfi_offset(d14, JB_D14<<3) + cfi_offset(d15, JB_D15<<3) + + DELOUSE (0) + + ldp x19, x20, [x0, #JB_X19<<3] + ldp x21, x22, [x0, #JB_X21<<3] + ldp x23, x24, [x0, #JB_X23<<3] + ldp x25, x26, [x0, #JB_X25<<3] + ldp x27, x28, [x0, #JB_X27<<3] +#ifdef PTR_DEMANGLE + ldp x29, x4, [x0, #JB_X29<<3] + PTR_DEMANGLE (30, 4, 3, 2) +#else + ldp x29, x30, [x0, #JB_X29<<3] +#endif + /* longjmp probe takes 3 arguments, address of jump buffer as + first argument (8@x0), return value as second argument (-4@x1), + and target address (8@x30), respectively. */ + LIBC_PROBE (longjmp, 3, 8@x0, -4@x1, 8@x30) + ldp d8, d9, [x0, #JB_D8<<3] + ldp d10, d11, [x0, #JB_D10<<3] + ldp d12, d13, [x0, #JB_D12<<3] + ldp d14, d15, [x0, #JB_D14<<3] + + /* Originally this was implemented with a series of + .cfi_restore() directives. + + The theory was that cfi_restore should revert to previous + frame value is the same as the current value. In practice + this doesn't work, even after cfi_restore() gdb continues + to try to recover a previous frame value offset from x0, + which gets stuffed after a few more instructions. The + cfi_same_value() mechanism appears to work fine. 
*/ + + cfi_same_value(x19) + cfi_same_value(x20) + cfi_same_value(x21) + cfi_same_value(x22) + cfi_same_value(x23) + cfi_same_value(x24) + cfi_same_value(x25) + cfi_same_value(x26) + cfi_same_value(x27) + cfi_same_value(x28) + cfi_same_value(x29) + cfi_same_value(x30) + cfi_same_value(d8) + cfi_same_value(d9) + cfi_same_value(d10) + cfi_same_value(d11) + cfi_same_value(d12) + cfi_same_value(d13) + cfi_same_value(d14) + cfi_same_value(d15) +#ifdef PTR_DEMANGLE + ldr x4, [x0, #JB_SP<<3] + PTR_DEMANGLE (5, 4, 3, 2) +#else + ldr x5, [x0, #JB_SP<<3] +#endif + mov sp, x5 + + /* longjmp_target probe takes 3 arguments, address of jump buffer + as first argument (8@x0), return value as second argument (-4@x1), + and target address (8@x30), respectively. */ + LIBC_PROBE (longjmp_target, 3, 8@x0, -4@x1, 8@x30) + cmp x1, #0 + mov x0, #1 + csel x0, x1, x0, ne + /* Use br instead of ret because ret is guaranteed to mispredict */ + br x30 +END (__longjmp) diff --git a/REORG.TODO/sysdeps/aarch64/abort-instr.h b/REORG.TODO/sysdeps/aarch64/abort-instr.h new file mode 100644 index 0000000000..8b1c40b57d --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/abort-instr.h @@ -0,0 +1 @@ +#define ABORT_INSTRUCTION asm ("brk\t#1000") diff --git a/REORG.TODO/sysdeps/aarch64/atomic-machine.h b/REORG.TODO/sysdeps/aarch64/atomic-machine.h new file mode 100644 index 0000000000..eb59a5b94c --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/atomic-machine.h @@ -0,0 +1,170 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _AARCH64_ATOMIC_MACHINE_H +#define _AARCH64_ATOMIC_MACHINE_H 1 + +#include <stdint.h> + +typedef int8_t atomic8_t; +typedef int16_t atomic16_t; +typedef int32_t atomic32_t; +typedef int64_t atomic64_t; + +typedef uint8_t uatomic8_t; +typedef uint16_t uatomic16_t; +typedef uint32_t uatomic32_t; +typedef uint64_t uatomic64_t; + +typedef intptr_t atomicptr_t; +typedef uintptr_t uatomicptr_t; +typedef intmax_t atomic_max_t; +typedef uintmax_t uatomic_max_t; + +#define __HAVE_64B_ATOMICS 1 +#define USE_ATOMIC_COMPILER_BUILTINS 1 +#define ATOMIC_EXCHANGE_USES_CAS 0 + +/* Compare and exchange. + For all "bool" routines, we return FALSE if exchange successful. 
*/ + +# define __arch_compare_and_exchange_bool_8_int(mem, newval, oldval, model) \ + ({ \ + typeof (*mem) __oldval = (oldval); \ + !__atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0, \ + model, __ATOMIC_RELAXED); \ + }) + +# define __arch_compare_and_exchange_bool_16_int(mem, newval, oldval, model) \ + ({ \ + typeof (*mem) __oldval = (oldval); \ + !__atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0, \ + model, __ATOMIC_RELAXED); \ + }) + +# define __arch_compare_and_exchange_bool_32_int(mem, newval, oldval, model) \ + ({ \ + typeof (*mem) __oldval = (oldval); \ + !__atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0, \ + model, __ATOMIC_RELAXED); \ + }) + +# define __arch_compare_and_exchange_bool_64_int(mem, newval, oldval, model) \ + ({ \ + typeof (*mem) __oldval = (oldval); \ + !__atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0, \ + model, __ATOMIC_RELAXED); \ + }) + +# define __arch_compare_and_exchange_val_8_int(mem, newval, oldval, model) \ + ({ \ + typeof (*mem) __oldval = (oldval); \ + __atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0, \ + model, __ATOMIC_RELAXED); \ + __oldval; \ + }) + +# define __arch_compare_and_exchange_val_16_int(mem, newval, oldval, model) \ + ({ \ + typeof (*mem) __oldval = (oldval); \ + __atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0, \ + model, __ATOMIC_RELAXED); \ + __oldval; \ + }) + +# define __arch_compare_and_exchange_val_32_int(mem, newval, oldval, model) \ + ({ \ + typeof (*mem) __oldval = (oldval); \ + __atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0, \ + model, __ATOMIC_RELAXED); \ + __oldval; \ + }) + +# define __arch_compare_and_exchange_val_64_int(mem, newval, oldval, model) \ + ({ \ + typeof (*mem) __oldval = (oldval); \ + __atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0, \ + model, __ATOMIC_RELAXED); \ + __oldval; \ + }) + + +/* Compare and exchange with "acquire" semantics, ie barrier after. 
*/ + +# define atomic_compare_and_exchange_bool_acq(mem, new, old) \ + __atomic_bool_bysize (__arch_compare_and_exchange_bool, int, \ + mem, new, old, __ATOMIC_ACQUIRE) + +# define atomic_compare_and_exchange_val_acq(mem, new, old) \ + __atomic_val_bysize (__arch_compare_and_exchange_val, int, \ + mem, new, old, __ATOMIC_ACQUIRE) + +/* Compare and exchange with "release" semantics, ie barrier before. */ + +# define atomic_compare_and_exchange_val_rel(mem, new, old) \ + __atomic_val_bysize (__arch_compare_and_exchange_val, int, \ + mem, new, old, __ATOMIC_RELEASE) + + +/* Atomic exchange (without compare). */ + +# define __arch_exchange_8_int(mem, newval, model) \ + __atomic_exchange_n (mem, newval, model) + +# define __arch_exchange_16_int(mem, newval, model) \ + __atomic_exchange_n (mem, newval, model) + +# define __arch_exchange_32_int(mem, newval, model) \ + __atomic_exchange_n (mem, newval, model) + +# define __arch_exchange_64_int(mem, newval, model) \ + __atomic_exchange_n (mem, newval, model) + +# define atomic_exchange_acq(mem, value) \ + __atomic_val_bysize (__arch_exchange, int, mem, value, __ATOMIC_ACQUIRE) + +# define atomic_exchange_rel(mem, value) \ + __atomic_val_bysize (__arch_exchange, int, mem, value, __ATOMIC_RELEASE) + + +/* Atomically add value and return the previous (unincremented) value. 
*/ + +# define __arch_exchange_and_add_8_int(mem, value, model) \ + __atomic_fetch_add (mem, value, model) + +# define __arch_exchange_and_add_16_int(mem, value, model) \ + __atomic_fetch_add (mem, value, model) + +# define __arch_exchange_and_add_32_int(mem, value, model) \ + __atomic_fetch_add (mem, value, model) + +# define __arch_exchange_and_add_64_int(mem, value, model) \ + __atomic_fetch_add (mem, value, model) + +# define atomic_exchange_and_add_acq(mem, value) \ + __atomic_val_bysize (__arch_exchange_and_add, int, mem, value, \ + __ATOMIC_ACQUIRE) + +# define atomic_exchange_and_add_rel(mem, value) \ + __atomic_val_bysize (__arch_exchange_and_add, int, mem, value, \ + __ATOMIC_RELEASE) + +/* Barrier macro. */ +#define atomic_full_barrier() __sync_synchronize() + +#endif diff --git a/REORG.TODO/sysdeps/aarch64/backtrace.c b/REORG.TODO/sysdeps/aarch64/backtrace.c new file mode 100644 index 0000000000..27ce597b39 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/backtrace.c @@ -0,0 +1 @@ +#include <sysdeps/x86_64/backtrace.c> diff --git a/REORG.TODO/sysdeps/aarch64/bits/endian.h b/REORG.TODO/sysdeps/aarch64/bits/endian.h new file mode 100644 index 0000000000..e86b988355 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/bits/endian.h @@ -0,0 +1,30 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _ENDIAN_H +# error "Never use <bits/endian.h> directly; include <endian.h> instead." +#endif + +/* AArch64 can be either big or little endian. */ +#ifdef __AARCH64EB__ +# define __BYTE_ORDER __BIG_ENDIAN +#else +# define __BYTE_ORDER __LITTLE_ENDIAN +#endif + +#define __FLOAT_WORD_ORDER __BYTE_ORDER diff --git a/REORG.TODO/sysdeps/aarch64/bits/fenv.h b/REORG.TODO/sysdeps/aarch64/bits/fenv.h new file mode 100644 index 0000000000..23177d7c60 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/bits/fenv.h @@ -0,0 +1,82 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FENV_H +# error "Never use <bits/fenv.h> directly; include <fenv.h> instead." +#endif + +/* Define bits representing exceptions in the FPSR status word. 
+ */ +enum + { + FE_INVALID = +#define FE_INVALID 1 + FE_INVALID, + FE_DIVBYZERO = +#define FE_DIVBYZERO 2 + FE_DIVBYZERO, + FE_OVERFLOW = +#define FE_OVERFLOW 4 + FE_OVERFLOW, + FE_UNDERFLOW = +#define FE_UNDERFLOW 8 + FE_UNDERFLOW, + FE_INEXACT = +#define FE_INEXACT 16 + FE_INEXACT, + }; + +/* Amount to shift by to convert an exception bit in FPSR to an + exception bit mask in FPCR. */ +#define FE_EXCEPT_SHIFT 8 + +/* All supported exceptions. */ +#define FE_ALL_EXCEPT \ + (FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INEXACT) + +/* Define bits representing rounding modes in the FPCR Rmode field. */ +#define FE_TONEAREST 0x000000 +#define FE_UPWARD 0x400000 +#define FE_DOWNWARD 0x800000 +#define FE_TOWARDZERO 0xc00000 + +/* Type representing exception flags. */ +typedef unsigned int fexcept_t; + +/* Type representing floating-point environment. */ +typedef struct + { + unsigned int __fpcr; + unsigned int __fpsr; + } +fenv_t; + +/* If the default argument is used we use this value. */ +#define FE_DFL_ENV ((const fenv_t *) -1l) + +#ifdef __USE_GNU +/* Floating-point environment where none of the exceptions are masked. */ +# define FE_NOMASK_ENV ((const fenv_t *) -2) +#endif + +#if __GLIBC_USE (IEC_60559_BFP_EXT) +/* Type representing floating-point control modes. */ +typedef unsigned int femode_t; + +/* Default floating-point control modes. */ +# define FE_DFL_MODE ((const femode_t *) -1L) +#endif diff --git a/REORG.TODO/sysdeps/aarch64/bits/fp-fast.h b/REORG.TODO/sysdeps/aarch64/bits/fp-fast.h new file mode 100644 index 0000000000..01a00c8e0c --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/bits/fp-fast.h @@ -0,0 +1,34 @@ +/* Define FP_FAST_* macros. AArch64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never use <bits/fp-fast.h> directly; include <math.h> instead." +#endif + +#ifdef __USE_ISOC99 + +/* The GCC 4.6 compiler will define __FP_FAST_FMA{,F,L} if the fma{,f,l} + builtins are supported. */ +# define FP_FAST_FMA 1 +# define FP_FAST_FMAF 1 + +# ifdef __FP_FAST_FMAL +# define FP_FAST_FMAL 1 +# endif + +#endif diff --git a/REORG.TODO/sysdeps/aarch64/bits/link.h b/REORG.TODO/sysdeps/aarch64/bits/link.h new file mode 100644 index 0000000000..a8829df4a0 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/bits/link.h @@ -0,0 +1,60 @@ +/* Copyright (C) 2005-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _LINK_H +# error "Never include <bits/link.h> directly; use <link.h> instead." +#endif + +/* Registers for entry into PLT on AArch64. */ +typedef struct La_aarch64_regs +{ + uint64_t lr_xreg[8]; + uint64_t lr_dreg[8]; + uint64_t lr_sp; + uint64_t lr_lr; +} La_aarch64_regs; + +/* Return values for calls from PLT on AArch64. */ +typedef struct La_aarch64_retval +{ + /* Up to two integer registers can be used for a return value. */ + uint64_t lrv_xreg[2]; + /* Up to four D registers can be used for a return value. */ + uint64_t lrv_dreg[4]; + +} La_aarch64_retval; +__BEGIN_DECLS + +extern ElfW(Addr) +la_aarch64_gnu_pltenter (ElfW(Sym) *__sym, unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_aarch64_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); + +extern unsigned int +la_aarch64_gnu_pltexit (ElfW(Sym) *__sym, unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_aarch64_regs *__inregs, + La_aarch64_retval *__outregs, + const char *__symname); + +__END_DECLS diff --git a/REORG.TODO/sysdeps/aarch64/bits/setjmp.h b/REORG.TODO/sysdeps/aarch64/bits/setjmp.h new file mode 100644 index 0000000000..f8d15ad399 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/bits/setjmp.h @@ -0,0 +1,33 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _BITS_SETJMP_H +#define _BITS_SETJMP_H 1 + +#if !defined _SETJMP_H && !defined _PTHREAD_H +# error "Never include <bits/setjmp.h> directly; use <setjmp.h> instead." +#endif + +#ifndef _ASM +/* Jump buffer contains: + x19-x28, x29(fp), x30(lr), (x31)sp, d8-d15. Other registers are not + saved. */ +__extension__ typedef unsigned long long __jmp_buf [22]; + +#endif +#endif diff --git a/REORG.TODO/sysdeps/aarch64/bits/string.h b/REORG.TODO/sysdeps/aarch64/bits/string.h new file mode 100644 index 0000000000..295eeb7be9 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/bits/string.h @@ -0,0 +1,24 @@ +/* Optimized, inlined string functions. AArch64 version. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _STRING_H +# error "Never use <bits/string.h> directly; include <string.h> instead." 
+#endif + +/* AArch64 uses the aligned string inline ABI. */ +#define _STRING_INLINE_unaligned 0 diff --git a/REORG.TODO/sysdeps/aarch64/bits/wordsize.h b/REORG.TODO/sysdeps/aarch64/bits/wordsize.h new file mode 100644 index 0000000000..ad53d699ca --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/bits/wordsize.h @@ -0,0 +1,28 @@ +/* Determine the wordsize from the preprocessor defines. + + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifdef __LP64__ +# define __WORDSIZE 64 +#else +# define __WORDSIZE 32 +# define __WORDSIZE32_SIZE_ULONG 1 +# define __WORDSIZE32_PTRDIFF_LONG 1 +#endif + +#define __WORDSIZE_TIME64_COMPAT32 0 diff --git a/REORG.TODO/sysdeps/aarch64/bsd-_setjmp.S b/REORG.TODO/sysdeps/aarch64/bsd-_setjmp.S new file mode 100644 index 0000000000..4e6a2da560 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/bsd-_setjmp.S @@ -0,0 +1 @@ +/* _setjmp is in setjmp.S */ diff --git a/REORG.TODO/sysdeps/aarch64/bsd-setjmp.S b/REORG.TODO/sysdeps/aarch64/bsd-setjmp.S new file mode 100644 index 0000000000..1da848d2f1 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/bsd-setjmp.S @@ -0,0 +1 @@ +/* setjmp is in setjmp.S */ diff --git a/REORG.TODO/sysdeps/aarch64/configure b/REORG.TODO/sysdeps/aarch64/configure new file mode 100644 index 0000000000..5bd355a691 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/configure @@ -0,0 +1,174 @@ +# This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/aarch64. + +# We check to see if the compiler and flags are +# selecting the big endian ABI and if they are then +# we set libc_cv_aarch64_be to yes which causes +# HAVE_AARCH64_BE to be defined in config.h and +# in include/libc-symbols.h and thus available to +# shlib-versions to select the appropriate name for +# the dynamic linker via %ifdef. + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. + # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... 
" >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? 
"no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for big endian" >&5 +$as_echo_n "checking for big endian... " >&6; } +if ${libc_cv_aarch64_be+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __AARCH64EB__ + yes + #endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "yes" >/dev/null 2>&1; then : + libc_cv_aarch64_be=yes +else + libc_cv_aarch64_be=no +fi +rm -f conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_be" >&5 +$as_echo "$libc_cv_aarch64_be" >&6; } +if test $libc_cv_aarch64_be = yes; then + $as_echo "#define HAVE_AARCH64_BE 1" >>confdefs.h + + config_vars="$config_vars +default-abi = lp64_be" +else + config_vars="$config_vars +default-abi = lp64" +fi diff --git a/REORG.TODO/sysdeps/aarch64/configure.ac b/REORG.TODO/sysdeps/aarch64/configure.ac new file mode 100644 index 0000000000..7851dd4dac --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/configure.ac @@ -0,0 +1,22 @@ +GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. +# Local configure fragment for sysdeps/aarch64. + +# We check to see if the compiler and flags are +# selecting the big endian ABI and if they are then +# we set libc_cv_aarch64_be to yes which causes +# HAVE_AARCH64_BE to be defined in config.h and +# in include/libc-symbols.h and thus available to +# shlib-versions to select the appropriate name for +# the dynamic linker via %ifdef. 
+AC_CACHE_CHECK([for big endian], + [libc_cv_aarch64_be], + [AC_EGREP_CPP(yes,[#ifdef __AARCH64EB__ + yes + #endif + ], libc_cv_aarch64_be=yes, libc_cv_aarch64_be=no)]) +if test $libc_cv_aarch64_be = yes; then + AC_DEFINE(HAVE_AARCH64_BE) + LIBC_CONFIG_VAR([default-abi], [lp64_be]) +else + LIBC_CONFIG_VAR([default-abi], [lp64]) +fi diff --git a/REORG.TODO/sysdeps/aarch64/crti.S b/REORG.TODO/sysdeps/aarch64/crti.S new file mode 100644 index 0000000000..db91192d81 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/crti.S @@ -0,0 +1,91 @@ +/* Special .init and .fini section support for AArch64. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crti.S puts a function prologue at the beginning of the .init and + .fini sections and defines global symbols for those addresses, so + they can be called as functions. The symbols _init and _fini are + magic and cause the linker to emit DT_INIT and DT_FINI. */ + +#include <sysdep.h> +#include <libc-symbols.h> + +#ifndef PREINIT_FUNCTION +# define PREINIT_FUNCTION __gmon_start__ +#endif + +#ifndef PREINIT_FUNCTION_WEAK +# define PREINIT_FUNCTION_WEAK 1 +#endif + +#if PREINIT_FUNCTION_WEAK + weak_extern (PREINIT_FUNCTION) +#else + .hidden PREINIT_FUNCTION +#endif + +#if PREINIT_FUNCTION_WEAK + .align 2 + .type call_weak_fn, %function +call_weak_fn: + adrp x0, :got:PREINIT_FUNCTION + ldr PTR_REG (0), [x0, #:got_lo12:PREINIT_FUNCTION] + cbz x0, 1f + b PREINIT_FUNCTION +1: + RET + .size call_weak_fn, .-call_weak_fn +#endif + + .section .init,"ax",%progbits + .align 2 + .global _init + .type _init, %function +_init: + stp x29, x30, [sp, -16]! + mov x29, sp +#if PREINIT_FUNCTION_WEAK + bl call_weak_fn +#else + bl PREINIT_FUNCTION +#endif + + .section .fini,"ax",%progbits + .align 2 + .global _fini + .type _fini, %function +_fini: + stp x29, x30, [sp, -16]! + mov x29, sp diff --git a/REORG.TODO/sysdeps/aarch64/crtn.S b/REORG.TODO/sysdeps/aarch64/crtn.S new file mode 100644 index 0000000000..0549157e14 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/crtn.S @@ -0,0 +1,46 @@ +/* Special .init and .fini section support for AArch64. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crtn.S puts function epilogues in the .init and .fini sections + corresponding to the prologues in crti.S. 
*/ + + .section .init,"ax",%progbits + ldp x29, x30, [sp], 16 + RET + + .section .fini,"ax",%progbits + ldp x29, x30, [sp], 16 + RET diff --git a/REORG.TODO/sysdeps/aarch64/dl-irel.h b/REORG.TODO/sysdeps/aarch64/dl-irel.h new file mode 100644 index 0000000000..4a8027510b --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/dl-irel.h @@ -0,0 +1,53 @@ +/* Machine-dependent ELF indirect relocation inline functions. + AArch64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _DL_IREL_H +#define _DL_IREL_H + +#include <stdio.h> +#include <unistd.h> +#include <ldsodefs.h> +#include <sysdep.h> + +#define ELF_MACHINE_IRELA 1 + +static inline ElfW(Addr) +__attribute ((always_inline)) +elf_ifunc_invoke (ElfW(Addr) addr) +{ + return ((ElfW(Addr) (*) (unsigned long int)) (addr)) (GLRO(dl_hwcap)); +} + +static inline void +__attribute ((always_inline)) +elf_irela (const ElfW(Rela) *reloc) +{ + ElfW(Addr) *const reloc_addr = (void *) reloc->r_offset; + const unsigned long int r_type = ELFW(R_TYPE) (reloc->r_info); + + if (__glibc_likely (r_type == AARCH64_R(IRELATIVE))) + { + ElfW(Addr) value = elf_ifunc_invoke (reloc->r_addend); + *reloc_addr = value; + } + else + __libc_fatal ("unexpected reloc type in static binary"); +} + +#endif diff --git a/REORG.TODO/sysdeps/aarch64/dl-link.sym b/REORG.TODO/sysdeps/aarch64/dl-link.sym new file mode 100644 index 0000000000..d67d28b40c --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/dl-link.sym @@ -0,0 +1,15 @@ +#include <stddef.h> +#include <sysdep.h> +#include <link.h> +#include <dl-tlsdesc.h> + +DL_SIZEOF_RG sizeof(struct La_aarch64_regs) +DL_SIZEOF_RV sizeof(struct La_aarch64_retval) + +DL_OFFSET_RG_X0 offsetof(struct La_aarch64_regs, lr_xreg) +DL_OFFSET_RG_D0 offsetof(struct La_aarch64_regs, lr_dreg) +DL_OFFSET_RG_SP offsetof(struct La_aarch64_regs, lr_sp) +DL_OFFSET_RG_LR offsetof(struct La_aarch64_regs, lr_lr) + +DL_OFFSET_RV_X0 offsetof(struct La_aarch64_retval, lrv_xreg) +DL_OFFSET_RV_D0 offsetof(struct La_aarch64_retval, lrv_dreg) diff --git a/REORG.TODO/sysdeps/aarch64/dl-machine.h b/REORG.TODO/sysdeps/aarch64/dl-machine.h new file mode 100644 index 0000000000..15d79a6961 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/dl-machine.h @@ -0,0 +1,447 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef dl_machine_h +#define dl_machine_h + +#define ELF_MACHINE_NAME "aarch64" + +#include <sysdep.h> +#include <tls.h> +#include <dl-tlsdesc.h> +#include <dl-irel.h> +#include <cpu-features.c> + +/* Return nonzero iff ELF header is compatible with the running host. */ +static inline int __attribute__ ((unused)) +elf_machine_matches_host (const ElfW(Ehdr) *ehdr) +{ + return ehdr->e_machine == EM_AARCH64; +} + +/* Return the link-time address of _DYNAMIC. Conveniently, this is the + first element of the GOT. */ +static inline ElfW(Addr) __attribute__ ((unused)) +elf_machine_dynamic (void) +{ + extern const ElfW(Addr) _GLOBAL_OFFSET_TABLE_[] attribute_hidden; + return _GLOBAL_OFFSET_TABLE_[0]; +} + +/* Return the run-time load address of the shared object. */ + +static inline ElfW(Addr) __attribute__ ((unused)) +elf_machine_load_address (void) +{ + /* To figure out the load address we use the definition that for any symbol: + dynamic_addr(symbol) = static_addr(symbol) + load_addr + + The choice of symbol is arbitrary. The static address we obtain + by constructing a non GOT reference to the symbol, the dynamic + address of the symbol we compute using adrp/add to compute the + symbol's address relative to the PC. 
+ This depends on 32/16bit relocations being resolved at link time + and that the static address fits in the 32/16 bits. */ + + ElfW(Addr) static_addr; + ElfW(Addr) dynamic_addr; + + asm (" \n" +" adrp %1, _dl_start; \n" +#ifdef __LP64__ +" add %1, %1, #:lo12:_dl_start \n" +#else +" add %w1, %w1, #:lo12:_dl_start \n" +#endif +" ldr %w0, 1f \n" +" b 2f \n" +"1: \n" +#ifdef __LP64__ +" .word _dl_start \n" +#else +# ifdef __AARCH64EB__ +" .short 0 \n" +# endif +" .short _dl_start \n" +# ifndef __AARCH64EB__ +" .short 0 \n" +# endif +#endif +"2: \n" + : "=r" (static_addr), "=r" (dynamic_addr)); + return dynamic_addr - static_addr; +} + +/* Set up the loaded object described by L so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. */ + +static inline int __attribute__ ((unused)) +elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) +{ + if (l->l_info[DT_JMPREL] && lazy) + { + ElfW(Addr) *got; + extern void _dl_runtime_resolve (ElfW(Word)); + extern void _dl_runtime_profile (ElfW(Word)); + + got = (ElfW(Addr) *) D_PTR (l, l_info[DT_PLTGOT]); + if (got[1]) + { + l->l_mach.plt = got[1] + l->l_addr; + } + got[1] = (ElfW(Addr)) l; + + /* The got[2] entry contains the address of a function which gets + called to get the address of a so far unresolved function and + jump to it. The profiling extension of the dynamic linker allows + to intercept the calls to collect information. In this case we + don't store the address in the GOT so that all future calls also + end in this function. */ + if ( profile) + { + got[2] = (ElfW(Addr)) &_dl_runtime_profile; + + if (GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), l)) + /* Say that we really want profiling and the timers are + started. */ + GL(dl_profile_map) = l; + } + else + { + /* This function will get called to fix up the GOT entry + indicated by the offset on the stack, and then jump to + the resolved address. 
*/ + got[2] = (ElfW(Addr)) &_dl_runtime_resolve; + } + } + + if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy) + *(ElfW(Addr)*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_GOT)]) + l->l_addr) + = (ElfW(Addr)) &_dl_tlsdesc_resolve_rela; + + return lazy; +} + +/* Initial entry point for the dynamic linker. The C function + _dl_start is the real entry point, its return value is the user + program's entry point */ +#ifdef __LP64__ +# define RTLD_START RTLD_START_1 ("x", "3", "sp") +#else +# define RTLD_START RTLD_START_1 ("w", "2", "wsp") +#endif + + +#define RTLD_START_1(PTR, PTR_SIZE_LOG, PTR_SP) asm ("\ +.text \n\ +.globl _start \n\ +.type _start, %function \n\ +.globl _dl_start_user \n\ +.type _dl_start_user, %function \n\ +_start: \n\ + mov " PTR "0, " PTR_SP " \n\ + bl _dl_start \n\ + // returns user entry point in x0 \n\ + mov x21, x0 \n\ +_dl_start_user: \n\ + // get the original arg count \n\ + ldr " PTR "1, [sp] \n\ + // get the argv address \n\ + add " PTR "2, " PTR_SP ", #(1<<" PTR_SIZE_LOG ") \n\ + // get _dl_skip_args to see if we were \n\ + // invoked as an executable \n\ + adrp x4, _dl_skip_args \n\ + ldr w4, [x4, #:lo12:_dl_skip_args] \n\ + // do we need to adjust argc/argv \n\ + cmp w4, 0 \n\ + beq .L_done_stack_adjust \n\ + // subtract _dl_skip_args from original arg count \n\ + sub " PTR "1, " PTR "1, " PTR "4 \n\ + // store adjusted argc back to stack \n\ + str " PTR "1, [sp] \n\ + // find the first unskipped argument \n\ + mov " PTR "3, " PTR "2 \n\ + add " PTR "4, " PTR "2, " PTR "4, lsl #" PTR_SIZE_LOG " \n\ + // shuffle argv down (x4 = source, x3 = destination, both post-incremented) \n\ +1: ldr " PTR "5, [x4], #(1<<" PTR_SIZE_LOG ") \n\ + str " PTR "5, [x3], #(1<<" PTR_SIZE_LOG ") \n\ + cmp " PTR "5, #0 \n\ + bne 1b \n\ + // shuffle envp down \n\ +1: ldr " PTR "5, [x4], #(1<<" PTR_SIZE_LOG ") \n\ + str " PTR "5, [x3], #(1<<" PTR_SIZE_LOG ") \n\ + cmp " PTR "5, #0 \n\ + bne 1b \n\ + // shuffle auxv down; x4 already points at the first auxv pair, so load post-index \n\ +1: ldp " PTR "0, " PTR "5, [x4], #(2<<" PTR_SIZE_LOG ")
\n\ + stp " PTR "0, " PTR "5, [x3], #(2<<" PTR_SIZE_LOG ") \n\ + cmp " PTR "0, #0 \n\ + bne 1b \n\ + // Update _dl_argv \n\ + adrp x3, _dl_argv \n\ + str " PTR "2, [x3, #:lo12:_dl_argv] \n\ +.L_done_stack_adjust: \n\ + // compute envp \n\ + add " PTR "3, " PTR "2, " PTR "1, lsl #" PTR_SIZE_LOG " \n\ + add " PTR "3, " PTR "3, #(1<<" PTR_SIZE_LOG ") \n\ + adrp x16, _rtld_local \n\ + add " PTR "16, " PTR "16, #:lo12:_rtld_local \n\ + ldr " PTR "0, [x16] \n\ + bl _dl_init \n\ + // load the finalizer function \n\ + adrp x0, _dl_fini \n\ + add " PTR "0, " PTR "0, #:lo12:_dl_fini \n\ + // jump to the user_s entry point \n\ + br x21 \n\ +"); + +#define elf_machine_type_class(type) \ + ((((type) == AARCH64_R(JUMP_SLOT) \ + || (type) == AARCH64_R(TLS_DTPMOD) \ + || (type) == AARCH64_R(TLS_DTPREL) \ + || (type) == AARCH64_R(TLS_TPREL) \ + || (type) == AARCH64_R(TLSDESC)) * ELF_RTYPE_CLASS_PLT) \ + | (((type) == AARCH64_R(COPY)) * ELF_RTYPE_CLASS_COPY) \ + | (((type) == AARCH64_R(GLOB_DAT)) * ELF_RTYPE_CLASS_EXTERN_PROTECTED_DATA)) + +#define ELF_MACHINE_JMP_SLOT AARCH64_R(JUMP_SLOT) + +/* AArch64 uses RELA not REL */ +#define ELF_MACHINE_NO_REL 1 +#define ELF_MACHINE_NO_RELA 0 + +#define DL_PLATFORM_INIT dl_platform_init () + +static inline void __attribute__ ((unused)) +dl_platform_init (void) +{ + if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') + /* Avoid an empty string which would disturb us. */ + GLRO(dl_platform) = NULL; + +#ifdef SHARED + /* init_cpu_features has been called early from __libc_start_main in + static executable. */ + init_cpu_features (&GLRO(dl_aarch64_cpu_features)); +#endif +} + + +static inline ElfW(Addr) +elf_machine_fixup_plt (struct link_map *map, lookup_t t, + const ElfW(Rela) *reloc, + ElfW(Addr) *reloc_addr, + ElfW(Addr) value) +{ + return *reloc_addr = value; +} + +/* Return the final value of a plt relocation. 
*/ +static inline ElfW(Addr) +elf_machine_plt_value (struct link_map *map, + const ElfW(Rela) *reloc, + ElfW(Addr) value) +{ + return value; +} + +#endif + +/* Names of the architecture-specific auditing callback functions. */ +#define ARCH_LA_PLTENTER aarch64_gnu_pltenter +#define ARCH_LA_PLTEXIT aarch64_gnu_pltexit + +#ifdef RESOLVE_MAP + +auto inline void +__attribute__ ((always_inline)) +elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + const ElfW(Sym) *sym, const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) +{ + ElfW(Addr) *const reloc_addr = reloc_addr_arg; + const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info); + + if (__builtin_expect (r_type == AARCH64_R(RELATIVE), 0)) + *reloc_addr = map->l_addr + reloc->r_addend; + else if (__builtin_expect (r_type == R_AARCH64_NONE, 0)) + return; + else + { + const ElfW(Sym) *const refsym = sym; + struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); + ElfW(Addr) value = sym_map == NULL ? 
0 : sym_map->l_addr + sym->st_value; + + if (sym != NULL + && __glibc_unlikely (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC) + && __glibc_likely (sym->st_shndx != SHN_UNDEF) + && __glibc_likely (!skip_ifunc)) + value = elf_ifunc_invoke (value); + + switch (r_type) + { + case AARCH64_R(COPY): + if (sym == NULL) + break; + + if (sym->st_size > refsym->st_size + || (GLRO(dl_verbose) && sym->st_size < refsym->st_size)) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + _dl_error_printf ("\ +%s: Symbol `%s' has different size in shared object, consider re-linking\n", + RTLD_PROGNAME, strtab + refsym->st_name); + } + memcpy (reloc_addr_arg, (void *) value, + MIN (sym->st_size, refsym->st_size)); + break; + + case AARCH64_R(RELATIVE): + case AARCH64_R(GLOB_DAT): + case AARCH64_R(JUMP_SLOT): + case AARCH64_R(ABS32): +#ifdef __LP64__ + case AARCH64_R(ABS64): +#endif + *reloc_addr = value + reloc->r_addend; + break; + + case AARCH64_R(TLSDESC): + { + struct tlsdesc volatile *td = + (struct tlsdesc volatile *)reloc_addr; +#ifndef RTLD_BOOTSTRAP + if (! 
sym) + { + td->arg = (void*)reloc->r_addend; + td->entry = _dl_tlsdesc_undefweak; + } + else +#endif + { +#ifndef RTLD_BOOTSTRAP +# ifndef SHARED + CHECK_STATIC_TLS (map, sym_map); +# else + if (!TRY_STATIC_TLS (map, sym_map)) + { + td->arg = _dl_make_tlsdesc_dynamic + (sym_map, sym->st_value + reloc->r_addend); + td->entry = _dl_tlsdesc_dynamic; + } + else +# endif +#endif + { + td->arg = (void*)(sym->st_value + sym_map->l_tls_offset + + reloc->r_addend); + td->entry = _dl_tlsdesc_return; + } + } + break; + } + + case AARCH64_R(TLS_DTPMOD): +#ifdef RTLD_BOOTSTRAP + *reloc_addr = 1; +#else + if (sym_map != NULL) + { + *reloc_addr = sym_map->l_tls_modid; + } +#endif + break; + + case AARCH64_R(TLS_DTPREL): + if (sym) + *reloc_addr = sym->st_value + reloc->r_addend; + break; + + case AARCH64_R(TLS_TPREL): + if (sym) + { + CHECK_STATIC_TLS (map, sym_map); + *reloc_addr = + sym->st_value + reloc->r_addend + sym_map->l_tls_offset; + } + break; + + case AARCH64_R(IRELATIVE): + value = map->l_addr + reloc->r_addend; + value = elf_ifunc_invoke (value); + *reloc_addr = value; + break; + + default: + _dl_reloc_bad_type (map, r_type, 0); + break; + } + } +} + +inline void +__attribute__ ((always_inline)) +elf_machine_rela_relative (ElfW(Addr) l_addr, + const ElfW(Rela) *reloc, + void *const reloc_addr_arg) +{ + ElfW(Addr) *const reloc_addr = reloc_addr_arg; + *reloc_addr = l_addr + reloc->r_addend; +} + +inline void +__attribute__ ((always_inline)) +elf_machine_lazy_rel (struct link_map *map, + ElfW(Addr) l_addr, + const ElfW(Rela) *reloc, + int skip_ifunc) +{ + ElfW(Addr) *const reloc_addr = (void *) (l_addr + reloc->r_offset); + const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info); + /* Check for unexpected PLT reloc type. 
*/ + if (__builtin_expect (r_type == AARCH64_R(JUMP_SLOT), 1)) + { + if (__builtin_expect (map->l_mach.plt, 0) == 0) + *reloc_addr += l_addr; + else + *reloc_addr = map->l_mach.plt; + } + else if (__builtin_expect (r_type == AARCH64_R(TLSDESC), 1)) + { + struct tlsdesc volatile *td = + (struct tlsdesc volatile *)reloc_addr; + + td->arg = (void*)reloc; + td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + + map->l_addr); + } + else if (__glibc_unlikely (r_type == AARCH64_R(IRELATIVE))) + { + ElfW(Addr) value = map->l_addr + reloc->r_addend; + if (__glibc_likely (!skip_ifunc)) + value = elf_ifunc_invoke (value); + *reloc_addr = value; + } + else + _dl_reloc_bad_type (map, r_type, 1); +} + +#endif diff --git a/REORG.TODO/sysdeps/aarch64/dl-sysdep.h b/REORG.TODO/sysdeps/aarch64/dl-sysdep.h new file mode 100644 index 0000000000..0b510a69d1 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/dl-sysdep.h @@ -0,0 +1,25 @@ +/* Copyright (C) 2002-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include_next <dl-sysdep.h> + +/* _dl_argv cannot be attribute_relro, because _dl_start_user + might write into it after _dl_start returns. 
*/ +#define DL_ARGV_NOT_RELRO 1 + +#define DL_EXTERN_PROTECTED_DATA diff --git a/REORG.TODO/sysdeps/aarch64/dl-tls.h b/REORG.TODO/sysdeps/aarch64/dl-tls.h new file mode 100644 index 0000000000..02068e1410 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/dl-tls.h @@ -0,0 +1,27 @@ +/* Copyright (C) 2005-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Type used for the representation of TLS information in the GOT. */ +typedef struct +{ + unsigned long int ti_module; + unsigned long int ti_offset; +} tls_index; + + +extern void *__tls_get_addr (tls_index *ti); diff --git a/REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S b/REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S new file mode 100644 index 0000000000..94679a04ca --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/dl-tlsdesc.S @@ -0,0 +1,373 @@ +/* Thread-local storage handling in the ELF dynamic linker. + AArch64 version. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <tls.h> +#include "tlsdesc.h" + +#define NSAVEDQREGPAIRS 16 +#define SAVE_Q_REGISTERS \ + stp q0, q1, [sp, #-32*NSAVEDQREGPAIRS]!; \ + cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS); \ + stp q2, q3, [sp, #32*1]; \ + stp q4, q5, [sp, #32*2]; \ + stp q6, q7, [sp, #32*3]; \ + stp q8, q9, [sp, #32*4]; \ + stp q10, q11, [sp, #32*5]; \ + stp q12, q13, [sp, #32*6]; \ + stp q14, q15, [sp, #32*7]; \ + stp q16, q17, [sp, #32*8]; \ + stp q18, q19, [sp, #32*9]; \ + stp q20, q21, [sp, #32*10]; \ + stp q22, q23, [sp, #32*11]; \ + stp q24, q25, [sp, #32*12]; \ + stp q26, q27, [sp, #32*13]; \ + stp q28, q29, [sp, #32*14]; \ + stp q30, q31, [sp, #32*15]; + +#define RESTORE_Q_REGISTERS \ + ldp q2, q3, [sp, #32*1]; \ + ldp q4, q5, [sp, #32*2]; \ + ldp q6, q7, [sp, #32*3]; \ + ldp q8, q9, [sp, #32*4]; \ + ldp q10, q11, [sp, #32*5]; \ + ldp q12, q13, [sp, #32*6]; \ + ldp q14, q15, [sp, #32*7]; \ + ldp q16, q17, [sp, #32*8]; \ + ldp q18, q19, [sp, #32*9]; \ + ldp q20, q21, [sp, #32*10]; \ + ldp q22, q23, [sp, #32*11]; \ + ldp q24, q25, [sp, #32*12]; \ + ldp q26, q27, [sp, #32*13]; \ + ldp q28, q29, [sp, #32*14]; \ + ldp q30, q31, [sp, #32*15]; \ + ldp q0, q1, [sp], #32*NSAVEDQREGPAIRS; \ + cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS); + + .text + + /* Compute the thread pointer offset for symbols in the static + TLS block. The offset is the same for all threads. 
+ Prototype: + _dl_tlsdesc_return (tlsdesc *) ; + */ + .hidden _dl_tlsdesc_return + .global _dl_tlsdesc_return + .type _dl_tlsdesc_return,%function + cfi_startproc + .align 2 +_dl_tlsdesc_return: + DELOUSE (0) + ldr PTR_REG (0), [x0, #PTR_SIZE] + RET + cfi_endproc + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return + + /* Same as _dl_tlsdesc_return but with synchronization for + lazy relocation. + Prototype: + _dl_tlsdesc_return_lazy (tlsdesc *) ; + */ + .hidden _dl_tlsdesc_return_lazy + .global _dl_tlsdesc_return_lazy + .type _dl_tlsdesc_return_lazy,%function + cfi_startproc + .align 2 +_dl_tlsdesc_return_lazy: + /* The ldar here happens after the load from [x0] at the call site + (that is generated by the compiler as part of the TLS access ABI), + so it reads the same value (this function is the final value of + td->entry) and thus it synchronizes with the release store to + td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load + from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */ + DELOUSE (0) + ldar PTR_REG (zr), [x0] + ldr PTR_REG (0), [x0, #PTR_SIZE] + RET + cfi_endproc + .size _dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy + + /* Handler for undefined weak TLS symbols. + Prototype: + _dl_tlsdesc_undefweak (tlsdesc *); + + The second word of the descriptor contains the addend. + Return the addend minus the thread pointer. This ensures + that when the caller adds on the thread pointer it gets back + the addend. */ + + .hidden _dl_tlsdesc_undefweak + .global _dl_tlsdesc_undefweak + .type _dl_tlsdesc_undefweak,%function + cfi_startproc + .align 2 +_dl_tlsdesc_undefweak: + str x1, [sp, #-16]! 
+ cfi_adjust_cfa_offset (16) + /* The ldar here happens after the load from [x0] at the call site + (that is generated by the compiler as part of the TLS access ABI), + so it reads the same value (this function is the final value of + td->entry) and thus it synchronizes with the release store to + td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load + from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */ + DELOUSE (0) + ldar PTR_REG (zr), [x0] + ldr PTR_REG (0), [x0, #PTR_SIZE] + mrs x1, tpidr_el0 /* x1 = thread pointer */ + sub PTR_REG (0), PTR_REG (0), PTR_REG (1) /* result = addend - thread pointer */ + ldr x1, [sp], #16 /* restore the x1 saved on entry */ + cfi_adjust_cfa_offset (-16) + RET + cfi_endproc + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak + +#ifdef SHARED + /* Handler for dynamic TLS symbols. + Prototype: + _dl_tlsdesc_dynamic (tlsdesc *) ; + + The second word of the descriptor points to a + tlsdesc_dynamic_arg structure. + + Returns the offset between the thread pointer and the + object referenced by the argument. + + ptrdiff_t + __attribute__ ((__regparm__ (1))) + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) + { + struct tlsdesc_dynamic_arg *td = tdp->arg; + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV); + if (__builtin_expect (td->gen_count <= dtv[0].counter + && (dtv[td->tlsinfo.ti_module].pointer.val + != TLS_DTV_UNALLOCATED), + 1)) + return dtv[td->tlsinfo.ti_module].pointer.val + + td->tlsinfo.ti_offset + - __thread_pointer; + + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; + } + */ + + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,%function + cfi_startproc + .align 2 +_dl_tlsdesc_dynamic: +# define NSAVEXREGPAIRS 2 /* fast path saves two pairs: x1/x2 and x3/x4 */ + stp x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
*/ + + stp x1, x2, [sp, #32+16*0] + stp x3, x4, [sp, #32+16*1] + + mrs x4, tpidr_el0 /* x4 = thread pointer, kept for the final subtraction */ + /* The ldar here happens after the load from [x0] at the call site + (that is generated by the compiler as part of the TLS access ABI), + so it reads the same value (this function is the final value of + td->entry) and thus it synchronizes with the release store to + td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load + from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */ + ldar PTR_REG (zr), [x0] + ldr PTR_REG (1), [x0,#TLSDESC_ARG] + ldr PTR_REG (0), [x4,#TCBHEAD_DTV] + ldr PTR_REG (3), [x1,#TLSDESC_GEN_COUNT] + ldr PTR_REG (2), [x0,#DTV_COUNTER] + cmp PTR_REG (3), PTR_REG (2) + b.hi 2f /* td->gen_count > dtv[0].counter: take the slow path */ + ldr PTR_REG (2), [x1,#TLSDESC_MODID] + add PTR_REG (0), PTR_REG (0), PTR_REG (2), lsl #(PTR_LOG_SIZE + 1) + ldr PTR_REG (0), [x0] /* Load val member of DTV entry. */ + cmp PTR_REG (0), #TLS_DTV_UNALLOCATED /* compare at pointer width: the value was loaded into PTR_REG (0) */ + b.eq 2f + ldr PTR_REG (1), [x1,#TLSDESC_MODOFF] + add PTR_REG (0), PTR_REG (0), PTR_REG (1) + sub PTR_REG (0), PTR_REG (0), PTR_REG (4) +1: + ldp x1, x2, [sp, #32+16*0] + ldp x3, x4, [sp, #32+16*1] + + ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS) + cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS) +# undef NSAVEXREGPAIRS + RET +2: + /* This is the slow path. We need to call __tls_get_addr() which + means we need to save and restore all the registers that the + callee will trash. */ + + /* Save the remaining registers that we must treat as caller save. */ +# define NSAVEXREGPAIRS 7 + stp x5, x6, [sp, #-16*NSAVEXREGPAIRS]!
+ cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS) + stp x7, x8, [sp, #16*1] + stp x9, x10, [sp, #16*2] + stp x11, x12, [sp, #16*3] + stp x13, x14, [sp, #16*4] + stp x15, x16, [sp, #16*5] + stp x17, x18, [sp, #16*6] + + SAVE_Q_REGISTERS + + mov x0, x1 + bl __tls_get_addr + + mrs x1, tpidr_el0 + sub PTR_REG (0), PTR_REG (0), PTR_REG (1) + + RESTORE_Q_REGISTERS + + ldp x7, x8, [sp, #16*1] + ldp x9, x10, [sp, #16*2] + ldp x11, x12, [sp, #16*3] + ldp x13, x14, [sp, #16*4] + ldp x15, x16, [sp, #16*5] + ldp x17, x18, [sp, #16*6] + ldp x5, x6, [sp], #16*NSAVEXREGPAIRS + cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS) + b 1b + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic +# undef NSAVEXREGPAIRS +#endif + + /* This function is a wrapper for a lazy resolver for TLS_DESC + RELA relocations. + When the actual resolver returns, it will have adjusted the + TLS descriptor such that we can tail-call it for it to return + the TP offset of the symbol. */ + + .hidden _dl_tlsdesc_resolve_rela + .global _dl_tlsdesc_resolve_rela + .type _dl_tlsdesc_resolve_rela,%function + cfi_startproc + .align 2 +_dl_tlsdesc_resolve_rela: +#define NSAVEXREGPAIRS 9 + stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]! 
+ cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS) + mov x29, sp + stp x1, x4, [sp, #32+16*0] + stp x5, x6, [sp, #32+16*1] + stp x7, x8, [sp, #32+16*2] + stp x9, x10, [sp, #32+16*3] + stp x11, x12, [sp, #32+16*4] + stp x13, x14, [sp, #32+16*5] + stp x15, x16, [sp, #32+16*6] + stp x17, x18, [sp, #32+16*7] + str x0, [sp, #32+16*8] + + SAVE_Q_REGISTERS + + DELOUSE (3) + ldr PTR_REG (1), [x3, #PTR_SIZE] + bl _dl_tlsdesc_resolve_rela_fixup + + RESTORE_Q_REGISTERS + + ldr x0, [sp, #32+16*8] + DELOUSE (0) + ldr PTR_REG (1), [x0] + blr x1 + + ldp x1, x4, [sp, #32+16*0] + ldp x5, x6, [sp, #32+16*1] + ldp x7, x8, [sp, #32+16*2] + ldp x9, x10, [sp, #32+16*3] + ldp x11, x12, [sp, #32+16*4] + ldp x13, x14, [sp, #32+16*5] + ldp x15, x16, [sp, #32+16*6] + ldp x17, x18, [sp, #32+16*7] + ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS) + cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS) + ldp x2, x3, [sp], #16 + cfi_adjust_cfa_offset (-16) + RET +#undef NSAVEXREGPAIRS + cfi_endproc + .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela + + /* This function is a placeholder for lazy resolving of TLS + relocations. Once some thread starts resolving a TLS + relocation, it sets up the TLS descriptor to use this + resolver, such that other threads that would attempt to + resolve it concurrently may skip the call to the original lazy + resolver and go straight to a condition wait. + + When the actual resolver returns, it will have adjusted the + TLS descriptor such that we can tail-call it for it to return + the TP offset of the symbol. */ + + .hidden _dl_tlsdesc_resolve_hold + .global _dl_tlsdesc_resolve_hold + .type _dl_tlsdesc_resolve_hold,%function + cfi_startproc + .align 2 +_dl_tlsdesc_resolve_hold: +#define NSAVEXREGPAIRS 10 +1: + stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]! 
+ cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS) + mov x29, sp + stp x1, x2, [sp, #32+16*0] + stp x3, x4, [sp, #32+16*1] + stp x5, x6, [sp, #32+16*2] + stp x7, x8, [sp, #32+16*3] + stp x9, x10, [sp, #32+16*4] + stp x11, x12, [sp, #32+16*5] + stp x13, x14, [sp, #32+16*6] + stp x15, x16, [sp, #32+16*7] + stp x17, x18, [sp, #32+16*8] + str x0, [sp, #32+16*9] + + SAVE_Q_REGISTERS + + adr x1, 1b + bl _dl_tlsdesc_resolve_hold_fixup + + RESTORE_Q_REGISTERS + + ldr x0, [sp, #32+16*9] + DELOUSE (0) + ldr PTR_REG (1), [x0] + blr x1 + + ldp x1, x2, [sp, #32+16*0] + ldp x3, x4, [sp, #32+16*1] + ldp x5, x6, [sp, #32+16*2] + ldp x7, x8, [sp, #32+16*3] + ldp x9, x10, [sp, #32+16*4] + ldp x11, x12, [sp, #32+16*5] + ldp x13, x14, [sp, #32+16*6] + ldp x15, x16, [sp, #32+16*7] + ldp x17, x18, [sp, #32+16*8] + ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS) + cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS) + RET + cfi_endproc + .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold +#undef NSAVEXREGPAIRS diff --git a/REORG.TODO/sysdeps/aarch64/dl-tlsdesc.h b/REORG.TODO/sysdeps/aarch64/dl-tlsdesc.h new file mode 100644 index 0000000000..127d1258d3 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/dl-tlsdesc.h @@ -0,0 +1,68 @@ +/* Thread-local storage descriptor handling in the ELF dynamic linker. + AArch64 version. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _AARCH64_DL_TLSDESC_H +#define _AARCH64_DL_TLSDESC_H 1 + +/* Type used to represent a TLS descriptor in the GOT. */ +struct tlsdesc +{ + ptrdiff_t (*entry) (struct tlsdesc *); + void *arg; +}; + +typedef struct dl_tls_index +{ + unsigned long int ti_module; + unsigned long int ti_offset; +} tls_index; + +/* Type used as the argument in a TLS descriptor for a symbol that + needs dynamic TLS offsets. */ +struct tlsdesc_dynamic_arg +{ + tls_index tlsinfo; + size_t gen_count; +}; + +extern ptrdiff_t attribute_hidden +_dl_tlsdesc_return (struct tlsdesc *); + +extern ptrdiff_t attribute_hidden +_dl_tlsdesc_return_lazy (struct tlsdesc *); + +extern ptrdiff_t attribute_hidden +_dl_tlsdesc_undefweak (struct tlsdesc *); + +extern ptrdiff_t attribute_hidden +_dl_tlsdesc_resolve_rela (struct tlsdesc *); + +extern ptrdiff_t attribute_hidden +_dl_tlsdesc_resolve_hold (struct tlsdesc *); + +# ifdef SHARED +extern void *internal_function _dl_make_tlsdesc_dynamic (struct link_map *, + size_t); + +extern ptrdiff_t attribute_hidden +_dl_tlsdesc_dynamic (struct tlsdesc *); +#endif + +#endif diff --git a/REORG.TODO/sysdeps/aarch64/dl-trampoline.S b/REORG.TODO/sysdeps/aarch64/dl-trampoline.S new file mode 100644 index 0000000000..a2a0b7d4b1 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/dl-trampoline.S @@ -0,0 +1,300 @@ +/* Copyright (C) 2005-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <libc-symbols.h> + +#include "dl-link.h" + +#define ip0 x16 +#define ip0l PTR_REG (16) +#define ip1 x17 +#define lr x30 + +/* RELA relocations are 3 pointers */ +#define RELA_SIZE (PTR_SIZE * 3) + /* _dl_runtime_resolve: saves x0-x9 and q0-q7, calls _dl_fixup, then restores them and branches to the address _dl_fixup returned. */ + .text + .globl _dl_runtime_resolve + .type _dl_runtime_resolve, #function + cfi_startproc + .align 2 +_dl_runtime_resolve: + /* AArch64 we get called with: + ip0 &PLTGOT[2] + ip1 temp(dl resolver entry point) + [sp, #8] lr + [sp, #0] &PLTGOT[n] + */ + + cfi_rel_offset (lr, 8) + + /* Save arguments. */ + stp x8, x9, [sp, #-(80+8*16)]! + cfi_adjust_cfa_offset (80+8*16) + cfi_rel_offset (x8, 0) + cfi_rel_offset (x9, 8) + + stp x6, x7, [sp, #16] + cfi_rel_offset (x6, 16) + cfi_rel_offset (x7, 24) + + stp x4, x5, [sp, #32] + cfi_rel_offset (x4, 32) + cfi_rel_offset (x5, 40) + + stp x2, x3, [sp, #48] + cfi_rel_offset (x2, 48) + cfi_rel_offset (x3, 56) + + stp x0, x1, [sp, #64] + cfi_rel_offset (x0, 64) + cfi_rel_offset (x1, 72) + /* Save the 128-bit vector argument registers; each CFI note must name the register actually stored in that slot. */ + stp q0, q1, [sp, #(80+0*16)] + cfi_rel_offset (q0, 80+0*16) + cfi_rel_offset (q1, 80+1*16) + + stp q2, q3, [sp, #(80+2*16)] + cfi_rel_offset (q2, 80+2*16) + cfi_rel_offset (q3, 80+3*16) + + stp q4, q5, [sp, #(80+4*16)] + cfi_rel_offset (q4, 80+4*16) + cfi_rel_offset (q5, 80+5*16) + + stp q6, q7, [sp, #(80+6*16)] + cfi_rel_offset (q6, 80+6*16) + cfi_rel_offset (q7, 80+7*16) + + /* Get pointer to linker struct. */ + ldr PTR_REG (0), [ip0, #-PTR_SIZE] + + /* Prepare to call _dl_fixup(). 
*/ + ldr x1, [sp, 80+8*16] /* Recover &PLTGOT[n] */ + /* x1 = 3 * (&PLTGOT[n] - &PLTGOT[2]) - RELA_SIZE; it is the second argument register for the _dl_fixup call. */ + sub x1, x1, ip0 + add x1, x1, x1, lsl #1 + lsl x1, x1, #3 + sub x1, x1, #(RELA_SIZE<<3) + lsr x1, x1, #3 + + /* Call fixup routine. */ + bl _dl_fixup + + /* Save the return. */ + mov ip0, x0 + + /* Get arguments and return address back. */ + ldp q0, q1, [sp, #(80+0*16)] + ldp q2, q3, [sp, #(80+2*16)] + ldp q4, q5, [sp, #(80+4*16)] + ldp q6, q7, [sp, #(80+6*16)] + ldp x0, x1, [sp, #64] + ldp x2, x3, [sp, #48] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #16] + ldp x8, x9, [sp], #(80+8*16) + cfi_adjust_cfa_offset (-(80+8*16)) + + ldp ip1, lr, [sp], #16 /* Pop the two entry words: &PLTGOT[n] (discarded into ip1) and the saved lr. */ + cfi_adjust_cfa_offset (-16) + + /* Jump to the newly found address. */ + br ip0 + + cfi_endproc + .size _dl_runtime_resolve, .-_dl_runtime_resolve +#ifndef PROF + .globl _dl_runtime_profile + .type _dl_runtime_profile, #function + cfi_startproc + .align 2 +_dl_runtime_profile: + /* AArch64 we get called with: + ip0 &PLTGOT[2] + ip1 temp(dl resolver entry point) + [sp, #8] lr + [sp, #0] &PLTGOT[n] + + Stack frame layout: + [sp, #...] lr + [sp, #...] &PLTGOT[n] + [sp, #96] La_aarch64_regs + [sp, #48] La_aarch64_retval + [sp, #40] frame size return from pltenter + [sp, #32] dl_profile_call saved x1 + [sp, #24] dl_profile_call saved x0 + [sp, #16] t1 + [sp, #0] x29, lr <- x29 + */ + +# define OFFSET_T1 16 +# define OFFSET_SAVED_CALL_X0 OFFSET_T1 + 8 +# define OFFSET_FS OFFSET_SAVED_CALL_X0 + 16 +# define OFFSET_RV OFFSET_FS + 8 +# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV + +# define SF_SIZE OFFSET_RG + DL_SIZEOF_RG + +# define OFFSET_PLTGOTN SF_SIZE +# define OFFSET_LR OFFSET_PLTGOTN + 8 + + /* Save arguments. 
*/ + sub sp, sp, #SF_SIZE + cfi_adjust_cfa_offset (SF_SIZE) + stp x29, x30, [SP, #0] + mov x29, sp + cfi_def_cfa_register (x29) + cfi_rel_offset (x29, 0) + cfi_rel_offset (lr, 8) + + stp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0] + cfi_rel_offset (x0, OFFSET_RG + DL_OFFSET_RG_X0 + 16*0 + 0) + cfi_rel_offset (x1, OFFSET_RG + DL_OFFSET_RG_X0 + 16*0 + 8) + stp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] + cfi_rel_offset (x2, OFFSET_RG + DL_OFFSET_RG_X0 + 16*1 + 0) + cfi_rel_offset (x3, OFFSET_RG + DL_OFFSET_RG_X0 + 16*1 + 8) + stp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] + cfi_rel_offset (x4, OFFSET_RG + DL_OFFSET_RG_X0 + 16*2 + 0) + cfi_rel_offset (x5, OFFSET_RG + DL_OFFSET_RG_X0 + 16*2 + 8) + stp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] + cfi_rel_offset (x6, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 0) + cfi_rel_offset (x7, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 8) + /* Only the 64-bit d views of the first eight vector registers are recorded here. */ + stp d0, d1, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] + cfi_rel_offset (d0, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0) + cfi_rel_offset (d1, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0 + 8) + stp d2, d3, [X29, #OFFSET_RG+ DL_OFFSET_RG_D0 + 16*1] + cfi_rel_offset (d2, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 0) + cfi_rel_offset (d3, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 8) + stp d4, d5, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] + cfi_rel_offset (d4, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 0) + cfi_rel_offset (d5, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 8) + stp d6, d7, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] + cfi_rel_offset (d6, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 0) + cfi_rel_offset (d7, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 8) + /* Store the address just above this frame and the two entry words, together with the lr saved at OFFSET_LR, at DL_OFFSET_RG_SP. */ + add x0, x29, #SF_SIZE + 16 + ldr x1, [x29, #OFFSET_LR] + stp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_SP] + + /* Get pointer to linker struct. */ + ldr PTR_REG (0), [ip0, #-PTR_SIZE] + + /* Prepare to call _dl_profile_fixup(). 
*/ + ldr x1, [x29, OFFSET_PLTGOTN] /* Recover &PLTGOT[n] */ + /* x1 = 3 * (&PLTGOT[n] - &PLTGOT[2]) - RELA_SIZE, the same computation as in _dl_runtime_resolve. */ + sub x1, x1, ip0 + add x1, x1, x1, lsl #1 + lsl x1, x1, #3 + sub x1, x1, #(RELA_SIZE<<3) + lsr x1, x1, #3 + /* Keep the first two call arguments; they are reloaded for _dl_call_pltexit. */ + stp x0, x1, [x29, #OFFSET_SAVED_CALL_X0] + + /* Set up extra args for _dl_profile_fixup */ + ldr x2, [x29, #OFFSET_LR] /* load saved LR */ + add x3, x29, #OFFSET_RG /* address of La_aarch64_reg */ + add x4, x29, #OFFSET_FS /* address of framesize */ + bl _dl_profile_fixup + + ldr ip0l, [x29, #OFFSET_FS] /* framesize >= 0: take the path at 1: that copies the frame and calls pltexit */ + cmp ip0l, #0 + bge 1f + cfi_remember_state + + /* Save the return. */ + mov ip0, x0 + + /* Get arguments and return address back. */ + ldp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0] + ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] + ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] + ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] + ldp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] + ldp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1] + ldp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] + ldp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] + + cfi_def_cfa_register (sp) + ldp x29, x30, [x29, #0] + cfi_restore(x29) + cfi_restore(x30) + + add sp, sp, SF_SIZE + 16 + cfi_adjust_cfa_offset (- SF_SIZE - 16) + + /* Jump to the newly found address. */ + br ip0 + + cfi_restore_state +1: + /* The new frame size is in ip0. */ + /* Move sp framesize bytes below x29, rounded down to a multiple of 16. */ + sub PTR_REG (1), PTR_REG (29), ip0l + and sp, x1, #0xfffffffffffffff0 + /* Stash the value returned by _dl_profile_fixup across the memcpy call. */ + str x0, [x29, #OFFSET_T1] + + mov x0, sp + add x1, x29, #SF_SIZE + 16 + mov x2, ip0 + bl memcpy + + ldr ip0, [x29, #OFFSET_T1] + + /* Call the function. 
*/ + ldp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0] + ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] + ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] + ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] + ldp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] + ldp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1] + ldp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] + ldp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] + blr ip0 /* The result registers are stored at OFFSET_RV by the instructions that follow. */ + stp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0] + stp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0] + stp d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1] + + /* Setup call to pltexit */ + ldp x0, x1, [x29, #OFFSET_SAVED_CALL_X0] + add x2, x29, #OFFSET_RG + add x3, x29, #OFFSET_RV + bl _dl_call_pltexit + /* Reload the result registers from OFFSET_RV after the pltexit call. */ + ldp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0] + ldp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0] + ldp d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1] + /* LR from within La_aarch64_reg */ + ldr lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR] + cfi_restore(lr) + mov sp, x29 + cfi_def_cfa_register (sp) + ldr x29, [x29, #0] + cfi_restore(x29) + add sp, sp, SF_SIZE + 16 + cfi_adjust_cfa_offset (- SF_SIZE - 16) + + br lr + + cfi_endproc + .size _dl_runtime_profile, .-_dl_runtime_profile +#endif + .previous diff --git a/REORG.TODO/sysdeps/aarch64/fpu/e_sqrt.c b/REORG.TODO/sysdeps/aarch64/fpu/e_sqrt.c new file mode 100644 index 0000000000..f984d877b6 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/e_sqrt.c @@ -0,0 +1,28 @@ +/* Square root of floating point number. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math_private.h> + /* Return the square root of D, computed by a single fsqrt instruction on the d (double-precision) views of the operand registers. */ +double +__ieee754_sqrt (double d) +{ + double res; + asm ("fsqrt %d0, %d1" : "=w" (res) : "w" (d)); + return res; +} +strong_alias (__ieee754_sqrt, __sqrt_finite) diff --git a/REORG.TODO/sysdeps/aarch64/fpu/e_sqrtf.c b/REORG.TODO/sysdeps/aarch64/fpu/e_sqrtf.c new file mode 100644 index 0000000000..67707ef833 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/e_sqrtf.c @@ -0,0 +1,28 @@ +/* Single-precision floating point square root. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math_private.h> + /* Return the square root of S, computed by a single fsqrt instruction on the s (single-precision) views of the operand registers. */ +float +__ieee754_sqrtf (float s) +{ + float res; + asm ("fsqrt %s0, %s1" : "=w" (res) : "w" (s)); + return res; +} +strong_alias (__ieee754_sqrtf, __sqrtf_finite) diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fclrexcpt.c b/REORG.TODO/sysdeps/aarch64/fpu/fclrexcpt.c new file mode 100644 index 0000000000..254f6a6b54 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/fclrexcpt.c @@ -0,0 +1,38 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> + /* Clear the FPSR bits selected by EXCEPTS (restricted to FE_ALL_EXCEPT). FPSR is written only when a bit actually changes. Always returns 0. */ +int +feclearexcept (int excepts) +{ + fpu_fpsr_t fpsr; + fpu_fpsr_t fpsr_new; + + excepts &= FE_ALL_EXCEPT; + + _FPU_GETFPSR (fpsr); + fpsr_new = fpsr & ~excepts; + + if (fpsr != fpsr_new) + _FPU_SETFPSR (fpsr_new); + + return 0; +} +libm_hidden_def (feclearexcept) diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fedisblxcpt.c b/REORG.TODO/sysdeps/aarch64/fpu/fedisblxcpt.c new file mode 100644 index 0000000000..900da1f4ec --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/fedisblxcpt.c @@ -0,0 +1,36 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> + /* Clear the FPCR bits that correspond to EXCEPTS (masked with FE_ALL_EXCEPT, shifted left by FE_EXCEPT_SHIFT), writing FPCR only on change. Returns the bits that were set in that FPCR field before the call. */ +int +fedisableexcept (int excepts) +{ + fpu_control_t fpcr; + fpu_control_t fpcr_new; + + _FPU_GETCW (fpcr); + excepts &= FE_ALL_EXCEPT; + fpcr_new = fpcr & ~(excepts << FE_EXCEPT_SHIFT); + + if (fpcr != fpcr_new) + _FPU_SETCW (fpcr_new); + + return (fpcr >> FE_EXCEPT_SHIFT) & FE_ALL_EXCEPT; +} diff --git a/REORG.TODO/sysdeps/aarch64/fpu/feenablxcpt.c b/REORG.TODO/sysdeps/aarch64/fpu/feenablxcpt.c new file mode 100644 index 0000000000..029ef1db8b --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/feenablxcpt.c @@ -0,0 +1,47 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> + /* Set the FPCR bits that correspond to EXCEPTS (masked with FE_ALL_EXCEPT, shifted left by FE_EXCEPT_SHIFT). Returns the bits that were set in that FPCR field before the call, or -1 when a bit of the written value does not read back as set. */ +int +feenableexcept (int excepts) +{ + fpu_control_t fpcr; + fpu_control_t fpcr_new; + fpu_control_t updated_fpcr; + + _FPU_GETCW (fpcr); + excepts &= FE_ALL_EXCEPT; + fpcr_new = fpcr | (excepts << FE_EXCEPT_SHIFT); + + if (fpcr != fpcr_new) + { + _FPU_SETCW (fpcr_new); + + /* Trapping exceptions are optional in AArch64; the relevant enable + bits in FPCR are RES0 hence the absence of support can be detected + by reading back the FPCR and comparing with the required value. */ + _FPU_GETCW (updated_fpcr); + + if (fpcr_new & ~updated_fpcr) + return -1; + } + + return (fpcr >> FE_EXCEPT_SHIFT) & FE_ALL_EXCEPT; +} diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fegetenv.c b/REORG.TODO/sysdeps/aarch64/fpu/fegetenv.c new file mode 100644 index 0000000000..ac277949d7 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/fegetenv.c @@ -0,0 +1,35 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv.h> +#include <fpu_control.h> + /* Store the current FPCR and FPSR values in the __fpcr and __fpsr members of *ENVP. Always returns 0. */ +int +__fegetenv (fenv_t *envp) +{ + fpu_control_t fpcr; + fpu_fpsr_t fpsr; + _FPU_GETCW (fpcr); + _FPU_GETFPSR (fpsr); + envp->__fpcr = fpcr; + envp->__fpsr = fpsr; + return 0; +} +libm_hidden_def (__fegetenv) +weak_alias (__fegetenv, fegetenv) +libm_hidden_weak (fegetenv) diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fegetexcept.c b/REORG.TODO/sysdeps/aarch64/fpu/fegetexcept.c new file mode 100644 index 0000000000..fa44334644 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/fegetexcept.c @@ -0,0 +1,28 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> + /* Return the current FPCR shifted right by FE_EXCEPT_SHIFT and masked with FE_ALL_EXCEPT. */ +int +fegetexcept (void) +{ + fpu_control_t fpcr; + _FPU_GETCW (fpcr); + return (fpcr >> FE_EXCEPT_SHIFT) & FE_ALL_EXCEPT; +} diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fegetmode.c b/REORG.TODO/sysdeps/aarch64/fpu/fegetmode.c new file mode 100644 index 0000000000..1898732184 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/fegetmode.c @@ -0,0 +1,27 @@ +/* Store current floating-point control modes. AArch64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> + /* Read FPCR directly into *MODEP. Always returns 0. */ +int +fegetmode (femode_t *modep) +{ + _FPU_GETCW (*modep); + return 0; +} diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fegetround.c b/REORG.TODO/sysdeps/aarch64/fpu/fegetround.c new file mode 100644 index 0000000000..3d51d9b43e --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/fegetround.c @@ -0,0 +1,29 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv.h> +#include <get-rounding-mode.h> + /* Return whatever get_rounding_mode () reports. */ +int +__fegetround (void) +{ + return get_rounding_mode (); +} +libm_hidden_def (__fegetround) +weak_alias (__fegetround, fegetround) +libm_hidden_weak (fegetround) diff --git a/REORG.TODO/sysdeps/aarch64/fpu/feholdexcpt.c b/REORG.TODO/sysdeps/aarch64/fpu/feholdexcpt.c new file mode 100644 index 0000000000..df775ffc53 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/feholdexcpt.c @@ -0,0 +1,30 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <math_private.h> + /* Hand ENVP to libc_feholdexcept_aarch64, which does the actual work. Always returns 0. */ +int +__feholdexcept (fenv_t *envp) +{ + libc_feholdexcept_aarch64 (envp); + return 0; +} +libm_hidden_def (__feholdexcept) +weak_alias (__feholdexcept, feholdexcept) +libm_hidden_weak (feholdexcept) diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fesetenv.c b/REORG.TODO/sysdeps/aarch64/fpu/fesetenv.c new file mode 100644 index 0000000000..9875188735 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/fesetenv.c @@ -0,0 +1,78 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> + /* Install the environment ENVP. A caller-supplied environment is written verbatim (FPCR only if it differs, FPSR always) and 0 is returned. For FE_DFL_ENV and FE_NOMASK_ENV the current reserved bits are kept and the default or IEEE values are OR-ed in; the result is then nonzero if a written FPCR bit does not read back as set. */ +int +__fesetenv (const fenv_t *envp) +{ + fpu_control_t fpcr; + fpu_control_t fpcr_new; + fpu_control_t updated_fpcr; + fpu_fpsr_t fpsr; + fpu_fpsr_t fpsr_new; + + _FPU_GETCW (fpcr); + + if ((envp != FE_DFL_ENV) && (envp != FE_NOMASK_ENV)) + { + /* The new FPCR/FPSR are valid, so don't merge the reserved flags. */ + fpcr_new = envp->__fpcr; + + if (fpcr != fpcr_new) + _FPU_SETCW (fpcr_new); + + _FPU_SETFPSR (envp->__fpsr); + return 0; + } + /* Predefined environment: start from the reserved bits of the live registers. */ + _FPU_GETFPSR (fpsr); + fpcr_new = fpcr & _FPU_RESERVED; + fpsr_new = fpsr & _FPU_FPSR_RESERVED; + + if (envp == FE_DFL_ENV) + { + fpcr_new |= _FPU_DEFAULT; + fpsr_new |= _FPU_FPSR_DEFAULT; + } + else + { + fpcr_new |= _FPU_FPCR_IEEE; + fpsr_new |= _FPU_FPSR_IEEE; + } + + _FPU_SETFPSR (fpsr_new); + + if (fpcr != fpcr_new) + { + _FPU_SETCW (fpcr_new); + + /* Trapping exceptions are optional in AArch64; the relevant enable + bits in FPCR are RES0 hence the absence of support can be detected + by reading back the FPCR and comparing with the required value. 
*/ + _FPU_GETCW (updated_fpcr); + + return fpcr_new & ~updated_fpcr; + } + + return 0; +} +libm_hidden_def (__fesetenv) +weak_alias (__fesetenv, fesetenv) +libm_hidden_weak (fesetenv) diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fesetexcept.c b/REORG.TODO/sysdeps/aarch64/fpu/fesetexcept.c new file mode 100644 index 0000000000..b8915cd91b --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/fesetexcept.c @@ -0,0 +1,34 @@ +/* Set given exception flags. AArch64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> + /* OR the bits of EXCEPTS that lie within FE_ALL_EXCEPT into FPSR, writing the register only on change. Always returns 0. */ +int +fesetexcept (int excepts) +{ + fpu_fpsr_t fpsr; + fpu_fpsr_t fpsr_new; + + _FPU_GETFPSR (fpsr); + fpsr_new = fpsr | (excepts & FE_ALL_EXCEPT); + if (fpsr != fpsr_new) + _FPU_SETFPSR (fpsr_new); + + return 0; +} diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fesetmode.c b/REORG.TODO/sysdeps/aarch64/fpu/fesetmode.c new file mode 100644 index 0000000000..1947eb2a4f --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/fesetmode.c @@ -0,0 +1,34 @@ +/* Install given floating-point control modes. AArch64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> + /* Install *MODEP as the FPCR value; for FE_DFL_MODE use the current reserved bits combined with _FPU_DEFAULT instead. FPCR is written only when the value changes. Always returns 0. */ +int +fesetmode (const femode_t *modep) +{ + fpu_control_t fpcr, fpcr_new; + _FPU_GETCW (fpcr); + if (modep == FE_DFL_MODE) + fpcr_new = (fpcr & _FPU_RESERVED) | _FPU_DEFAULT; + else + fpcr_new = *modep; + if (fpcr != fpcr_new) + _FPU_SETCW (fpcr_new); + return 0; +} diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fesetround.c b/REORG.TODO/sysdeps/aarch64/fpu/fesetround.c new file mode 100644 index 0000000000..caae99f75d --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/fesetround.c @@ -0,0 +1,34 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <math_private.h> +#include <fpu_control.h> + /* Return 1 without touching any state when ROUND has a bit outside _FPU_FPCR_RM_MASK; otherwise pass ROUND to libc_fesetround_aarch64 and return 0. */ +int +__fesetround (int round) +{ + if (round & ~_FPU_FPCR_RM_MASK) + return 1; + + libc_fesetround_aarch64 (round); + return 0; +} +libm_hidden_def (__fesetround) +weak_alias (__fesetround, fesetround) +libm_hidden_weak (fesetround) diff --git a/REORG.TODO/sysdeps/aarch64/fpu/feupdateenv.c b/REORG.TODO/sysdeps/aarch64/fpu/feupdateenv.c new file mode 100644 index 0000000000..e56fc3a44f --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/feupdateenv.c @@ -0,0 +1,89 @@ +/* Copyright (C) 2009-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv.h> +#include <fpu_control.h> + /* Install the environment ENVP while keeping the exception flags currently set in FPSR. When a kept flag matches a bit set in the new FPCR shifted right by FE_EXCEPT_SHIFT, the flags are passed to __feraiseexcept and its result is returned. For FE_DFL_ENV and FE_NOMASK_ENV, 1 is returned if a written FPCR bit does not read back as set. Otherwise returns 0. */ +int +__feupdateenv (const fenv_t *envp) +{ + fpu_control_t fpcr; + fpu_control_t fpcr_new; + fpu_control_t updated_fpcr; + fpu_fpsr_t fpsr; + fpu_fpsr_t fpsr_new; + int excepts; + + _FPU_GETCW (fpcr); + _FPU_GETFPSR (fpsr); + excepts = fpsr & FE_ALL_EXCEPT; + + if ((envp != FE_DFL_ENV) && (envp != FE_NOMASK_ENV)) + { + fpcr_new = envp->__fpcr; + fpsr_new = envp->__fpsr | excepts; + + if (fpcr != fpcr_new) + _FPU_SETCW (fpcr_new); + + if (fpsr != fpsr_new) + _FPU_SETFPSR (fpsr_new); + + if (excepts & (fpcr_new >> FE_EXCEPT_SHIFT)) + return __feraiseexcept (excepts); + + return 0; + } + /* Predefined environment: keep the reserved bits and, in FPSR, the current exception flags. */ + fpcr_new = fpcr & _FPU_RESERVED; + fpsr_new = fpsr & (_FPU_FPSR_RESERVED | FE_ALL_EXCEPT); + + if (envp == FE_DFL_ENV) + { + fpcr_new |= _FPU_DEFAULT; + fpsr_new |= _FPU_FPSR_DEFAULT; + } + else + { + fpcr_new |= _FPU_FPCR_IEEE; + fpsr_new |= _FPU_FPSR_IEEE; + } + + _FPU_SETFPSR (fpsr_new); + + if (fpcr != fpcr_new) + { + _FPU_SETCW (fpcr_new); + + /* Trapping exceptions are optional in AArch64; the relevant enable + bits in FPCR are RES0 hence the absence of support can be detected + by reading back the FPCR and comparing with the required value. */ + _FPU_GETCW (updated_fpcr); + + if (fpcr_new & ~updated_fpcr) + return 1; + } + + if (excepts & (fpcr_new >> FE_EXCEPT_SHIFT)) + return __feraiseexcept (excepts); + + return 0; +} +libm_hidden_def (__feupdateenv) +weak_alias (__feupdateenv, feupdateenv) +libm_hidden_weak (feupdateenv) diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fgetexcptflg.c b/REORG.TODO/sysdeps/aarch64/fpu/fgetexcptflg.c new file mode 100644 index 0000000000..348c58ac40 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/fgetexcptflg.c @@ -0,0 +1,27 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <math_private.h> + /* Store the result of libc_fetestexcept_aarch64 (EXCEPTS) in *FLAGP. Always returns 0. */ +int +fegetexceptflag (fexcept_t *flagp, int excepts) +{ + *flagp = libc_fetestexcept_aarch64 (excepts); + return 0; +} diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fpu_control.h b/REORG.TODO/sysdeps/aarch64/fpu/fpu_control.h new file mode 100644 index 0000000000..efad68b98e --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/fpu_control.h @@ -0,0 +1,81 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _AARCH64_FPU_CONTROL_H +#define _AARCH64_FPU_CONTROL_H + +/* Macros for accessing the FPCR and FPSR. 
*/
+/* _FPU_GETCW: read the FPCR system register ("mrs") into the lvalue argument.  */
+#define _FPU_GETCW(fpcr) \
+  __asm__ __volatile__ ("mrs %0, fpcr" : "=r" (fpcr))
+/* _FPU_SETCW: write the argument to the FPCR system register ("msr").  */
+#define _FPU_SETCW(fpcr) \
+  __asm__ __volatile__ ("msr fpcr, %0" : : "r" (fpcr))
+/* _FPU_GETFPSR: read the FPSR system register ("mrs") into the lvalue argument.  */
+#define _FPU_GETFPSR(fpsr) \
+  __asm__ __volatile__ ("mrs %0, fpsr" : "=r" (fpsr))
+/* _FPU_SETFPSR: write the argument to the FPSR system register ("msr").  */
+#define _FPU_SETFPSR(fpsr) \
+  __asm__ __volatile__ ("msr fpsr, %0" : : "r" (fpsr))
+
+/* Reserved bits should be preserved when modifying register
+   contents.  These two masks indicate which bits in each of FPCR and
+   FPSR should not be changed.  */
+
+#define _FPU_RESERVED 0xfe0fe0ff
+#define _FPU_FPSR_RESERVED 0x0fffffe0
+/* Values OR-ed on top of the reserved bits for the default environment.  */
+#define _FPU_DEFAULT 0x00000000
+#define _FPU_FPSR_DEFAULT 0x00000000
+
+/* Layout of FPCR and FPSR:
+
+ | | | | | | | |
+ 0 0 0 0 1 1 1 0 0 0 0 0 1 0 0 0 1 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0
+ s s s s s s s s s s s
+ c c c c c c c c c c c c
+ N Z C V Q A D F R R S S S L L L I U U I U O D I I U U I U O D I
+ C H N Z M M T T B E E E D N N X F F Z O D N N X F F Z O
+ P O O R R Z N N N E K K E E E E E C K K C C C C C
+ D D I I P
+ E E D D
+ E E
+ */
+/* NOTE(review): the column alignment of the diagram above was lost in this dump.  */
+#define _FPU_FPCR_RM_MASK 0xc00000 /* Bits 23:22.  */
+
+#define _FPU_FPCR_MASK_IXE 0x1000 /* Bit 12.  */
+#define _FPU_FPCR_MASK_UFE 0x0800 /* Bit 11.  */
+#define _FPU_FPCR_MASK_OFE 0x0400 /* Bit 10.  */
+#define _FPU_FPCR_MASK_DZE 0x0200 /* Bit 9.  */
+#define _FPU_FPCR_MASK_IOE 0x0100 /* Bit 8.  */
+/* _FPU_DEFAULT with all five _FPU_FPCR_MASK_* bits (FPCR bits 12:8) set.  */
+#define _FPU_FPCR_IEEE \
+  (_FPU_DEFAULT | _FPU_FPCR_MASK_IXE | \
+   _FPU_FPCR_MASK_UFE | _FPU_FPCR_MASK_OFE | \
+   _FPU_FPCR_MASK_DZE | _FPU_FPCR_MASK_IOE)
+
+#define _FPU_FPSR_IEEE 0
+/* Both register images are handled as plain unsigned int values.  */
+typedef unsigned int fpu_control_t;
+typedef unsigned int fpu_fpsr_t;
+
+/* Default control word set at startup.  */
+extern fpu_control_t __fpu_control;
+
+#endif
diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fraiseexcpt.c b/REORG.TODO/sysdeps/aarch64/fpu/fraiseexcpt.c
new file mode 100644
index 0000000000..557b061ca7
--- /dev/null
+++ b/REORG.TODO/sysdeps/aarch64/fpu/fraiseexcpt.c
@@ -0,0 +1,93 @@
+/* Copyright (C) 1997-2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <fpu_control.h>
+#include <float.h>
+/* Raise each exception in EXCEPTS by executing an operation that triggers it; returns 0.  */
+int
+__feraiseexcept (int excepts)
+{
+  int fpsr; /* Only an output operand for the forced FPSR reads; never used in C.  */
+  const float fp_zero = 0.0;
+  const float fp_one = 1.0;
+  const float fp_max = FLT_MAX;
+  const float fp_min = FLT_MIN;
+  const float fp_1e32 = 1.0e32f;
+  const float fp_two = 2.0;
+  const float fp_three = 3.0;
+
+  /* Raise exceptions represented by EXCEPTS.  But we must raise only
+     one signal at a time.  It is important that if the OVERFLOW or
+     UNDERFLOW exception and the inexact exception are given at the
+     same time, the OVERFLOW or UNDERFLOW exception precedes the
+     INEXACT exception.
+
+     After each exception we read from the FPSR, to force the
+     exception to be raised immediately.  */
+  /* Invalid: 0.0 / 0.0.  */
+  if (FE_INVALID & excepts)
+    __asm__ __volatile__ (
+                          "ldr s0, %1\n\t"
+                          "fdiv s0, s0, s0\n\t"
+                          "mrs %0, fpsr" : "=r" (fpsr)
+                          : "m" (fp_zero)
+                          : "d0");
+  /* Divide by zero: 1.0 / 0.0.  */
+  if (FE_DIVBYZERO & excepts)
+    __asm__ __volatile__ (
+                          "ldr s0, %1\n\t"
+                          "ldr s1, %2\n\t"
+                          "fdiv s0, s0, s1\n\t"
+                          "mrs %0, fpsr" : "=r" (fpsr)
+                          : "m" (fp_one), "m" (fp_zero)
+                          : "d0", "d1");
+  /* Overflow: FLT_MAX + 1.0e32f.  */
+  if (FE_OVERFLOW & excepts)
+    /* There's no way to raise overflow without also raising inexact.  */
+    __asm__ __volatile__ (
+                          "ldr s0, %1\n\t"
+                          "ldr s1, %2\n\t"
+                          "fadd s0, s0, s1\n\t"
+                          "mrs %0, fpsr" : "=r" (fpsr)
+                          : "m" (fp_max), "m" (fp_1e32)
+                          : "d0", "d1");
+  /* Underflow: FLT_MIN / 3.0.  */
+  if (FE_UNDERFLOW & excepts)
+    __asm__ __volatile__ (
+                          "ldr s0, %1\n\t"
+                          "ldr s1, %2\n\t"
+                          "fdiv s0, s0, s1\n\t"
+                          "mrs %0, fpsr" : "=r" (fpsr)
+                          : "m" (fp_min), "m" (fp_three)
+                          : "d0", "d1");
+  /* Inexact: 2.0 / 3.0.  */
+  if (FE_INEXACT & excepts)
+    __asm__ __volatile__ (
+                          "ldr s0, %1\n\t"
+                          "ldr s1, %2\n\t"
+                          "fdiv s0, s0, s1\n\t"
+                          "mrs %0, fpsr" : "=r" (fpsr)
+                          : "m" (fp_two), "m" (fp_three)
+                          : "d0", "d1");
+
+  return 0;
+}
+libm_hidden_def (__feraiseexcept)
+weak_alias (__feraiseexcept, feraiseexcept)
+libm_hidden_weak (feraiseexcept)
diff --git a/REORG.TODO/sysdeps/aarch64/fpu/fsetexcptflg.c b/REORG.TODO/sysdeps/aarch64/fpu/fsetexcptflg.c
new file mode 100644
index 0000000000..dfa4810f58
--- /dev/null
+++ b/REORG.TODO/sysdeps/aarch64/fpu/fsetexcptflg.c
@@ -0,0 +1,41 @@
+/* Copyright (C) 1997-2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <fpu_control.h>
+/* Set the FPSR flags selected by EXCEPTS to their values in *FLAGP; always returns 0.  */
+int
+fesetexceptflag (const fexcept_t *flagp, int excepts)
+{
+  fpu_fpsr_t fpsr;
+  fpu_fpsr_t fpsr_new;
+
+  /* Get the current environment.  */
+  _FPU_GETFPSR (fpsr);
+  excepts &= FE_ALL_EXCEPT; /* Ignore any bit that is not an exception flag.  */
+
+  /* Set the desired exception mask.  */
+  fpsr_new = fpsr & ~excepts;
+  fpsr_new |= *flagp & excepts;
+
+  /* Save state back to the FPU.  */
+  if (fpsr != fpsr_new)
+    _FPU_SETFPSR (fpsr_new);
+
+  return 0;
+}
diff --git a/REORG.TODO/sysdeps/aarch64/fpu/ftestexcept.c b/REORG.TODO/sysdeps/aarch64/fpu/ftestexcept.c
new file mode 100644
index 0000000000..d41c46bedc
--- /dev/null
+++ b/REORG.TODO/sysdeps/aarch64/fpu/ftestexcept.c
@@ -0,0 +1,27 @@
+/* Copyright (C) 1997-2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <math_private.h>
+/* Return libc_fetestexcept_aarch64 (EXCEPTS): the EXCEPTS flags raised in FPSR.  */
+int
+fetestexcept (int excepts)
+{
+  return libc_fetestexcept_aarch64 (excepts);
+}
+libm_hidden_def (fetestexcept)
diff --git a/REORG.TODO/sysdeps/aarch64/fpu/get-rounding-mode.h b/REORG.TODO/sysdeps/aarch64/fpu/get-rounding-mode.h
new file mode 100644
index 0000000000..17ec111abc
--- /dev/null
+++ b/REORG.TODO/sysdeps/aarch64/fpu/get-rounding-mode.h
@@ -0,0 +1,38 @@
+/* Determine floating-point rounding mode within libc.  AArch64 version.
+
+   Copyright (C) 2012-2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _AARCH64_GET_ROUNDING_MODE_H +#define _AARCH64_GET_ROUNDING_MODE_H 1 + +#include <fenv.h> +#include <fpu_control.h> + +/* Return the floating-point rounding mode. */ + +static inline int +get_rounding_mode (void) +{ + fpu_control_t fpcr; + + _FPU_GETCW (fpcr); + return fpcr & _FPU_FPCR_RM_MASK; +} + +#endif /* get-rounding-mode.h */ diff --git a/REORG.TODO/sysdeps/aarch64/fpu/math_private.h b/REORG.TODO/sysdeps/aarch64/fpu/math_private.h new file mode 100644 index 0000000000..807111ea5a --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/math_private.h @@ -0,0 +1,324 @@ +/* Private floating point rounding and exceptions handling. AArch64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef AARCH64_MATH_PRIVATE_H
+#define AARCH64_MATH_PRIVATE_H 1
+
+#include <fenv.h>
+#include <fpu_control.h>
+/* Empty-asm barriers: math_opt_barrier yields X after a "+w" pass, math_force_eval only consumes X.  */
+#define math_opt_barrier(x) \
+({ __typeof (x) __x = (x); __asm ("" : "+w" (__x)); __x; })
+#define math_force_eval(x) \
+({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "w" (__x)); })
+/* Double-precision square root as a single "fsqrt" instruction.  */
+extern __always_inline double
+__ieee754_sqrt (double d)
+{
+  double res;
+  asm __volatile__ ("fsqrt %d0, %d1" : "=w" (res) : "w" (d));
+  return res;
+}
+/* Single-precision square root as a single "fsqrt" instruction.  */
+extern __always_inline float
+__ieee754_sqrtf (float s)
+{
+  float res;
+  asm __volatile__ ("fsqrt %s0, %s1" : "=w" (res) : "w" (s));
+  return res;
+}
+/* Save FPCR/FPSR in *ENVP, then clear the exception flags and their FPCR enable bits.  */
+static __always_inline void
+libc_feholdexcept_aarch64 (fenv_t *envp)
+{
+  fpu_control_t fpcr;
+  fpu_control_t new_fpcr;
+  fpu_fpsr_t fpsr;
+  fpu_fpsr_t new_fpsr;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+  envp->__fpcr = fpcr;
+  envp->__fpsr = fpsr;
+
+  /* Clear exception flags and set all exceptions to non-stop.  */
+  new_fpcr = fpcr & ~(FE_ALL_EXCEPT << FE_EXCEPT_SHIFT);
+  new_fpsr = fpsr & ~FE_ALL_EXCEPT;
+  /* Each register is written back only when its value actually changes.  */
+  if (__glibc_unlikely (new_fpcr != fpcr))
+    _FPU_SETCW (new_fpcr);
+
+  if (new_fpsr != fpsr)
+    _FPU_SETFPSR (new_fpsr);
+}
+
+#define libc_feholdexcept libc_feholdexcept_aarch64
+#define libc_feholdexceptf libc_feholdexcept_aarch64
+#define libc_feholdexceptl libc_feholdexcept_aarch64
+/* Set the FPCR rounding-mode field from ROUND, writing FPCR only on a change.  */
+static __always_inline void
+libc_fesetround_aarch64 (int round)
+{
+  fpu_control_t fpcr;
+
+  _FPU_GETCW (fpcr);
+
+  /* Check whether rounding modes are different.  */
+  round = (fpcr ^ round) & _FPU_FPCR_RM_MASK; /* Now the differing mode bits.  */
+
+  /* Set new rounding mode if different.  */
+  if (__glibc_unlikely (round != 0))
+    _FPU_SETCW (fpcr ^ round);
+}
+
+#define libc_fesetround libc_fesetround_aarch64
+#define libc_fesetroundf libc_fesetround_aarch64
+#define libc_fesetroundl libc_fesetround_aarch64
+/* As libc_feholdexcept_aarch64, additionally installing rounding mode ROUND.  */
+static __always_inline void
+libc_feholdexcept_setround_aarch64 (fenv_t *envp, int round)
+{
+  fpu_control_t fpcr;
+  fpu_control_t new_fpcr;
+  fpu_fpsr_t fpsr;
+  fpu_fpsr_t new_fpsr;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+  envp->__fpcr = fpcr;
+  envp->__fpsr = fpsr;
+
+  /* Clear exception flags, set all exceptions to non-stop,
+     and set new rounding mode.  */
+  new_fpcr = fpcr & ~((FE_ALL_EXCEPT << FE_EXCEPT_SHIFT) | _FPU_FPCR_RM_MASK);
+  new_fpcr |= round; /* ROUND is OR-ed in unmasked.  */
+  new_fpsr = fpsr & ~FE_ALL_EXCEPT;
+
+  if (__glibc_unlikely (new_fpcr != fpcr))
+    _FPU_SETCW (new_fpcr);
+
+  if (new_fpsr != fpsr)
+    _FPU_SETFPSR (new_fpsr);
+}
+
+#define libc_feholdexcept_setround libc_feholdexcept_setround_aarch64
+#define libc_feholdexcept_setroundf libc_feholdexcept_setround_aarch64
+#define libc_feholdexcept_setroundl libc_feholdexcept_setround_aarch64
+/* Return the exception flags in EX that are currently raised in FPSR.  */
+static __always_inline int
+libc_fetestexcept_aarch64 (int ex)
+{
+  fpu_fpsr_t fpsr;
+
+  _FPU_GETFPSR (fpsr);
+  return fpsr & ex & FE_ALL_EXCEPT;
+}
+
+#define libc_fetestexcept libc_fetestexcept_aarch64
+#define libc_fetestexceptf libc_fetestexcept_aarch64
+#define libc_fetestexceptl libc_fetestexcept_aarch64
+/* Install *ENVP: FPCR is written only if it differs, FPSR unconditionally.  */
+static __always_inline void
+libc_fesetenv_aarch64 (const fenv_t *envp)
+{
+  fpu_control_t fpcr;
+  fpu_control_t new_fpcr;
+
+  _FPU_GETCW (fpcr);
+  new_fpcr = envp->__fpcr;
+
+  if (__glibc_unlikely (fpcr != new_fpcr))
+    _FPU_SETCW (new_fpcr);
+
+  _FPU_SETFPSR (envp->__fpsr);
+}
+
+#define libc_fesetenv libc_fesetenv_aarch64
+#define libc_fesetenvf libc_fesetenv_aarch64
+#define libc_fesetenvl libc_fesetenv_aarch64
+#define libc_feresetround_noex libc_fesetenv_aarch64
+#define libc_feresetround_noexf libc_fesetenv_aarch64
+#define libc_feresetround_noexl libc_fesetenv_aarch64
+/* Install *ENVP merged with the raised flags; return those flags masked by EX.  */
+static __always_inline int
+libc_feupdateenv_test_aarch64 (const fenv_t *envp, int ex)
+{
+  fpu_control_t fpcr;
+  fpu_control_t new_fpcr;
+  fpu_fpsr_t fpsr;
+  fpu_fpsr_t new_fpsr;
+  int excepts;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+
+  /* Merge current exception flags with the saved fenv.  */
+  excepts = fpsr & FE_ALL_EXCEPT;
+  new_fpcr = envp->__fpcr;
+  new_fpsr = envp->__fpsr | excepts;
+
+  if (__glibc_unlikely (fpcr != new_fpcr))
+    _FPU_SETCW (new_fpcr);
+
+  if (fpsr != new_fpsr)
+    _FPU_SETFPSR (new_fpsr);
+
+  /* Raise the exceptions if enabled in the new FP state.  */
+  if (__glibc_unlikely (excepts & (new_fpcr >> FE_EXCEPT_SHIFT)))
+    __feraiseexcept (excepts);
+
+  return excepts & ex;
+}
+
+#define libc_feupdateenv_test libc_feupdateenv_test_aarch64
+#define libc_feupdateenv_testf libc_feupdateenv_test_aarch64
+#define libc_feupdateenv_testl libc_feupdateenv_test_aarch64
+/* libc_feupdateenv_test_aarch64 with the test result discarded.  */
+static __always_inline void
+libc_feupdateenv_aarch64 (const fenv_t *envp)
+{
+  libc_feupdateenv_test_aarch64 (envp, 0);
+}
+
+#define libc_feupdateenv libc_feupdateenv_aarch64
+#define libc_feupdateenvf libc_feupdateenv_aarch64
+#define libc_feupdateenvl libc_feupdateenv_aarch64
+/* Save FPCR/FPSR in *ENVP and switch the rounding mode to ROUND; FPSR is not written.  */
+static __always_inline void
+libc_feholdsetround_aarch64 (fenv_t *envp, int round)
+{
+  fpu_control_t fpcr;
+  fpu_fpsr_t fpsr;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+  envp->__fpcr = fpcr;
+  envp->__fpsr = fpsr;
+
+  /* Check whether rounding modes are different.  */
+  round = (fpcr ^ round) & _FPU_FPCR_RM_MASK;
+
+  /* Set new rounding mode if different.  */
+  if (__glibc_unlikely (round != 0))
+    _FPU_SETCW (fpcr ^ round);
+}
+
+#define libc_feholdsetround libc_feholdsetround_aarch64
+#define libc_feholdsetroundf libc_feholdsetround_aarch64
+#define libc_feholdsetroundl libc_feholdsetround_aarch64
+/* Restore only the rounding-mode field saved in ENVP->__fpcr.  */
+static __always_inline void
+libc_feresetround_aarch64 (fenv_t *envp)
+{
+  fpu_control_t fpcr;
+  int round;
+
+  _FPU_GETCW (fpcr);
+
+  /* Check whether rounding modes are different.  */
+  round = (envp->__fpcr ^ fpcr) & _FPU_FPCR_RM_MASK;
+
+  /* Restore the rounding mode if it was changed.  */
+  if (__glibc_unlikely (round != 0))
+    _FPU_SETCW (fpcr ^ round);
+}
+
+#define libc_feresetround libc_feresetround_aarch64
+#define libc_feresetroundf libc_feresetround_aarch64
+#define libc_feresetroundl libc_feresetround_aarch64
+
+/* We have support for rounding mode context.  */
+#define HAVE_RM_CTX 1
+/* Save FPCR in CTX, switch the rounding mode to R, and record whether FPCR changed.  */
+static __always_inline void
+libc_feholdsetround_aarch64_ctx (struct rm_ctx *ctx, int r)
+{
+  fpu_control_t fpcr;
+  int round;
+
+  _FPU_GETCW (fpcr);
+  ctx->env.__fpcr = fpcr;
+
+  /* Check whether rounding modes are different.  */
+  round = (fpcr ^ r) & _FPU_FPCR_RM_MASK;
+  ctx->updated_status = round != 0;
+
+  /* Set the rounding mode if changed.  */
+  if (__glibc_unlikely (round != 0))
+    _FPU_SETCW (fpcr ^ round);
+}
+
+#define libc_feholdsetround_ctx libc_feholdsetround_aarch64_ctx
+#define libc_feholdsetroundf_ctx libc_feholdsetround_aarch64_ctx
+#define libc_feholdsetroundl_ctx libc_feholdsetround_aarch64_ctx
+/* Write back the whole FPCR saved in CTX if CTX->updated_status is set.  */
+static __always_inline void
+libc_feresetround_aarch64_ctx (struct rm_ctx *ctx)
+{
+  /* Restore the rounding mode if updated.  */
+  if (__glibc_unlikely (ctx->updated_status))
+    _FPU_SETCW (ctx->env.__fpcr);
+}
+
+#define libc_feresetround_ctx libc_feresetround_aarch64_ctx
+#define libc_feresetroundf_ctx libc_feresetround_aarch64_ctx
+#define libc_feresetroundl_ctx libc_feresetround_aarch64_ctx
+/* Like libc_feholdsetround_aarch64_ctx, but FPSR is saved in CTX as well.  */
+static __always_inline void
+libc_feholdsetround_noex_aarch64_ctx (struct rm_ctx *ctx, int r)
+{
+  fpu_control_t fpcr;
+  fpu_fpsr_t fpsr;
+  int round;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+  ctx->env.__fpcr = fpcr;
+  ctx->env.__fpsr = fpsr;
+
+  /* Check whether rounding modes are different.  */
+  round = (fpcr ^ r) & _FPU_FPCR_RM_MASK;
+  ctx->updated_status = round != 0;
+
+  /* Set the rounding mode if changed.  */
+  if (__glibc_unlikely (round != 0))
+    _FPU_SETCW (fpcr ^ round);
+}
+
+#define libc_feholdsetround_noex_ctx libc_feholdsetround_noex_aarch64_ctx
+#define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_aarch64_ctx
+#define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_aarch64_ctx
+/* Restore the saved FPCR if it was changed, and always restore the saved FPSR.  */
+static __always_inline void
+libc_feresetround_noex_aarch64_ctx (struct rm_ctx *ctx)
+{
+  /* Restore the rounding mode if updated.  */
+  if (__glibc_unlikely (ctx->updated_status))
+    _FPU_SETCW (ctx->env.__fpcr);
+
+  /* Write new FPSR to restore exception flags.  */
+  _FPU_SETFPSR (ctx->env.__fpsr);
+}
+
+#define libc_feresetround_noex_ctx libc_feresetround_noex_aarch64_ctx
+#define libc_feresetround_noexf_ctx libc_feresetround_noex_aarch64_ctx
+#define libc_feresetround_noexl_ctx libc_feresetround_noex_aarch64_ctx
+
+#include_next <math_private.h>
+
+#endif
diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_ceil.c b/REORG.TODO/sysdeps/aarch64/fpu/s_ceil.c
new file mode 100644
index 0000000000..d0a8bd8981
--- /dev/null
+++ b/REORG.TODO/sysdeps/aarch64/fpu/s_ceil.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.
*/ + +#define FUNC ceil +#define INSN "frintp" +#include <s_frint.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_ceilf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_ceilf.c new file mode 100644 index 0000000000..b9c2e7c3e5 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_ceilf.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define FUNC ceilf +#define INSN "frintp" +#include <s_frintf.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_floor.c b/REORG.TODO/sysdeps/aarch64/fpu/s_floor.c new file mode 100644 index 0000000000..f7f8731d98 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_floor.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define FUNC floor +#define INSN "frintm" +#include <s_frint.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_floorf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_floorf.c new file mode 100644 index 0000000000..7be63b5a04 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_floorf.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define FUNC floorf +#define INSN "frintm" +#include <s_frintf.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_fma.c b/REORG.TODO/sysdeps/aarch64/fpu/s_fma.c new file mode 100644 index 0000000000..6f62ce2365 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_fma.c @@ -0,0 +1,45 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+/* Template: FUNC, TYPE and REGS may be predefined before this file is included.  */
+#ifndef FUNC
+# define FUNC fma
+#endif
+/* Default to double in "d" registers; a predefined TYPE must come with its REGS.  */
+#ifndef TYPE
+# define TYPE double
+# define REGS "d"
+#else
+# ifndef REGS
+#  error REGS not defined
+# endif
+#endif
+/* Extra expansion level so that FUNC is replaced before token pasting.  */
+#define __CONCATX(a,b) __CONCAT(a,b)
+/* Define __FUNC (x, y, z) as one "fmadd" with operands x, y, z (x * y + z on AArch64).  */
+TYPE
+__CONCATX(__,FUNC) (TYPE x, TYPE y, TYPE z)
+{
+  TYPE result;
+  asm ( "fmadd" "\t%" REGS "0, %" REGS "1, %" REGS "2, %" REGS "3"
+        : "=w" (result) : "w" (x), "w" (y), "w" (z) );
+  return result;
+}
+
+weak_alias (__CONCATX(__,FUNC), FUNC)
diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_fmaf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_fmaf.c
new file mode 100644
index 0000000000..880a22dfd4
--- /dev/null
+++ b/REORG.TODO/sysdeps/aarch64/fpu/s_fmaf.c
@@ -0,0 +1,22 @@
+/* Copyright (C) 2011-2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.
*/ + +#define FUNC fmaf +#define TYPE float +#define REGS "s" +#include <s_fma.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_fmax.c b/REORG.TODO/sysdeps/aarch64/fpu/s_fmax.c new file mode 100644 index 0000000000..395a9bacfd --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_fmax.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define FUNC fmax +#define INSN "fmaxnm" +#include <fpu/s_fmin.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_fmaxf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_fmaxf.c new file mode 100644 index 0000000000..f450d9fe82 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_fmaxf.c @@ -0,0 +1,23 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define FUNC fmaxf +#define INSN "fmaxnm" +#define TYPE float +#define REGS "s" +#include <fpu/s_fmin.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_fmin.c b/REORG.TODO/sysdeps/aarch64/fpu/s_fmin.c new file mode 100644 index 0000000000..b6d32d5050 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_fmin.c @@ -0,0 +1,49 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/
+
+#include <math.h>
+/* Template: FUNC, INSN, TYPE and REGS may be predefined before this file is included.  */
+#ifndef FUNC
+# define FUNC fmin
+#endif
+/* INSN is the two-operand instruction mnemonic; "fminnm" unless overridden.  */
+#ifndef INSN
+# define INSN "fminnm"
+#endif
+/* Default to double in "d" registers; a predefined TYPE must come with its REGS.  */
+#ifndef TYPE
+# define TYPE double
+# define REGS "d"
+#else
+# ifndef REGS
+#  error REGS not defined
+# endif
+#endif
+/* Extra expansion level so that FUNC is replaced before token pasting.  */
+#define __CONCATX(a,b) __CONCAT(a,b)
+/* Define __FUNC (x, y) as a single INSN instruction on REGS-sized registers.  */
+TYPE
+__CONCATX(__,FUNC) (TYPE x, TYPE y)
+{
+  TYPE result;
+  asm ( INSN "\t%" REGS "0, %" REGS "1, %" REGS "2"
+        : "=w" (result) : "w" (x), "w" (y) );
+  return result;
+}
+
+weak_alias (__CONCATX(__,FUNC), FUNC)
diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_fminf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_fminf.c
new file mode 100644
index 0000000000..032262d953
--- /dev/null
+++ b/REORG.TODO/sysdeps/aarch64/fpu/s_fminf.c
@@ -0,0 +1,22 @@
+/* Copyright (C) 2011-2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+/* Instantiate the s_fmin.c template as fminf: float in "s" registers, default INSN.  */
+#define FUNC fminf
+#define TYPE float
+#define REGS "s"
+#include <fpu/s_fmin.c>
diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_frint.c b/REORG.TODO/sysdeps/aarch64/fpu/s_frint.c
new file mode 100644
index 0000000000..48881f5868
--- /dev/null
+++ b/REORG.TODO/sysdeps/aarch64/fpu/s_frint.c
@@ -0,0 +1,49 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+/* Template with no default name: the including file must define FUNC and INSN.  */
+#ifndef FUNC
+# error FUNC not defined
+#endif
+/* Default to double in "d" registers; a predefined TYPE must come with its REGS.  */
+#ifndef TYPE
+# define TYPE double
+# define REGS "d"
+#else
+# ifndef REGS
+#  error REGS not defined
+# endif
+#endif
+
+#ifndef INSN
+# error INSN not defined
+#endif
+/* Extra expansion level so that FUNC is replaced before token pasting.  */
+#define __CONCATX(a,b) __CONCAT(a,b)
+/* Define __FUNC (x) as a single INSN instruction on REGS-sized registers.  */
+TYPE
+__CONCATX(__,FUNC) (TYPE x)
+{
+  TYPE result;
+  asm ( INSN "\t%" REGS "0, %" REGS "1" :
+        "=w" (result) : "w" (x) );
+  return result;
+}
+
+weak_alias (__CONCATX(__,FUNC), FUNC)
diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_frintf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_frintf.c
new file mode 100644
index 0000000000..dae99d7816
--- /dev/null
+++ b/REORG.TODO/sysdeps/aarch64/fpu/s_frintf.c
@@ -0,0 +1,24 @@
+/* Copyright (C) 2011-2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef FUNC +# error FUNC not defined +#endif +#define TYPE float +#define REGS "s" +#include <s_frint.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_llrint.c b/REORG.TODO/sysdeps/aarch64/fpu/s_llrint.c new file mode 100644 index 0000000000..c0d0d0e879 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_llrint.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define FUNC llrint +#define OTYPE long long int +#include <s_lrint.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_llrintf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_llrintf.c new file mode 100644 index 0000000000..67724c6d47 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_llrintf.c @@ -0,0 +1,23 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* llrintf: the s_lrint.c template with a float argument (s registers) and a long long int result. */ +#define FUNC llrintf +#define ITYPE float +#define IREGS "s" +#define OTYPE long long int +#include <s_lrint.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_llround.c b/REORG.TODO/sysdeps/aarch64/fpu/s_llround.c new file mode 100644 index 0000000000..ed4b192d5c --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_llround.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* llround: the s_lround.c template with its default double argument and a long long int result. */ +#define FUNC llround +#define OTYPE long long int +#include <s_lround.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_llroundf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_llroundf.c new file mode 100644 index 0000000000..360ce8b4c5 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_llroundf.c @@ -0,0 +1,23 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* llroundf: the s_lround.c template with a float argument (s registers) and a long long int result. */ +#define FUNC llroundf +#define ITYPE float +#define IREGS "s" +#define OTYPE long long int +#include <s_lround.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_lrint.c b/REORG.TODO/sysdeps/aarch64/fpu/s_lrint.c new file mode 100644 index 0000000000..8c61a039bf --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_lrint.c @@ -0,0 +1,53 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> +/* Template for the lrint family. Every parameter has a default: FUNC lrint, ITYPE double with IREGS d, OTYPE long int. An includer that overrides ITYPE must also define IREGS. */ +#ifndef FUNC +# define FUNC lrint +#endif + +#ifndef ITYPE +# define ITYPE double +# define IREGS "d" +#else +# ifndef IREGS +# error IREGS not defined +# endif +#endif + +#ifndef OTYPE +# define OTYPE long int +#endif +/* The integer result is always produced in an x (64-bit) register, whatever OTYPE is. */ +#define OREGS "x" + +#define __CONCATX(a,b) __CONCAT(a,b) +/* Define __FUNC as two instructions: frintx writes the rounded value of X into TEMP, then fcvtzs converts TEMP into the integer RESULT. Per the Arm ISA, frintx rounds to an integral value in the current rounding mode and signals inexact, and fcvtzs converts toward zero. */ +OTYPE +__CONCATX(__,FUNC) (ITYPE x) +{ + OTYPE result; + ITYPE temp; + asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t" + "fcvtzs" "\t%" OREGS "0, %" IREGS "1" + : "=r" (result), "=w" (temp) : "w" (x) ); + return result; +} +/* Also provide the implementation under the plain name FUNC. */ +weak_alias (__CONCATX(__,FUNC), FUNC) diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_lrintf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_lrintf.c new file mode 100644 index 0000000000..a995e4b96f --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_lrintf.c @@ -0,0 +1,22 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* lrintf: the s_lrint.c template with a float argument (s registers) and the default long int result. */ +#define FUNC lrintf +#define ITYPE float +#define IREGS "s" +#include <s_lrint.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_lround.c b/REORG.TODO/sysdeps/aarch64/fpu/s_lround.c new file mode 100644 index 0000000000..9be9e7fb0f --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_lround.c @@ -0,0 +1,51 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +/* Template for the lround family. Every parameter has a default: FUNC lround, ITYPE double with IREGS d, OTYPE long int. An includer that overrides ITYPE must also define IREGS. */ +#ifndef FUNC +# define FUNC lround +#endif + +#ifndef ITYPE +# define ITYPE double +# define IREGS "d" +#else +# ifndef IREGS +# error IREGS not defined +# endif +#endif + +#ifndef OTYPE +# define OTYPE long int +#endif +/* The integer result is always produced in an x (64-bit) register, whatever OTYPE is. */ +#define OREGS "x" + +#define __CONCATX(a,b) __CONCAT(a,b) +/* Define __FUNC as a single fcvtas from the floating-point register holding X to the integer RESULT. Per the Arm ISA, fcvtas converts to a signed integer rounding to nearest with ties away from zero. */ +OTYPE +__CONCATX(__,FUNC) (ITYPE x) +{ + OTYPE result; + asm ( "fcvtas" "\t%" OREGS "0, %" IREGS "1" + : "=r" (result) : "w" (x) ); + return result; +} +/* Also provide the implementation under the plain name FUNC. */ +weak_alias (__CONCATX(__,FUNC), FUNC) diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_lroundf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_lroundf.c new file mode 100644 index 0000000000..4a066d4816 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_lroundf.c @@ -0,0 +1,22 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* lroundf: the s_lround.c template with a float argument (s registers) and the default long int result. */ +#define FUNC lroundf +#define ITYPE float +#define IREGS "s" +#include <s_lround.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_nearbyint.c b/REORG.TODO/sysdeps/aarch64/fpu/s_nearbyint.c new file mode 100644 index 0000000000..51067f23c8 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_nearbyint.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* nearbyint: the s_frint.c (double) template instantiated with the frinti instruction. */ +#define FUNC nearbyint +#define INSN "frinti" +#include <s_frint.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_nearbyintf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_nearbyintf.c new file mode 100644 index 0000000000..8125646c2e --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_nearbyintf.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* nearbyintf: the s_frintf.c (float) template instantiated with the frinti instruction. */ +#define FUNC nearbyintf +#define INSN "frinti" +#include <s_frintf.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_rint.c b/REORG.TODO/sysdeps/aarch64/fpu/s_rint.c new file mode 100644 index 0000000000..73b4e26786 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_rint.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ +/* rint: the s_frint.c (double) template instantiated with the frintx instruction. */ +#define FUNC rint +#define INSN "frintx" +#include <s_frint.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_rintf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_rintf.c new file mode 100644 index 0000000000..3560dc2827 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_rintf.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* rintf: the s_frintf.c (float) template instantiated with the frintx instruction. */ +#define FUNC rintf +#define INSN "frintx" +#include <s_frintf.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_round.c b/REORG.TODO/sysdeps/aarch64/fpu/s_round.c new file mode 100644 index 0000000000..67817485c3 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_round.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* round: the s_frint.c (double) template instantiated with the frinta instruction. */ +#define FUNC round +#define INSN "frinta" +#include <s_frint.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_roundf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_roundf.c new file mode 100644 index 0000000000..ef6f672c7d --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_roundf.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* roundf: the s_frintf.c (float) template instantiated with the frinta instruction. */ +#define FUNC roundf +#define INSN "frinta" +#include <s_frintf.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_trunc.c b/REORG.TODO/sysdeps/aarch64/fpu/s_trunc.c new file mode 100644 index 0000000000..2bf5474a7e --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_trunc.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* trunc: the s_frint.c (double) template instantiated with the frintz instruction. */ +#define FUNC trunc +#define INSN "frintz" +#include <s_frint.c> diff --git a/REORG.TODO/sysdeps/aarch64/fpu/s_truncf.c b/REORG.TODO/sysdeps/aarch64/fpu/s_truncf.c new file mode 100644 index 0000000000..94865a470b --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/fpu/s_truncf.c @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* truncf: the s_frintf.c (float) template instantiated with the frintz instruction. */ +#define FUNC truncf +#define INSN "frintz" +#include <s_frintf.c> diff --git a/REORG.TODO/sysdeps/aarch64/jmpbuf-offsets.h b/REORG.TODO/sysdeps/aarch64/jmpbuf-offsets.h new file mode 100644 index 0000000000..b877392f7d --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/jmpbuf-offsets.h @@ -0,0 +1,60 @@ +/* Copyright (C) 2006-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ +/* Element indices into a __jmp_buf for the saved general registers (it is indexed as jmpbuf[JB_SP] below). Index 12 is not assigned a name here. */ +#define JB_X19 0 +#define JB_X20 1 +#define JB_X21 2 +#define JB_X22 3 +#define JB_X23 4 +#define JB_X24 5 +#define JB_X25 6 +#define JB_X26 7 +#define JB_X27 8 +#define JB_X28 9 +#define JB_X29 10 +#define JB_LR 11 +#define JB_SP 13 +/* Element indices for the saved d8-d15 registers, continuing after JB_SP. */ +#define JB_D8 14 +#define JB_D9 15 +#define JB_D10 16 +#define JB_D11 17 +#define JB_D12 18 +#define JB_D13 19 +#define JB_D14 20 +#define JB_D15 21 + +#ifndef __ASSEMBLER__ +#include <setjmp.h> +#include <stdint.h> +#include <sysdep.h> +/* Return the stack pointer value stored in JMPBUF, passed through PTR_DEMANGLE when that macro is defined. */ +static inline uintptr_t __attribute__ ((unused)) +_jmpbuf_sp (__jmp_buf jmpbuf) +{ + uintptr_t sp = jmpbuf[JB_SP]; +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (sp); +#endif + return sp; +} +#endif + +/* Helper for generic ____longjmp_chk(). Defined outside the __ASSEMBLER__ guard, but it expands to a call to _jmpbuf_sp, which is only declared for C. */ +#define JB_FRAME_ADDRESS(buf) \ + ((void *) _jmpbuf_sp (buf)) diff --git a/REORG.TODO/sysdeps/aarch64/jmpbuf-unwind.h b/REORG.TODO/sysdeps/aarch64/jmpbuf-unwind.h new file mode 100644 index 0000000000..8073508884 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/jmpbuf-unwind.h @@ -0,0 +1,37 @@ +/* Copyright (C) 2005-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <setjmp.h> +#include <jmpbuf-offsets.h> +#include <stdint.h> +#include <unwind.h> + +/* Test if longjmp to JMPBUF would unwind the frame + containing a local variable at ADDRESS. DEMANGLE is applied to the saved stack pointer before the comparison. */ +#define _JMPBUF_UNWINDS(jmpbuf, address, demangle) \ + ((void *) (address) < (void *) demangle (jmpbuf[JB_SP])) +/* Same test as _JMPBUF_UNWINDS_ADJ, with the address taken from the CFA of the unwind CONTEXT. */ +#define _JMPBUF_CFA_UNWINDS_ADJ(jmpbuf, context, adj) \ + _JMPBUF_UNWINDS_ADJ (jmpbuf, (void *) (uintptr_t) _Unwind_GetCFA (context), \ + adj) +/* True when ADDRESS minus ADJ is below the stack pointer saved in JMPBUF (read through _jmpbuf_sp) minus ADJ; both sides are compared as uintptr_t. */ +#define _JMPBUF_UNWINDS_ADJ(_jmpbuf, _address, _adj) \ + ((uintptr_t) (_address) - (_adj) < _jmpbuf_sp (_jmpbuf) - (_adj)) + +/* We use the normal longjmp for unwinding. */ +#define __libc_unwind_longjmp(buf, val) __libc_longjmp (buf, val) diff --git a/REORG.TODO/sysdeps/aarch64/ldsodefs.h b/REORG.TODO/sysdeps/aarch64/ldsodefs.h new file mode 100644 index 0000000000..ba4ada3a04 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/ldsodefs.h @@ -0,0 +1,49 @@ +/* Copyright (C) 2005-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _AARCH64_LDSODEFS_H +#define _AARCH64_LDSODEFS_H 1 + +#include <elf.h> +#include <cpu-features.h> +/* Forward declarations: only pointers to these structures are used in this header. */ +struct La_aarch64_regs; +struct La_aarch64_retval; +/* Declares the function-pointer member aarch64_gnu_pltenter. Presumably expanded inside a structure by the generic ldsodefs.h included below - TODO confirm. */ +#define ARCH_PLTENTER_MEMBERS \ + ElfW(Addr) (*aarch64_gnu_pltenter) (ElfW(Sym) *, \ + unsigned int, \ + uintptr_t *, \ + uintptr_t *, \ + struct La_aarch64_regs *, \ + unsigned int *, \ + const char *, \ + long int *) +/* Declares the function-pointer member aarch64_gnu_pltexit; unlike the enter variant it takes the register block as const and adds a La_aarch64_retval pointer. */ +#define ARCH_PLTEXIT_MEMBERS \ + ElfW(Addr) (*aarch64_gnu_pltexit) (ElfW(Sym) *, \ + unsigned int, \ + uintptr_t *, \ + uintptr_t *, \ + const struct La_aarch64_regs *, \ + struct La_aarch64_retval *, \ + const char *) +/* Continue with the next ldsodefs.h on the include path, now that the ARCH_* macros are defined. */ +#include_next <ldsodefs.h> + +#endif diff --git a/REORG.TODO/sysdeps/aarch64/libc-tls.c b/REORG.TODO/sysdeps/aarch64/libc-tls.c new file mode 100644 index 0000000000..4aef6f1c11 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/libc-tls.c @@ -0,0 +1,32 @@ +/* Copyright (C) 2005-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <csu/libc-tls.c> +#include <dl-tls.h> + +/* On AArch64, linker optimizations are not required, so __tls_get_addr + can be called even in statically linked binaries. In this case the module + ID must always be 1 and a PT_TLS segment must exist in the binary, otherwise it + would not link. */ +/* Return the address of the TLS variable described by TI: the base pointer held in dtv[1] of the current thread plus ti->ti_offset. TI's module field is not consulted. */ +void * +__tls_get_addr (tls_index *ti) +{ + dtv_t *dtv = THREAD_DTV (); + return (char *) dtv[1].pointer.val + ti->ti_offset; +} diff --git a/REORG.TODO/sysdeps/aarch64/libm-test-ulps b/REORG.TODO/sysdeps/aarch64/libm-test-ulps new file mode 100644 index 0000000000..0f987bca59 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/libm-test-ulps @@ -0,0 +1,2276 @@ +# Begin of automatic generation + +# Maximal error of functions: +Function: "acos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acos_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acosh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "acosh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "acosh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "acosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "asin": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "asin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "asin_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + 
+Function: "asinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "asinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "asinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "asinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "atan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan2": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan2_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atan2_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "atan2_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "atan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "atanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "atanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "cabs": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_downward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_towardzero": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_upward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cacos": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cacos": +double: 2 
+float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacos_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cacos_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "cacos_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cacos_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "cacos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cacos_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 7 +ldouble: 7 + +Function: Real part of "cacosh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cacosh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacosh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "cacosh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: Real part of "cacosh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "cacosh_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "cacosh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "cacosh_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "carg": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "carg_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "carg_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "carg_upward": +double: 1 
+float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "casin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "casin": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "casin_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "casin_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "casin_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "casin_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "casin_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "casin_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 7 +ldouble: 7 + +Function: Real part of "casinh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "casinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "casinh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "casinh_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "casinh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "casinh_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "casinh_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "casinh_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: 
Imaginary part of "catan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "catan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "catan_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catan_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "catan_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catan_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "catan_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catanh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "catanh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "catanh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catanh_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "catanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catanh_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "catanh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "catanh_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "cbrt": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 1 +ldouble: 1 + 
+Function: "cbrt_upward": +double: 5 +float: 1 +idouble: 5 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "ccos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ccos_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ccos_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "ccosh_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ccosh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccosh_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ccosh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cexp": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 
1 + +Function: Imaginary part of "cexp": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cexp_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cexp_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cexp_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cexp_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "clog": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "clog10": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "clog10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog10_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog10_downward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog10_towardzero": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "clog10_towardzero": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog10_upward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "clog10_upward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog_downward": +double: 4 +float: 3 +idouble: 4 
+ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog_towardzero": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog_upward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "clog_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "cos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cos_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "cos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "cosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cosh_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 2 + +Function: "cosh_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 2 + +Function: "cosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 3 + +Function: Real part of "cpow": +double: 2 +float: 5 +idouble: 2 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "cpow": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cpow_downward": +double: 4 +float: 8 +idouble: 4 +ifloat: 8 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "cpow_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cpow_towardzero": +double: 4 +float: 8 +idouble: 4 +ifloat: 8 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "cpow_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 
+ldouble: 2 + +Function: Real part of "cpow_upward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cpow_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "csin": +ildouble: 1 +ldouble: 1 + +Function: Real part of "csin_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csin_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csin_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csinh": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "csinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "csinh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csinh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csinh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csinh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "csinh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of 
"csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csqrt_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "csqrt_downward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csqrt_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "csqrt_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csqrt_upward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "csqrt_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ctan": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctan_downward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "ctan_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Real part of "ctan_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "ctan_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Real part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "ctanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ctanh": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + 
+Function: Real part of "ctanh_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "ctanh_downward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Real part of "ctanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "ctanh_towardzero": +double: 5 +float: 2 +idouble: 5 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctanh_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "ctanh_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "erf": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "erf_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erf_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "erf_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erfc": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "erfc_downward": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "erfc_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "erfc_upward": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "exp": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp10": +double: 2 +idouble: 2 +ildouble: 2 +ldouble: 2 + +Function: "exp10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "exp10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "exp10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "exp2": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: 
"exp2_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp_downward": +double: 1 +idouble: 1 + +Function: "exp_towardzero": +double: 1 +idouble: 1 + +Function: "exp_upward": +double: 1 +idouble: 1 + +Function: "expm1": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "expm1_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "expm1_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "expm1_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "gamma": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "gamma_downward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: "gamma_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "gamma_upward": +double: 4 +float: 5 +idouble: 4 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: "hypot": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "hypot_downward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "hypot_towardzero": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "hypot_upward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "j0": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "j0_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "j0_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "j0_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "j1": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + 
+Function: "j1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "j1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "j1_upward": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "jn": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: "jn_downward": +double: 4 +float: 5 +idouble: 4 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: "jn_towardzero": +double: 4 +float: 5 +idouble: 4 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: "jn_upward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: "lgamma": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "lgamma_downward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: "lgamma_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "lgamma_upward": +double: 4 +float: 5 +idouble: 4 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: "log": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "log10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log10_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 1 +ldouble: 1 + +Function: "log10_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log10_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log1p": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "log1p_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "log1p_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "log1p_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log2": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 
2 + +Function: "log2_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "log2_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log2_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 1 +ldouble: 1 + +Function: "log_downward": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log_towardzero": +float: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "pow": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "pow10": +double: 2 +idouble: 2 +ildouble: 2 +ldouble: 2 + +Function: "pow10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "pow10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "pow10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "pow_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "pow_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "pow_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "sin": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sin_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "sin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sincos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sincos_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sincos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "sincos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 
+ldouble: 3 + +Function: "sinh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "sinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "sinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "tan": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "tan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "tan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "tan_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "tanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "tanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "tanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "tanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "tgamma": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "tgamma_downward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 5 +ldouble: 5 + +Function: "tgamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "tgamma_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "y0": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "y0_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "y0_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "y0_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "y1": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 
+ldouble: 2 + +Function: "y1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "y1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "y1_upward": +double: 5 +float: 2 +idouble: 5 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "yn": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "yn_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "yn_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "yn_upward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +# end of automatic generation diff --git a/REORG.TODO/sysdeps/aarch64/libm-test-ulps-name b/REORG.TODO/sysdeps/aarch64/libm-test-ulps-name new file mode 100644 index 0000000000..1f66c5cda0 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/libm-test-ulps-name @@ -0,0 +1 @@ +AArch64 diff --git a/REORG.TODO/sysdeps/aarch64/linkmap.h b/REORG.TODO/sysdeps/aarch64/linkmap.h new file mode 100644 index 0000000000..0ce1f646ef --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/linkmap.h @@ -0,0 +1,23 @@ +/* Copyright (C) 2009-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +struct link_map_machine +{ + ElfW(Addr) plt; /* Address of .plt */ + void *tlsdesc_table; /* Address of TLS descriptor hash table. */ +}; diff --git a/REORG.TODO/sysdeps/aarch64/machine-gmon.h b/REORG.TODO/sysdeps/aarch64/machine-gmon.h new file mode 100644 index 0000000000..14fe3654d0 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/machine-gmon.h @@ -0,0 +1,34 @@ +/* AArch64 definitions for profiling support. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Accept 'frompc' address as argument from the function that calls + __mcount for profiling. The 'selfpc' address passed on to + mcount_internal is RETURN_ADDRESS (0). */ + +#include <sysdep.h> + +static void mcount_internal (u_long frompc, u_long selfpc); + +#define _MCOUNT_DECL(frompc, selfpc) \ +static inline void mcount_internal (u_long frompc, u_long selfpc) + +#define MCOUNT \ +void __mcount (void *frompc) \ +{ \ + mcount_internal ((u_long) frompc, (u_long) RETURN_ADDRESS (0)); \ +} diff --git a/REORG.TODO/sysdeps/aarch64/math-tests.h b/REORG.TODO/sysdeps/aarch64/math-tests.h new file mode 100644 index 0000000000..0e5cbcf621 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/math-tests.h @@ -0,0 +1,22 @@ +/* Configuration for math tests. AArch64 version. 
+ Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Trapping exceptions are optional on AArch64; this macro is true only + for the empty exception set (EXCEPT == 0). */ +#define EXCEPTION_ENABLE_SUPPORTED(EXCEPT) ((EXCEPT) == 0) + +#include_next <math-tests.h> diff --git a/REORG.TODO/sysdeps/aarch64/mcount.c b/REORG.TODO/sysdeps/aarch64/mcount.c new file mode 100644 index 0000000000..52456f0da8 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/mcount.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <shlib-compat.h> + +#include <gmon/mcount.c> + +/* We forgot to add _mcount in glibc 2.17. We added it in 2.18, + therefore we want it to be added with version GLIBC_2_18. However, + setting the version is not straightforward because a generic + Version file includes an earlier 2.xx version for this symbol + and the linker uses the first version it sees. */ + +#if SHLIB_COMPAT (libc, GLIBC_2_17, GLIBC_2_18) +versioned_symbol (libc, __mcount, _mcount, GLIBC_2_18); +#else +strong_alias (__mcount, _mcount); +#endif diff --git a/REORG.TODO/sysdeps/aarch64/memchr.S b/REORG.TODO/sysdeps/aarch64/memchr.S new file mode 100644 index 0000000000..1cde62345b --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/memchr.S @@ -0,0 +1,157 @@ +/* memchr - find a character in a memory zone + + Copyright (C) 2015-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Assumptions: + * + * ARMv8-a, AArch64 + * Neon Available. + */ + +/* Arguments and results. 
*/ +#define srcin x0 +#define chrin w1 +#define cntin x2 + +#define result x0 + +#define src x3 +#define tmp x4 +#define wtmp2 w5 +#define synd x6 +#define soff x9 +#define cntrem x10 + +#define vrepchr v0 +#define vdata1 v1 +#define vdata2 v2 +#define vhas_chr1 v3 +#define vhas_chr2 v4 +#define vrepmask v5 +#define vend v6 + +/* + * Core algorithm: + * + * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits + * per byte. For each tuple, bit 0 is set if the relevant byte matched the + * requested character and bit 1 is not used (faster than using a 32bit + * syndrome). Since the bits in the syndrome reflect exactly the order in which + * things occur in the original string, counting trailing zeros identifies + * exactly which byte has matched. + */ + +ENTRY (__memchr) + /* Do not dereference srcin if no bytes to compare. */ + cbz cntin, L(zero_length) + /* + * Magic constant 0x40100401 allows us to identify which lane matches + * the requested byte. + */ + mov wtmp2, #0x0401 + movk wtmp2, #0x4010, lsl #16 + dup vrepchr.16b, chrin + /* Work with aligned 32-byte chunks */ + bic src, srcin, #31 + dup vrepmask.4s, wtmp2 + ands soff, srcin, #31 /* soff = srcin % 32 */ + and cntrem, cntin, #31 /* cntrem = cntin % 32 */ + b.eq L(loop) /* soff == 0: srcin is already aligned */ + + /* + * Input string is not 32-byte aligned. We calculate the syndrome + * value for the aligned 32-byte block containing the first bytes + * and mask the irrelevant part. 
 + */ + + ld1 {vdata1.16b, vdata2.16b}, [src], #32 + sub tmp, soff, #32 + adds cntin, cntin, tmp /* cntin -= 32 - soff; flags are used by b.ls below */ + cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b + cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b + and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b + and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b + addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */ + addp vend.16b, vend.16b, vend.16b /* 128->64 */ + mov synd, vend.2d[0] + /* Clear the soff*2 lower bits */ + lsl tmp, soff, #1 + lsr synd, synd, tmp + lsl synd, synd, tmp + /* The first block can also be the last */ + b.ls L(masklast) + /* Have we found something already? */ + cbnz synd, L(tail) + +L(loop): + ld1 {vdata1.16b, vdata2.16b}, [src], #32 + subs cntin, cntin, #32 + cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b + cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b + /* If we're out of data we finish regardless of the result */ + b.ls L(end) + /* Use a fast check for the termination condition */ + orr vend.16b, vhas_chr1.16b, vhas_chr2.16b + addp vend.2d, vend.2d, vend.2d + mov synd, vend.2d[0] + /* We're not out of data, loop if we haven't found the character */ + cbz synd, L(loop) + +L(end): + /* Termination condition found, let's calculate the syndrome value */ + and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b + and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b + addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */ + addp vend.16b, vend.16b, vend.16b /* 128->64 */ + mov synd, vend.2d[0] + /* Only do the clear for the last possible block (flags still come + from the subs in the loop) */ + b.hi L(tail) + +L(masklast): + /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits. + A resulting count of 64 acts as 0 (register shifts use the + count modulo 64), so a completely used block is left unmasked. 
*/ + add tmp, cntrem, soff + and tmp, tmp, #31 + sub tmp, tmp, #32 + neg tmp, tmp, lsl #1 + lsl synd, synd, tmp + lsr synd, synd, tmp + +L(tail): + /* Count the trailing zeros using bit reversing */ + rbit synd, synd + /* Compensate the last post-increment */ + sub src, src, #32 + /* Check that we have found a character */ + cmp synd, #0 + /* And count the leading zeros */ + clz synd, synd + /* 
Compute the potential result; each byte owns two syndrome bits, hence lsr #1 */ + add result, src, synd, lsr #1 + /* Select result or NULL */ + csel result, xzr, result, eq + ret + +L(zero_length): + mov result, #0 + ret +END (__memchr) +weak_alias (__memchr, memchr) +libc_hidden_builtin_def (memchr) diff --git a/REORG.TODO/sysdeps/aarch64/memcmp.S b/REORG.TODO/sysdeps/aarch64/memcmp.S new file mode 100644 index 0000000000..4cfcb89297 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/memcmp.S @@ -0,0 +1,154 @@ +/* memcmp - compare memory + + Copyright (C) 2013-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Assumptions: + * + * ARMv8-a, AArch64 + */ + +/* Parameters and result. */ +#define src1 x0 +#define src2 x1 +#define limit x2 +#define result x0 + +/* Internal variables. 
*/ +#define data1 x3 +#define data1w w3 +#define data2 x4 +#define data2w w4 +#define has_nul x5 +#define diff x6 +#define endloop x7 +#define tmp1 x8 +#define tmp2 x9 +#define tmp3 x10 +#define pos x11 +#define limit_wd x12 +#define mask x13 + +ENTRY_ALIGN (memcmp, 6) + DELOUSE (0) + DELOUSE (1) + DELOUSE (2) + cbz limit, L(ret0) + eor tmp1, src1, src2 + tst tmp1, #7 + b.ne L(misaligned8) + ands tmp1, src1, #7 + b.ne L(mutual_align) + add limit_wd, limit, #7 + lsr limit_wd, limit_wd, #3 /* limit_wd = number of 8-byte words, rounded up. */ + /* Start of performance-critical section -- one 64B cache line. */ +L(loop_aligned): + ldr data1, [src1], #8 + ldr data2, [src2], #8 +L(start_realigned): + subs limit_wd, limit_wd, #1 + eor diff, data1, data2 /* Non-zero if differences found. */ + csinv endloop, diff, xzr, ne /* Last Dword or differences. */ + cbz endloop, L(loop_aligned) + /* End of performance-critical section -- one 64B cache line. */ + + /* Not reached the limit, must have found a diff. */ + cbnz limit_wd, L(not_limit) + + /* Limit % 8 == 0 => all bytes significant. */ + ands limit, limit, #7 + b.eq L(not_limit) + + lsl limit, limit, #3 /* Bytes -> bits. */ + mov mask, #~0 +#ifdef __AARCH64EB__ + lsr mask, mask, limit +#else + lsl mask, mask, limit +#endif + bic data1, data1, mask + bic data2, data2, mask + + orr diff, diff, mask +L(not_limit): + +#ifndef __AARCH64EB__ + rev diff, diff + rev data1, data1 + rev data2, data2 +#endif + /* The MS-non-zero bit of DIFF marks either the first bit + that is different, or the end of the significant data. + Shifting left now will bring the critical information into the + top bits. */ + clz pos, diff + lsl data1, data1, pos + lsl data2, data2, pos + /* But we need to zero-extend (char is unsigned) the value and then + perform a signed 32-bit subtraction. */ + lsr data1, data1, #56 + sub result, data1, data2, lsr #56 + RET + +L(mutual_align): + /* Sources are mutually aligned, but are not currently at an + alignment boundary.
Round down the addresses and then mask off + the bytes that precede the start point. */ + bic src1, src1, #7 + bic src2, src2, #7 + add limit, limit, tmp1 /* Adjust the limit for the extra. */ + lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ + ldr data1, [src1], #8 + neg tmp1, tmp1 /* Bits to alignment -64. */ + ldr data2, [src2], #8 + mov tmp2, #~0 +#ifdef __AARCH64EB__ + /* Big-endian. Early bytes are at MSB. */ + lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ +#else + /* Little-endian. Early bytes are at LSB. */ + lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ +#endif + add limit_wd, limit, #7 + orr data1, data1, tmp2 + orr data2, data2, tmp2 + lsr limit_wd, limit_wd, #3 + b L(start_realigned) + +L(ret0): + mov result, #0 + RET + + .p2align 6 +L(misaligned8): + sub limit, limit, #1 +1: + /* Perhaps we can do better than this. */ + ldrb data1w, [src1], #1 + ldrb data2w, [src2], #1 + subs limit, limit, #1 + ccmp data1w, data2w, #0, cs /* Compare while more bytes remain (C set); on the last byte force NZCV = 0b0000 (ne) to leave the loop. */ + b.eq 1b + sub result, data1, data2 + RET +END (memcmp) +#undef bcmp +weak_alias (memcmp, bcmp) +libc_hidden_builtin_def (memcmp) diff --git a/REORG.TODO/sysdeps/aarch64/memcpy.S b/REORG.TODO/sysdeps/aarch64/memcpy.S new file mode 100644 index 0000000000..88a3b90e16 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/memcpy.S @@ -0,0 +1,267 @@ +/* Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Assumptions: + * + * ARMv8-a, AArch64, unaligned accesses. + * + */ + +#define dstin x0 +#define src x1 +#define count x2 +#define dst x3 +#define srcend x4 +#define dstend x5 +#define A_l x6 +#define A_lw w6 +#define A_h x7 +#define A_hw w7 +#define B_l x8 +#define B_lw w8 +#define B_h x9 +#define C_l x10 +#define C_h x11 +#define D_l x12 +#define D_h x13 +#define E_l src +#define E_h count +#define F_l srcend +#define F_h dst +#define G_l count +#define G_h dst +#define tmp1 x14 + +/* Copies are split into 3 main cases: small copies of up to 16 bytes, + medium copies of 17..96 bytes which are fully unrolled. Large copies + of more than 96 bytes align the destination and use an unrolled loop + processing 64 bytes per iteration. + In order to share code with memmove, small and medium copies read all + data before writing, allowing any kind of overlap. So small, medium + and large backwards memmoves are handled by falling through into memcpy. + Overlapping large forward memmoves use a loop that copies backwards. +*/ + +#ifndef MEMMOVE +# define MEMMOVE memmove +#endif +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +ENTRY_ALIGN (MEMMOVE, 6) + + DELOUSE (0) + DELOUSE (1) + DELOUSE (2) + + sub tmp1, dstin, src /* tmp1 = dstin - src, compared as unsigned below. */ + cmp count, 96 + ccmp tmp1, count, 2, hi /* If count > 96 compare tmp1 with count; otherwise set C so b.lo falls through. */ + b.lo L(move_long) /* Large copy with dstin inside [src, src + count). */ + + /* Common case falls through into memcpy. */ +END (MEMMOVE) +libc_hidden_builtin_def (MEMMOVE) +ENTRY (MEMCPY) + + DELOUSE (0) + DELOUSE (1) + DELOUSE (2) + + prfm PLDL1KEEP, [src] + add srcend, src, count + add dstend, dstin, count + cmp count, 16 + b.ls L(copy16) + cmp count, 96 + b.hi L(copy_long) + + /* Medium copies: 17..96 bytes.
*/ + sub tmp1, count, 1 + ldp A_l, A_h, [src] + tbnz tmp1, 6, L(copy96) + ldp D_l, D_h, [srcend, -16] + tbz tmp1, 5, 1f + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [srcend, -32] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstend, -32] +1: + stp A_l, A_h, [dstin] + stp D_l, D_h, [dstend, -16] + ret + + .p2align 4 + /* Small copies: 0..16 bytes. */ +L(copy16): + cmp count, 8 + b.lo 1f + ldr A_l, [src] + ldr A_h, [srcend, -8] + str A_l, [dstin] + str A_h, [dstend, -8] + ret + .p2align 4 +1: + tbz count, 2, 1f + ldr A_lw, [src] + ldr A_hw, [srcend, -4] + str A_lw, [dstin] + str A_hw, [dstend, -4] + ret + + /* Copy 0..3 bytes. Use a branchless sequence that copies the same + byte 3 times if count==1, or the 2nd byte twice if count==2. */ +1: + cbz count, 2f + lsr tmp1, count, 1 + ldrb A_lw, [src] + ldrb A_hw, [srcend, -1] + ldrb B_lw, [src, tmp1] + strb A_lw, [dstin] + strb B_lw, [dstin, tmp1] + strb A_hw, [dstend, -1] +2: ret + + .p2align 4 + /* Copy 65..96 bytes (bit 6 of count - 1 is set). Copy 64 bytes from + the start and 32 bytes from the end. */ +L(copy96): + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [src, 32] + ldp D_l, D_h, [src, 48] + ldp E_l, E_h, [srcend, -32] /* E_l/E_h alias src/count; neither is read again on this path. */ + ldp F_l, F_h, [srcend, -16] /* F_l/F_h alias srcend/dst; this is the last load. */ + stp A_l, A_h, [dstin] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstin, 32] + stp D_l, D_h, [dstin, 48] + stp E_l, E_h, [dstend, -32] + stp F_l, F_h, [dstend, -16] + ret + + /* Align DST to 16 byte alignment so that we don't cross cache line + boundaries on both loads and stores. There are at least 96 bytes + to copy, so copy 16 bytes unaligned and then align. The loop + copies 64 bytes per iteration and prefetches one iteration ahead. */ + + .p2align 4 +L(copy_long): + and tmp1, dstin, 15 + bic dst, dstin, 15 + ldp D_l, D_h, [src] + sub src, src, tmp1 + add count, count, tmp1 /* Count is now 16 too large. */ + ldp A_l, A_h, [src, 16] + stp D_l, D_h, [dstin] + ldp B_l, B_h, [src, 32] + ldp C_l, C_h, [src, 48] + ldp D_l, D_h, [src, 64]! + subs count, count, 128 + 16 /* Test and readjust count.
*/ + b.ls L(last64) +L(loop64): + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [src, 16] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [src, 32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [src, 48] + stp D_l, D_h, [dst, 64]! + ldp D_l, D_h, [src, 64]! + subs count, count, 64 + b.hi L(loop64) + + /* Write the last full set of 64 bytes. The remainder is at most 64 + bytes, so it is safe to always copy 64 bytes from the end even if + there is just 1 byte left. */ +L(last64): + ldp E_l, E_h, [srcend, -64] + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [srcend, -48] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [srcend, -32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [srcend, -16] + stp D_l, D_h, [dst, 64] + stp E_l, E_h, [dstend, -64] + stp A_l, A_h, [dstend, -48] + stp B_l, B_h, [dstend, -32] + stp C_l, C_h, [dstend, -16] + ret + + .p2align 4 +L(move_long): + cbz tmp1, 3f /* tmp1 = dstin - src from the memmove entry; zero means nothing to move. */ + + add srcend, src, count + add dstend, dstin, count + + /* Align dstend to 16 byte alignment so that we don't cross cache line + boundaries on both loads and stores. There are at least 96 bytes + to copy, so copy 16 bytes unaligned and then align. The loop + copies 64 bytes per iteration and prefetches one iteration ahead. */ + + and tmp1, dstend, 15 + ldp D_l, D_h, [srcend, -16] + sub srcend, srcend, tmp1 + sub count, count, tmp1 + ldp A_l, A_h, [srcend, -16] + stp D_l, D_h, [dstend, -16] + ldp B_l, B_h, [srcend, -32] + ldp C_l, C_h, [srcend, -48] + ldp D_l, D_h, [srcend, -64]! + sub dstend, dstend, tmp1 + subs count, count, 128 + b.ls 2f + + nop +1: + stp A_l, A_h, [dstend, -16] + ldp A_l, A_h, [srcend, -16] + stp B_l, B_h, [dstend, -32] + ldp B_l, B_h, [srcend, -32] + stp C_l, C_h, [dstend, -48] + ldp C_l, C_h, [srcend, -48] + stp D_l, D_h, [dstend, -64]! + ldp D_l, D_h, [srcend, -64]! + subs count, count, 64 + b.hi 1b + + /* Write the last full set of 64 bytes. The remainder is at most 64 + bytes, so it is safe to always copy 64 bytes from the start even if + there is just 1 byte left.
*/ +2: + ldp G_l, G_h, [src, 48] + stp A_l, A_h, [dstend, -16] + ldp A_l, A_h, [src, 32] + stp B_l, B_h, [dstend, -32] + ldp B_l, B_h, [src, 16] + stp C_l, C_h, [dstend, -48] + ldp C_l, C_h, [src] + stp D_l, D_h, [dstend, -64] + stp G_l, G_h, [dstin, 48] + stp A_l, A_h, [dstin, 32] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstin] +3: ret + +END (MEMCPY) +libc_hidden_builtin_def (MEMCPY) diff --git a/REORG.TODO/sysdeps/aarch64/memmove.S b/REORG.TODO/sysdeps/aarch64/memmove.S new file mode 100644 index 0000000000..0feeac8414 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/memmove.S @@ -0,0 +1 @@ +/* memmove is part of memcpy.S. */ diff --git a/REORG.TODO/sysdeps/aarch64/memset.S b/REORG.TODO/sysdeps/aarch64/memset.S new file mode 100644 index 0000000000..110fd22781 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/memset.S @@ -0,0 +1,195 @@ +/* Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>.
*/ + +#include <sysdep.h> + +/* Assumptions: + * + * ARMv8-a, AArch64, unaligned accesses + * + */ + +#define dstin x0 +#define val x1 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 +#define tmp1 x5 +#define tmp1w w5 +#define tmp2 x6 +#define tmp2w w6 +#define zva_len x7 +#define zva_lenw w7 + +ENTRY_ALIGN (__memset, 6) + + DELOUSE (0) + DELOUSE (2) + + dup v0.16B, valw /* Replicate the fill byte into all 16 lanes of q0. */ + add dstend, dstin, count + + cmp count, 96 + b.hi L(set_long) + cmp count, 16 + b.hs L(set_medium) + mov val, v0.D[0] + + /* Set 0..15 bytes. */ + tbz count, 3, 1f + str val, [dstin] + str val, [dstend, -8] + ret + nop +1: tbz count, 2, 2f + str valw, [dstin] + str valw, [dstend, -4] + ret +2: cbz count, 3f + strb valw, [dstin] + tbz count, 1, 3f + strh valw, [dstend, -2] +3: ret + + /* Set 16..96 bytes. */ +L(set_medium): + str q0, [dstin] + tbnz count, 6, L(set96) + str q0, [dstend, -16] + tbz count, 5, 1f + str q0, [dstin, 16] + str q0, [dstend, -32] +1: ret + + .p2align 4 + /* Set 64..96 bytes. Write 64 bytes from the start and + 32 bytes from the end. */ +L(set96): + str q0, [dstin, 16] + stp q0, q0, [dstin, 32] + stp q0, q0, [dstend, -32] + ret + + .p2align 3 + nop +L(set_long): + and valw, valw, 255 + bic dst, dstin, 15 + str q0, [dstin] + cmp count, 256 + ccmp valw, 0, 0, cs /* Try DC ZVA only when count >= 256 and the fill byte is zero. */ + b.eq L(try_zva) +L(no_zva): + sub count, dstend, dst /* Count is 16 too large. */ + add dst, dst, 16 + sub count, count, 64 + 16 /* Adjust count and bias for loop. */ +1: stp q0, q0, [dst], 64 + stp q0, q0, [dst, -32] +L(tail64): + subs count, count, 64 + b.hi 1b +2: stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] + ret + + .p2align 3 +L(try_zva): + mrs tmp1, dczid_el0 + tbnz tmp1w, 4, L(no_zva) /* Bit 4 of DCZID_EL0 (DZP) set: DC ZVA is prohibited. */ + and tmp1w, tmp1w, 15 /* Keep the low 4 bits: log2 of the block size in words. */ + cmp tmp1w, 4 /* ZVA size is 64 bytes. */ + b.ne L(zva_128) + + /* Write the first and last 64 byte aligned block using stp rather + than using DC ZVA. This is faster on some cores.
+ */ +L(zva_64): + str q0, [dst, 16] + stp q0, q0, [dst, 32] + bic dst, dst, 63 + stp q0, q0, [dst, 64] + stp q0, q0, [dst, 96] + sub count, dstend, dst /* Count is now 128 too large. */ + sub count, count, 128+64+64 /* Adjust count and bias for loop. */ + add dst, dst, 128 + nop +1: dc zva, dst + add dst, dst, 64 + subs count, count, 64 + b.hi 1b + stp q0, q0, [dst, 0] + stp q0, q0, [dst, 32] + stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] + ret + + .p2align 3 +L(zva_128): + cmp tmp1w, 5 /* ZVA size is 128 bytes. */ + b.ne L(zva_other) + + str q0, [dst, 16] + stp q0, q0, [dst, 32] + stp q0, q0, [dst, 64] + stp q0, q0, [dst, 96] + bic dst, dst, 127 + sub count, dstend, dst /* Count is now 128 too large. */ + sub count, count, 128+128 /* Adjust count and bias for loop. */ + add dst, dst, 128 +1: dc zva, dst + add dst, dst, 128 + subs count, count, 128 + b.hi 1b + stp q0, q0, [dstend, -128] + stp q0, q0, [dstend, -96] + stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] + ret + +L(zva_other): + mov tmp2w, 4 + lsl zva_lenw, tmp2w, tmp1w /* zva_len = 4 << (low 4 bits of DCZID_EL0), in bytes. */ + add tmp1, zva_len, 64 /* Max alignment bytes written. */ + cmp count, tmp1 + blo L(no_zva) + + sub tmp2, zva_len, 1 + add tmp1, dst, zva_len + add dst, dst, 16 + subs count, tmp1, dst /* Actual alignment bytes to write. */ + bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */ + beq 2f +1: stp q0, q0, [dst], 64 + stp q0, q0, [dst, -32] + subs count, count, 64 + b.hi 1b +2: mov dst, tmp1 + sub count, dstend, tmp1 /* Remaining bytes to write.
*/ + subs count, count, zva_len + b.lo 4f +3: dc zva, dst + add dst, dst, zva_len + subs count, count, zva_len + b.hs 3b +4: add count, count, zva_len /* Undo the last subtraction: count = bytes still unset. */ + b L(tail64) + +END (__memset) +weak_alias (__memset, memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/aarch64/memusage.h b/REORG.TODO/sysdeps/aarch64/memusage.h new file mode 100644 index 0000000000..908daab462 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/memusage.h @@ -0,0 +1,21 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#define GETSP() ({ register uintptr_t stack_ptr asm ("sp"); stack_ptr; }) /* Statement expression yielding a register variable bound to "sp". */ + +#include <sysdeps/generic/memusage.h> diff --git a/REORG.TODO/sysdeps/aarch64/multiarch/Makefile b/REORG.TODO/sysdeps/aarch64/multiarch/Makefile new file mode 100644 index 0000000000..78d52c717d --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/multiarch/Makefile @@ -0,0 +1,3 @@ +ifeq ($(subdir),string) +sysdep_routines += memcpy_generic memcpy_thunderx +endif diff --git a/REORG.TODO/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/REORG.TODO/sysdeps/aarch64/multiarch/ifunc-impl-list.c new file mode 100644 index 0000000000..c4f23dfb87 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/multiarch/ifunc-impl-list.c @@ -0,0 +1,51 @@ +/* Enumerate available IFUNC implementations of a function. AARCH64 version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <ldsodefs.h> +#include <ifunc-impl-list.h> +#include <init-arch.h> +#include <stdio.h> + +/* Maximum number of IFUNC implementations.
*/ +#define MAX_IFUNC 2 /* Each function below registers two variants: ThunderX and generic. */ + +size_t +__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + size_t max) +{ + assert (max >= MAX_IFUNC); + + size_t i = 0; + + INIT_ARCH (); /* Declares the local `midr' tested by IS_THUNDERX below. */ + + /* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c. */ + IFUNC_IMPL (i, name, memcpy, + IFUNC_IMPL_ADD (array, i, memcpy, IS_THUNDERX (midr), + __memcpy_thunderx) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic)) + IFUNC_IMPL (i, name, memmove, + IFUNC_IMPL_ADD (array, i, memmove, IS_THUNDERX (midr), + __memmove_thunderx) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic)) + + return i; /* NOTE(review): presumably the number of entries the IFUNC_IMPL macros stored in ARRAY; the macros are defined outside this file -- confirm. */ +} diff --git a/REORG.TODO/sysdeps/aarch64/multiarch/init-arch.h b/REORG.TODO/sysdeps/aarch64/multiarch/init-arch.h new file mode 100644 index 0000000000..3af442c538 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/multiarch/init-arch.h @@ -0,0 +1,23 @@ +/* Define INIT_ARCH so that midr is initialized before use by IFUNCs. + This file is part of the GNU C Library. + Copyright (C) 2017 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include <ldsodefs.h> + +#define INIT_ARCH() \ + uint64_t __attribute__((unused)) midr = \ + GLRO(dl_aarch64_cpu_features).midr_el1; /* Expands to a declaration of `midr' that already ends in a semicolon. */ diff --git a/REORG.TODO/sysdeps/aarch64/multiarch/memcpy.c b/REORG.TODO/sysdeps/aarch64/multiarch/memcpy.c new file mode 100644 index 0000000000..9f73efbba7 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/multiarch/memcpy.c @@ -0,0 +1,39 @@ +/* Multiple versions of memcpy. AARCH64 version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in libc. */ + +#if IS_IN (libc) +/* Redefine memcpy so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef memcpy +# define memcpy __redirect_memcpy +# include <string.h> +# include <init-arch.h> + +extern __typeof (__redirect_memcpy) __libc_memcpy; /* The IFUNC symbol; aliased to memcpy at the end of this file. */ + +extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden; + +libc_ifunc (__libc_memcpy, + IS_THUNDERX (midr) ?
__memcpy_thunderx : __memcpy_generic); + +# undef memcpy +strong_alias (__libc_memcpy, memcpy); +#endif diff --git a/REORG.TODO/sysdeps/aarch64/multiarch/memcpy_generic.S b/REORG.TODO/sysdeps/aarch64/multiarch/memcpy_generic.S new file mode 100644 index 0000000000..041a77943d --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/multiarch/memcpy_generic.S @@ -0,0 +1,42 @@ +/* A Generic Optimized memcpy implementation for AARCH64. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The actual memcpy and memmove code is in ../memcpy.S. If we are + building libc this file defines __memcpy_generic and __memmove_generic. + Otherwise the include of ../memcpy.S will define the normal memcpy + and memmove entry points. */ + +#include <sysdep.h> + +#if IS_IN (libc) + +# define MEMCPY __memcpy_generic +# define MEMMOVE __memmove_generic + +/* Do not hide the generic versions of memcpy and memmove, we use them + internally. */ +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) + +/* It doesn't make sense to send libc-internal memcpy calls through a PLT.
*/ + .globl __GI_memcpy; __GI_memcpy = __memcpy_generic + .globl __GI_memmove; __GI_memmove = __memmove_generic + +#endif + +#include "../memcpy.S" diff --git a/REORG.TODO/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/REORG.TODO/sysdeps/aarch64/multiarch/memcpy_thunderx.S new file mode 100644 index 0000000000..5ac9e341bb --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/multiarch/memcpy_thunderx.S @@ -0,0 +1,326 @@ +/* A Thunderx Optimized memcpy implementation for AARCH64. + Copyright (C) 2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The actual code in this memcpy and memmove should be identical to the + generic version except for the code under '#ifdef USE_THUNDERX'. This is + to make it easier to keep this version and the generic version in sync + for changes that are not specific to thunderx. */ + +#include <sysdep.h> + +/* Assumptions: + * + * ARMv8-a, AArch64, unaligned accesses.
+ * + */ + +#define dstin x0 +#define src x1 +#define count x2 +#define dst x3 +#define srcend x4 +#define dstend x5 +#define A_l x6 +#define A_lw w6 +#define A_h x7 +#define A_hw w7 +#define B_l x8 +#define B_lw w8 +#define B_h x9 +#define C_l x10 +#define C_h x11 +#define D_l x12 +#define D_h x13 +#define E_l src +#define E_h count +#define F_l srcend +#define F_h dst +#define G_l count +#define G_h dst +#define tmp1 x14 + +/* Copies are split into 3 main cases: small copies of up to 16 bytes, + medium copies of 17..96 bytes which are fully unrolled. Large copies + of more than 96 bytes align the destination and use an unrolled loop + processing 64 bytes per iteration. + In order to share code with memmove, small and medium copies read all + data before writing, allowing any kind of overlap. So small, medium + and large backwards memmoves are handled by falling through into memcpy. + Overlapping large forward memmoves use a loop that copies backwards. +*/ + +#ifndef MEMMOVE +# define MEMMOVE memmove +#endif +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +#if IS_IN (libc) + +# undef MEMCPY +# define MEMCPY __memcpy_thunderx +# undef MEMMOVE +# define MEMMOVE __memmove_thunderx +# define USE_THUNDERX + +ENTRY_ALIGN (MEMMOVE, 6) + + DELOUSE (0) + DELOUSE (1) + DELOUSE (2) + + sub tmp1, dstin, src /* tmp1 = dstin - src, compared as unsigned below. */ + cmp count, 96 + ccmp tmp1, count, 2, hi /* If count > 96 compare tmp1 with count; otherwise set C so b.lo falls through. */ + b.lo L(move_long) /* Large copy with dstin inside [src, src + count). */ + + /* Common case falls through into memcpy. */ +END (MEMMOVE) +libc_hidden_builtin_def (MEMMOVE) +ENTRY (MEMCPY) + + DELOUSE (0) + DELOUSE (1) + DELOUSE (2) + + prfm PLDL1KEEP, [src] + add srcend, src, count + add dstend, dstin, count + cmp count, 16 + b.ls L(copy16) + cmp count, 96 + b.hi L(copy_long) + + /* Medium copies: 17..96 bytes.
*/ + sub tmp1, count, 1 + ldp A_l, A_h, [src] + tbnz tmp1, 6, L(copy96) + ldp D_l, D_h, [srcend, -16] + tbz tmp1, 5, 1f + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [srcend, -32] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstend, -32] +1: + stp A_l, A_h, [dstin] + stp D_l, D_h, [dstend, -16] + ret + + .p2align 4 + /* Small copies: 0..16 bytes. */ +L(copy16): + cmp count, 8 + b.lo 1f + ldr A_l, [src] + ldr A_h, [srcend, -8] + str A_l, [dstin] + str A_h, [dstend, -8] + ret + .p2align 4 +1: + tbz count, 2, 1f + ldr A_lw, [src] + ldr A_hw, [srcend, -4] + str A_lw, [dstin] + str A_hw, [dstend, -4] + ret + + /* Copy 0..3 bytes. Use a branchless sequence that copies the same + byte 3 times if count==1, or the 2nd byte twice if count==2. */ +1: + cbz count, 2f + lsr tmp1, count, 1 + ldrb A_lw, [src] + ldrb A_hw, [srcend, -1] + ldrb B_lw, [src, tmp1] + strb A_lw, [dstin] + strb B_lw, [dstin, tmp1] + strb A_hw, [dstend, -1] +2: ret + + .p2align 4 + /* Copy 65..96 bytes (bit 6 of count - 1 is set). Copy 64 bytes from + the start and 32 bytes from the end. */ +L(copy96): + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [src, 32] + ldp D_l, D_h, [src, 48] + ldp E_l, E_h, [srcend, -32] /* E_l/E_h alias src/count; neither is read again on this path. */ + ldp F_l, F_h, [srcend, -16] /* F_l/F_h alias srcend/dst; this is the last load. */ + stp A_l, A_h, [dstin] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstin, 32] + stp D_l, D_h, [dstin, 48] + stp E_l, E_h, [dstend, -32] + stp F_l, F_h, [dstend, -16] + ret + + /* Align DST to 16 byte alignment so that we don't cross cache line + boundaries on both loads and stores. There are at least 96 bytes + to copy, so copy 16 bytes unaligned and then align. The loop + copies 64 bytes per iteration and prefetches one iteration ahead. */ + + .p2align 4 +L(copy_long): + +# ifdef USE_THUNDERX + + /* On thunderx, large memcpy's are helped by software prefetching. + This loop is identical to the one below it but with prefetching + instructions included.
For copies of less than 32768 bytes, + the prefetching does not help and slows the code down so we only + use the prefetching loop for the largest memcpys. */ + + cmp count, #32768 + b.lo L(copy_long_without_prefetch) + and tmp1, dstin, 15 + bic dst, dstin, 15 + ldp D_l, D_h, [src] + sub src, src, tmp1 + prfm pldl1strm, [src, 384] + add count, count, tmp1 /* Count is now 16 too large. */ + ldp A_l, A_h, [src, 16] + stp D_l, D_h, [dstin] + ldp B_l, B_h, [src, 32] + ldp C_l, C_h, [src, 48] + ldp D_l, D_h, [src, 64]! + subs count, count, 128 + 16 /* Test and readjust count. */ + +L(prefetch_loop64): + tbz src, #6, 1f /* src advances by 64 per iteration, so the prefetch is issued on alternate iterations. */ + prfm pldl1strm, [src, 512] +1: + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [src, 16] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [src, 32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [src, 48] + stp D_l, D_h, [dst, 64]! + ldp D_l, D_h, [src, 64]! + subs count, count, 64 + b.hi L(prefetch_loop64) + b L(last64) + +L(copy_long_without_prefetch): +# endif + + and tmp1, dstin, 15 + bic dst, dstin, 15 + ldp D_l, D_h, [src] + sub src, src, tmp1 + add count, count, tmp1 /* Count is now 16 too large. */ + ldp A_l, A_h, [src, 16] + stp D_l, D_h, [dstin] + ldp B_l, B_h, [src, 32] + ldp C_l, C_h, [src, 48] + ldp D_l, D_h, [src, 64]! + subs count, count, 128 + 16 /* Test and readjust count. */ + b.ls L(last64) +L(loop64): + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [src, 16] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [src, 32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [src, 48] + stp D_l, D_h, [dst, 64]! + ldp D_l, D_h, [src, 64]! + subs count, count, 64 + b.hi L(loop64) + + /* Write the last full set of 64 bytes. The remainder is at most 64 + bytes, so it is safe to always copy 64 bytes from the end even if + there is just 1 byte left.
*/ +L(last64): + ldp E_l, E_h, [srcend, -64] + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [srcend, -48] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [srcend, -32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [srcend, -16] + stp D_l, D_h, [dst, 64] + stp E_l, E_h, [dstend, -64] + stp A_l, A_h, [dstend, -48] + stp B_l, B_h, [dstend, -32] + stp C_l, C_h, [dstend, -16] + ret + + .p2align 4 +L(move_long): + cbz tmp1, 3f /* tmp1 = dstin - src from the memmove entry; zero means nothing to move. */ + + add srcend, src, count + add dstend, dstin, count + + /* Align dstend to 16 byte alignment so that we don't cross cache line + boundaries on both loads and stores. There are at least 96 bytes + to copy, so copy 16 bytes unaligned and then align. The loop + copies 64 bytes per iteration and prefetches one iteration ahead. */ + + and tmp1, dstend, 15 + ldp D_l, D_h, [srcend, -16] + sub srcend, srcend, tmp1 + sub count, count, tmp1 + ldp A_l, A_h, [srcend, -16] + stp D_l, D_h, [dstend, -16] + ldp B_l, B_h, [srcend, -32] + ldp C_l, C_h, [srcend, -48] + ldp D_l, D_h, [srcend, -64]! + sub dstend, dstend, tmp1 + subs count, count, 128 + b.ls 2f + + nop +1: + stp A_l, A_h, [dstend, -16] + ldp A_l, A_h, [srcend, -16] + stp B_l, B_h, [dstend, -32] + ldp B_l, B_h, [srcend, -32] + stp C_l, C_h, [dstend, -48] + ldp C_l, C_h, [srcend, -48] + stp D_l, D_h, [dstend, -64]! + ldp D_l, D_h, [srcend, -64]! + subs count, count, 64 + b.hi 1b + + /* Write the last full set of 64 bytes. The remainder is at most 64 + bytes, so it is safe to always copy 64 bytes from the start even if + there is just 1 byte left.
*/ +2: + ldp G_l, G_h, [src, 48] + stp A_l, A_h, [dstend, -16] + ldp A_l, A_h, [src, 32] + stp B_l, B_h, [dstend, -32] + ldp B_l, B_h, [src, 16] + stp C_l, C_h, [dstend, -48] + ldp C_l, C_h, [src] + stp D_l, D_h, [dstend, -64] + stp G_l, G_h, [dstin, 48] + stp A_l, A_h, [dstin, 32] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstin] +3: ret + +END (MEMCPY) +libc_hidden_builtin_def (MEMCPY) + +#endif diff --git a/REORG.TODO/sysdeps/aarch64/multiarch/memmove.c b/REORG.TODO/sysdeps/aarch64/multiarch/memmove.c new file mode 100644 index 0000000000..34c6b29bd5 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/multiarch/memmove.c @@ -0,0 +1,39 @@ +/* Multiple versions of memmove. AARCH64 version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in libc. */ + +#if IS_IN (libc) +/* Redefine memmove so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below.
*/ +# undef memmove +# define memmove __redirect_memmove +# include <string.h> +# include <init-arch.h> + +extern __typeof (__redirect_memmove) __libc_memmove; + +extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden; +extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden; + +libc_ifunc (__libc_memmove, + IS_THUNDERX (midr) ? __memmove_thunderx : __memmove_generic); + +# undef memmove +strong_alias (__libc_memmove, memmove); +#endif diff --git a/REORG.TODO/sysdeps/aarch64/nptl/Makefile b/REORG.TODO/sysdeps/aarch64/nptl/Makefile new file mode 100644 index 0000000000..3627748ad8 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/nptl/Makefile @@ -0,0 +1,21 @@ +# Copyright (C) 2005-2017 Free Software Foundation, Inc. +# +# This file is part of the GNU C Library. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library. If not, see +# <http://www.gnu.org/licenses/>. + +ifeq ($(subdir),csu) +gen-as-const-headers += tcb-offsets.sym +endif diff --git a/REORG.TODO/sysdeps/aarch64/nptl/bits/pthreadtypes-arch.h b/REORG.TODO/sysdeps/aarch64/nptl/bits/pthreadtypes-arch.h new file mode 100644 index 0000000000..d13a75db07 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/nptl/bits/pthreadtypes-arch.h @@ -0,0 +1,69 @@ +/* Copyright (C) 2002-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _BITS_PTHREADTYPES_ARCH_H +#define _BITS_PTHREADTYPES_ARCH_H 1 + +#include <endian.h> + +#ifdef __ILP32__ +# define __SIZEOF_PTHREAD_ATTR_T 32 +# define __SIZEOF_PTHREAD_MUTEX_T 32 +# define __SIZEOF_PTHREAD_MUTEXATTR_T 4 +# define __SIZEOF_PTHREAD_CONDATTR_T 4 +# define __SIZEOF_PTHREAD_RWLOCK_T 48 +# define __SIZEOF_PTHREAD_BARRIER_T 20 +# define __SIZEOF_PTHREAD_BARRIERATTR_T 4 +#else +# define __SIZEOF_PTHREAD_ATTR_T 64 +# define __SIZEOF_PTHREAD_MUTEX_T 48 +# define __SIZEOF_PTHREAD_MUTEXATTR_T 8 +# define __SIZEOF_PTHREAD_CONDATTR_T 8 +# define __SIZEOF_PTHREAD_RWLOCK_T 56 +# define __SIZEOF_PTHREAD_BARRIER_T 32 +# define __SIZEOF_PTHREAD_BARRIERATTR_T 8 +#endif +#define __SIZEOF_PTHREAD_COND_T 48 +#define __SIZEOF_PTHREAD_RWLOCKATTR_T 8 + +/* Definitions for internal mutex struct. 
*/ +#define __PTHREAD_COMPAT_PADDING_MID +#define __PTHREAD_COMPAT_PADDING_END +#define __PTHREAD_MUTEX_LOCK_ELISION 0 + +#define __LOCK_ALIGNMENT +#define __ONCE_ALIGNMENT + +struct __pthread_rwlock_arch_t +{ + unsigned int __readers; + unsigned int __writers; + unsigned int __wrphase_futex; + unsigned int __writers_futex; + unsigned int __pad3; + unsigned int __pad4; + int __cur_writer; + int __shared; + unsigned long int __pad1; + unsigned long int __pad2; + unsigned int __flags; +}; + +#define __PTHREAD_RWLOCK_ELISION_EXTRA 0 + +#endif /* bits/pthreadtypes-arch.h */ diff --git a/REORG.TODO/sysdeps/aarch64/nptl/bits/semaphore.h b/REORG.TODO/sysdeps/aarch64/nptl/bits/semaphore.h new file mode 100644 index 0000000000..0c6c05a6b5 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/nptl/bits/semaphore.h @@ -0,0 +1,39 @@ +/* Copyright (C) 2002-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _SEMAPHORE_H +# error "Never use <bits/semaphore.h> directly; include <semaphore.h> instead." +#endif + + +#ifdef __ILP32__ +# define __SIZEOF_SEM_T 16 +#else +# define __SIZEOF_SEM_T 32 +#endif + + +/* Value returned if `sem_open' failed. 
*/ +#define SEM_FAILED ((sem_t *) 0) + + +typedef union +{ + char __size[__SIZEOF_SEM_T]; + long long int __align; +} sem_t; diff --git a/REORG.TODO/sysdeps/aarch64/nptl/pthreaddef.h b/REORG.TODO/sysdeps/aarch64/nptl/pthreaddef.h new file mode 100644 index 0000000000..d0411a57a1 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/nptl/pthreaddef.h @@ -0,0 +1,32 @@ +/* Copyright (C) 2002-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Default stack size. */ +#define ARCH_STACK_DEFAULT_SIZE (2 * 1024 * 1024) + +/* Required stack pointer alignment at beginning. */ +#define STACK_ALIGN 16 + +/* Minimal stack size after allocating thread descriptor and guard size. */ +#define MINIMAL_REST_STACK 2048 + +/* Alignment requirement for TCB. */ +#define TCB_ALIGNMENT 16 + +/* Location of current stack frame. 
*/ +#define CURRENT_STACK_FRAME __builtin_frame_address (0) diff --git a/REORG.TODO/sysdeps/aarch64/nptl/tcb-offsets.sym b/REORG.TODO/sysdeps/aarch64/nptl/tcb-offsets.sym new file mode 100644 index 0000000000..238647dd47 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/nptl/tcb-offsets.sym @@ -0,0 +1,6 @@ +#include <sysdep.h> +#include <tls.h> + +PTHREAD_MULTIPLE_THREADS_OFFSET offsetof (struct pthread, header.multiple_threads) +PTHREAD_TID_OFFSET offsetof (struct pthread, tid) +PTHREAD_SIZEOF sizeof (struct pthread) diff --git a/REORG.TODO/sysdeps/aarch64/nptl/tls.h b/REORG.TODO/sysdeps/aarch64/nptl/tls.h new file mode 100644 index 0000000000..175df39853 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/nptl/tls.h @@ -0,0 +1,136 @@ +/* Copyright (C) 2005-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _TLS_H +#define _TLS_H 1 + +#include <dl-sysdep.h> + +#ifndef __ASSEMBLER__ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <dl-dtv.h> + +#else /* __ASSEMBLER__ */ +# include <tcb-offsets.h> +#endif /* __ASSEMBLER__ */ + +#ifndef __ASSEMBLER__ + +/* Get system call information. */ +# include <sysdep.h> + +/* The TP points to the start of the thread blocks. 
*/ +# define TLS_DTV_AT_TP 1 +# define TLS_TCB_AT_TP 0 + +/* Get the thread descriptor definition. */ +# include <nptl/descr.h> + +typedef struct +{ + dtv_t *dtv; + void *private; +} tcbhead_t; + +/* This is the size of the initial TCB. */ +# define TLS_INIT_TCB_SIZE sizeof (tcbhead_t) + +/* Alignment requirements for the initial TCB. */ +# define TLS_INIT_TCB_ALIGN __alignof__ (struct pthread) + +/* This is the size of the TCB. */ +# define TLS_TCB_SIZE sizeof (tcbhead_t) + +/* This is the size we need before TCB. */ +# define TLS_PRE_TCB_SIZE sizeof (struct pthread) + +/* Alignment requirements for the TCB. */ +# define TLS_TCB_ALIGN __alignof__ (struct pthread) + +/* Install the dtv pointer. The pointer passed is to the element with + index -1 which contains the length. */ +# define INSTALL_DTV(tcbp, dtvp) \ + (((tcbhead_t *) (tcbp))->dtv = (dtvp) + 1) + +/* Install new dtv for current thread. */ +# define INSTALL_NEW_DTV(dtv) \ + (THREAD_DTV() = (dtv)) + +/* Return dtv of given thread descriptor. */ +# define GET_DTV(tcbp) \ + (((tcbhead_t *) (tcbp))->dtv) + +/* Code to initially initialize the thread pointer. This might need + special attention since 'errno' is not yet available and if the + operation can cause a failure 'errno' must not be touched. */ +# define TLS_INIT_TP(tcbp) \ + ({ __asm __volatile ("msr tpidr_el0, %0" : : "r" (tcbp)); NULL; }) + +/* Value passed to 'clone' for initialization of the thread register. */ +# define TLS_DEFINE_INIT_TP(tp, pd) void *tp = (pd) + 1 + +/* Return the address of the dtv for the current thread. */ +# define THREAD_DTV() \ + (((tcbhead_t *) __builtin_thread_pointer ())->dtv) + +/* Return the thread descriptor for the current thread. */ +# define THREAD_SELF \ + ((struct pthread *)__builtin_thread_pointer () - 1) + +/* Magic for libthread_db to know how to do THREAD_SELF. */ +# define DB_THREAD_SELF \ + CONST_THREAD_AREA (64, sizeof (struct pthread)) + +/* Access to data in the thread descriptor is easy. 
*/ +# define THREAD_GETMEM(descr, member) \ + descr->member +# define THREAD_GETMEM_NC(descr, member, idx) \ + descr->member[idx] +# define THREAD_SETMEM(descr, member, value) \ + descr->member = (value) +# define THREAD_SETMEM_NC(descr, member, idx, value) \ + descr->member[idx] = (value) + +/* Get and set the global scope generation counter in struct pthread. */ +# define THREAD_GSCOPE_FLAG_UNUSED 0 +# define THREAD_GSCOPE_FLAG_USED 1 +# define THREAD_GSCOPE_FLAG_WAIT 2 +# define THREAD_GSCOPE_RESET_FLAG() \ + do \ + { int __res \ + = atomic_exchange_rel (&THREAD_SELF->header.gscope_flag, \ + THREAD_GSCOPE_FLAG_UNUSED); \ + if (__res == THREAD_GSCOPE_FLAG_WAIT) \ + lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \ + } \ + while (0) +# define THREAD_GSCOPE_SET_FLAG() \ + do \ + { \ + THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED; \ + atomic_write_barrier (); \ + } \ + while (0) +# define THREAD_GSCOPE_WAIT() \ + GL(dl_wait_lookup_done) () + +# endif /* __ASSEMBLER__ */ + +#endif /* tls.h */ diff --git a/REORG.TODO/sysdeps/aarch64/preconfigure b/REORG.TODO/sysdeps/aarch64/preconfigure new file mode 100644 index 0000000000..d9bd1f8558 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/preconfigure @@ -0,0 +1,6 @@ +case "$machine" in +aarch64*) + base_machine=aarch64 + machine=aarch64 + ;; +esac diff --git a/REORG.TODO/sysdeps/aarch64/rawmemchr.S b/REORG.TODO/sysdeps/aarch64/rawmemchr.S new file mode 100644 index 0000000000..a6e1753ddf --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/rawmemchr.S @@ -0,0 +1,42 @@ +/* rawmemchr - find a character in a memory zone + + Copyright (C) 2015-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Special case rawmemchr (s, 0) as strlen, otherwise tailcall memchr. + Call strlen without setting up a full frame - it preserves x14/x15. +*/ + +ENTRY (__rawmemchr) + cbz w1, L(do_strlen) + mov x2, -1 + b __memchr + +L(do_strlen): + mov x15, x30 + cfi_return_column (x15) + mov x14, x0 + bl __strlen + add x0, x14, x0 + ret x15 + +END (__rawmemchr) +weak_alias (__rawmemchr, rawmemchr) +libc_hidden_builtin_def (__rawmemchr) diff --git a/REORG.TODO/sysdeps/aarch64/setjmp.S b/REORG.TODO/sysdeps/aarch64/setjmp.S new file mode 100644 index 0000000000..adbf21083a --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/setjmp.S @@ -0,0 +1,75 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <jmpbuf-offsets.h> +#include <stap-probe.h> + + /* Keep traditional entry points in with sigsetjmp(). 
*/ +ENTRY (setjmp) + mov x1, #1 + b 1f +END (setjmp) + +ENTRY (_setjmp) + mov x1, #0 + b 1f +END (_setjmp) +libc_hidden_def (_setjmp) + +ENTRY (__sigsetjmp) + DELOUSE (0) + +1: + stp x19, x20, [x0, #JB_X19<<3] + stp x21, x22, [x0, #JB_X21<<3] + stp x23, x24, [x0, #JB_X23<<3] + stp x25, x26, [x0, #JB_X25<<3] + stp x27, x28, [x0, #JB_X27<<3] + +#ifdef PTR_MANGLE + PTR_MANGLE (4, 30, 3, 2) + stp x29, x4, [x0, #JB_X29<<3] +#else + stp x29, x30, [x0, #JB_X29<<3] +#endif + /* setjmp probe takes 3 arguments, address of jump buffer + first argument (8@x0), return value second argument (-4@x1), + and target address (8@x30), respectively. */ + LIBC_PROBE (setjmp, 3, 8@x0, -4@x1, 8@x30) + stp d8, d9, [x0, #JB_D8<<3] + stp d10, d11, [x0, #JB_D10<<3] + stp d12, d13, [x0, #JB_D12<<3] + stp d14, d15, [x0, #JB_D14<<3] +#ifdef PTR_MANGLE + mov x4, sp + PTR_MANGLE (5, 4, 3, 2) + str x5, [x0, #JB_SP<<3] +#else + mov x2, sp + str x2, [x0, #JB_SP<<3] +#endif +#if IS_IN (rtld) + /* In ld.so we never save the signal mask */ + mov w0, #0 + RET +#else + b C_SYMBOL_NAME(__sigjmp_save) +#endif +END (__sigsetjmp) +hidden_def (__sigsetjmp) diff --git a/REORG.TODO/sysdeps/aarch64/soft-fp/Makefile b/REORG.TODO/sysdeps/aarch64/soft-fp/Makefile new file mode 100644 index 0000000000..ada13e8b70 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/soft-fp/Makefile @@ -0,0 +1,3 @@ +ifeq ($(subdir),math) +CPPFLAGS += -I../soft-fp +endif diff --git a/REORG.TODO/sysdeps/aarch64/soft-fp/e_sqrtl.c b/REORG.TODO/sysdeps/aarch64/soft-fp/e_sqrtl.c new file mode 100644 index 0000000000..e80a5b0d5b --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/soft-fp/e_sqrtl.c @@ -0,0 +1,39 @@ +/* long double square root in software floating-point emulation. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdlib.h> +#include <soft-fp.h> +#include <quad.h> + +long double +__ieee754_sqrtl (const long double a) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(C); + long double c; + + FP_INIT_ROUNDMODE; + FP_UNPACK_Q(A, a); + FP_SQRT_Q(C, A); + FP_PACK_Q(c, C); + FP_HANDLE_EXCEPTIONS; + return c; +} +strong_alias (__ieee754_sqrtl, __sqrtl_finite) diff --git a/REORG.TODO/sysdeps/aarch64/soft-fp/sfp-machine.h b/REORG.TODO/sysdeps/aarch64/soft-fp/sfp-machine.h new file mode 100644 index 0000000000..3e969952fa --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/soft-fp/sfp-machine.h @@ -0,0 +1,120 @@ +#include <fenv.h> +#include <fpu_control.h> + +#define _FP_W_TYPE_SIZE 64 +#define _FP_W_TYPE unsigned long long +#define _FP_WS_TYPE signed long long +#define _FP_I_TYPE long long + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_2_wide_3mul(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D 
((_FP_QNANBIT_D << 1) - 1) +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 +#define _FP_QNANNEGATEDP 0 + +/* From my experiments it seems X is chosen unless one of the + NaNs is sNaN, in which case the result is NANSIGN/NANFRAC. */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) | \ + _FP_FRAC_HIGH_RAW_##fs(Y)) & _FP_QNANBIT_##fs) \ + { \ + R##_s = _FP_NANSIGN_##fs; \ + _FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs); \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#define _FP_DECL_EX fpu_control_t _fcw + +#define FP_ROUNDMODE (_fcw & _FPU_FPCR_RM_MASK) + +#define FP_RND_NEAREST FE_TONEAREST +#define FP_RND_ZERO FE_TOWARDZERO +#define FP_RND_PINF FE_UPWARD +#define FP_RND_MINF FE_DOWNWARD + +#define FP_EX_INVALID FE_INVALID +#define FP_EX_OVERFLOW FE_OVERFLOW +#define FP_EX_UNDERFLOW FE_UNDERFLOW +#define FP_EX_DIVZERO FE_DIVBYZERO +#define FP_EX_INEXACT FE_INEXACT + +#define _FP_TININESS_AFTER_ROUNDING 0 + +#define FP_INIT_ROUNDMODE \ +do { \ + _FPU_GETCW (_fcw); \ +} while (0) + +#define FP_HANDLE_EXCEPTIONS \ + do { \ + const float fp_max = __FLT_MAX__; \ + const float fp_min = __FLT_MIN__; \ + const float fp_1e32 = 1.0e32f; \ + const float fp_zero = 0.0; \ + const float fp_one = 1.0; \ + unsigned fpsr; \ + if (_fex & FP_EX_INVALID) \ + { \ + __asm__ __volatile__ ("fdiv\ts0, %s0, %s0" \ + : \ + : "w" (fp_zero) \ + : "s0"); \ + __asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr)); \ + } \ + if (_fex & FP_EX_DIVZERO) \ + { \ + __asm__ __volatile__ ("fdiv\ts0, %s0, %s1" \ + : \ + : "w" (fp_one), "w" (fp_zero) \ + : "s0"); \ + __asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr)); \ + } \ + if (_fex & FP_EX_OVERFLOW) \ + { \ + __asm__ __volatile__ ("fadd\ts0, %s0, %s1" \ + : \ + : "w" (fp_max), "w" (fp_1e32) \ + : "s0"); \ + __asm__ __volatile__ ("mrs\t%0, 
fpsr" : "=r" (fpsr)); \ + } \ + if (_fex & FP_EX_UNDERFLOW) \ + { \ + __asm__ __volatile__ ("fmul\ts0, %s0, %s0" \ + : \ + : "w" (fp_min) \ + : "s0"); \ + __asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr)); \ + } \ + if (_fex & FP_EX_INEXACT) \ + { \ + __asm__ __volatile__ ("fsub\ts0, %s0, %s1" \ + : \ + : "w" (fp_max), "w" (fp_one) \ + : "s0"); \ + __asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr)); \ + } \ + } while (0) + +#define FP_TRAPPING_EXCEPTIONS ((_fcw >> FE_EXCEPT_SHIFT) & FE_ALL_EXCEPT) diff --git a/REORG.TODO/sysdeps/aarch64/sotruss-lib.c b/REORG.TODO/sysdeps/aarch64/sotruss-lib.c new file mode 100644 index 0000000000..adf039d6f4 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/sotruss-lib.c @@ -0,0 +1,51 @@ +/* Override generic sotruss-lib.c to define actual functions for AArch64. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define HAVE_ARCH_PLTENTER +#define HAVE_ARCH_PLTEXIT + +#include <elf/sotruss-lib.c> + +ElfW(Addr) +la_aarch64_gnu_pltenter (ElfW(Sym) *sym __attribute__ ((unused)), + unsigned int ndx __attribute__ ((unused)), + uintptr_t *refcook, uintptr_t *defcook, + La_aarch64_regs *regs, unsigned int *flags, + const char *symname, long int *framesizep) +{ + print_enter (refcook, defcook, symname, + regs->lr_xreg[0], regs->lr_xreg[1], regs->lr_xreg[2], + *flags); + + /* No need to copy anything, we will not need the parameters in any case. */ + *framesizep = 0; + + return sym->st_value; +} + +unsigned int +la_aarch64_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, + const struct La_aarch64_regs *inregs, + struct La_aarch64_retval *outregs, const char *symname) +{ + print_exit (refcook, defcook, symname, outregs->lrv_xreg[0]); + + return 0; +} diff --git a/REORG.TODO/sysdeps/aarch64/stackinfo.h b/REORG.TODO/sysdeps/aarch64/stackinfo.h new file mode 100644 index 0000000000..ccff0c0986 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/stackinfo.h @@ -0,0 +1,33 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains a bit of information about the stack allocation + of the processor. 
*/ + +#ifndef _STACKINFO_H +#define _STACKINFO_H 1 + +#include <elf.h> + +/* On AArch64 the stack grows down. */ +#define _STACK_GROWS_DOWN 1 + +/* Default to a non-executable stack. */ +#define DEFAULT_STACK_PERMS (PF_R|PF_W) + +#endif /* stackinfo.h */ diff --git a/REORG.TODO/sysdeps/aarch64/start.S b/REORG.TODO/sysdeps/aarch64/start.S new file mode 100644 index 0000000000..df1c642c12 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/start.S @@ -0,0 +1,94 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* This is the canonical entry point, usually the first thing in the text + segment. + + Note that the code in the .init section has already been run. + This includes _init and _libc_init + + + At this entry point, most registers' values are unspecified, except: + + x0/w0 Contains a function pointer to be registered with `atexit'. + This is how the dynamic linker arranges to have DT_FINI + functions called for shared libraries that have been loaded + before this code runs. + + sp The stack contains the arguments and environment: + 0(sp) argc + 8(sp) argv[0] + ... + (8*argc)(sp) NULL + (8*(argc+1))(sp) envp[0] + ... 
+ NULL + */ + + .text + .globl _start + .type _start,#function +_start: + /* Create an initial frame with 0 LR and FP */ + mov x29, #0 + mov x30, #0 + + /* Setup rtld_fini in argument register */ + mov x5, x0 + + /* Load argc and a pointer to argv */ + ldr PTR_REG (1), [sp, #0] + add x2, sp, #PTR_SIZE + + /* Setup stack limit in argument register */ + mov x6, sp + +#ifdef SHARED + adrp x0, :got:main + ldr PTR_REG (0), [x0, #:got_lo12:main] + + adrp x3, :got:__libc_csu_init + ldr PTR_REG (3), [x3, #:got_lo12:__libc_csu_init] + + adrp x4, :got:__libc_csu_fini + ldr PTR_REG (4), [x4, #:got_lo12:__libc_csu_fini] +#else + /* Set up the other arguments in registers */ + ldr PTR_REG (0), =main + ldr PTR_REG (3), =__libc_csu_init + ldr PTR_REG (4), =__libc_csu_fini +#endif + + /* __libc_start_main (main, argc, argv, init, fini, rtld_fini, + stack_end) */ + + /* Let the libc call main and exit with its return code. */ + bl __libc_start_main + + /* should never get here....*/ + bl abort + + /* Define a symbol for the first piece of initialized data. */ + .data + .globl __data_start +__data_start: + .long 0 + .weak data_start + data_start = __data_start diff --git a/REORG.TODO/sysdeps/aarch64/stpcpy.S b/REORG.TODO/sysdeps/aarch64/stpcpy.S new file mode 100644 index 0000000000..061e2b0342 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/stpcpy.S @@ -0,0 +1,20 @@ +/* stpcpy - copy a string returning pointer to end. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define BUILD_STPCPY +#include "strcpy.S" diff --git a/REORG.TODO/sysdeps/aarch64/strchr.S b/REORG.TODO/sysdeps/aarch64/strchr.S new file mode 100644 index 0000000000..c5bc79a8c3 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/strchr.S @@ -0,0 +1,139 @@ +/* strchr - find a character in a string + + Copyright (C) 2014-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Assumptions: + * + * ARMv8-a, AArch64 + */ + +/* Arguments and results. */ +#define srcin x0 +#define chrin w1 + +#define result x0 + +#define src x2 +#define tmp1 x3 +#define wtmp2 w4 +#define tmp3 x5 + +#define vrepchr v0 +#define vdata1 v1 +#define vdata2 v2 +#define vhas_nul1 v3 +#define vhas_nul2 v4 +#define vhas_chr1 v5 +#define vhas_chr2 v6 +#define vrepmask_0 v7 +#define vrepmask_c v16 +#define vend1 v17 +#define vend2 v18 + + /* Core algorithm. + For each 32-byte hunk we calculate a 64-bit syndrome value, with + two bits per byte (LSB is always in bits 0 and 1, for both big + and little-endian systems). 
Bit 0 is set iff the relevant byte + matched the requested character. Bit 1 is set iff the + relevant byte matched the NUL end of string (we trigger off bit0 + for the special case of looking for NUL). Since the bits + in the syndrome reflect exactly the order in which things occur + in the original string a count_trailing_zeros() operation will + identify exactly which byte is causing the termination, and why. */ + +/* Locals and temporaries. */ + +ENTRY (strchr) + DELOUSE (0) + mov wtmp2, #0x0401 + movk wtmp2, #0x4010, lsl #16 + dup vrepchr.16b, chrin + bic src, srcin, #31 + dup vrepmask_c.4s, wtmp2 + ands tmp1, srcin, #31 + add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s // lsl #1 + b.eq L(loop) + + /* Input string is not 32-byte aligned. Rather than forcing + the padding bytes to a safe value, we calculate the syndrome + for all the bytes, but then mask off those bits of the + syndrome that are related to the padding. */ + ld1 {vdata1.16b, vdata2.16b}, [src], #32 + neg tmp1, tmp1 + cmeq vhas_nul1.16b, vdata1.16b, #0 + cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b + cmeq vhas_nul2.16b, vdata2.16b, #0 + cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b + and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b + and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b + and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b + and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b + orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b + orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b + lsl tmp1, tmp1, #1 + addp vend1.16b, vend1.16b, vend2.16b // 256->128 + mov tmp3, #~0 + addp vend1.16b, vend1.16b, vend2.16b // 128->64 + lsr tmp1, tmp3, tmp1 + + mov tmp3, vend1.2d[0] + bic tmp1, tmp3, tmp1 // Mask padding bits. + cbnz tmp1, L(tail) + +L(loop): + ld1 {vdata1.16b, vdata2.16b}, [src], #32 + cmeq vhas_nul1.16b, vdata1.16b, #0 + cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b + cmeq vhas_nul2.16b, vdata2.16b, #0 + cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b + /* Use a fast check for the termination condition. 
*/ + orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b + orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b + orr vend1.16b, vend1.16b, vend2.16b + addp vend1.2d, vend1.2d, vend1.2d + mov tmp1, vend1.2d[0] + cbz tmp1, L(loop) + + /* Termination condition found. Now need to establish exactly why + we terminated. */ + and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b + and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b + and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b + and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b + orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b + orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b + addp vend1.16b, vend1.16b, vend2.16b // 256->128 + addp vend1.16b, vend1.16b, vend2.16b // 128->64 + + mov tmp1, vend1.2d[0] +L(tail): + sub src, src, #32 + rbit tmp1, tmp1 + clz tmp1, tmp1 + /* tmp1 is even if the target character was found first. Otherwise + we've found the end of string and we weren't looking for NUL. */ + tst tmp1, #1 + add result, src, tmp1, lsr #1 + csel result, result, xzr, eq + ret +END (strchr) +libc_hidden_builtin_def (strchr) +weak_alias (strchr, index) diff --git a/REORG.TODO/sysdeps/aarch64/strchrnul.S b/REORG.TODO/sysdeps/aarch64/strchrnul.S new file mode 100644 index 0000000000..2a1c4b84fb --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/strchrnul.S @@ -0,0 +1,131 @@ +/* strchrnul - find a character or nul in a string + + Copyright (C) 2014-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Assumptions: + * + * ARMv8-a, AArch64 + * Neon Available. + */ + +/* Arguments and results. */ +#define srcin x0 +#define chrin w1 + +#define result x0 + +/* Locals and temporaries. */ + +#define src x2 +#define tmp1 x3 +#define wtmp2 w4 +#define tmp3 x5 + +#define vrepchr v0 +#define vdata1 v1 +#define vdata2 v2 +#define vhas_nul1 v3 +#define vhas_nul2 v4 +#define vhas_chr1 v5 +#define vhas_chr2 v6 +#define vrepmask v7 +#define vend1 v16 + +/* Core algorithm. + + For each 32-byte hunk we calculate a 64-bit syndrome value, with + two bits per byte (LSB is always in bits 0 and 1, for both big + and little-endian systems). For each tuple, bit 0 is set iff + the relevant byte matched the requested character or nul. Since the + bits in the syndrome reflect exactly the order in which things occur + in the original string a count_trailing_zeros() operation will + identify exactly which byte is causing the termination. */ + +ENTRY (__strchrnul) + DELOUSE (0) + /* Magic constant 0x40100401 to allow us to identify which lane + matches the termination condition. */ + mov wtmp2, #0x0401 + movk wtmp2, #0x4010, lsl #16 + dup vrepchr.16b, chrin + bic src, srcin, #31 /* Work with aligned 32-byte hunks. */ + dup vrepmask.4s, wtmp2 + ands tmp1, srcin, #31 + b.eq L(loop) + + /* Input string is not 32-byte aligned. Rather than forcing + the padding bytes to a safe value, we calculate the syndrome + for all the bytes, but then mask off those bits of the + syndrome that are related to the padding. 
*/ + ld1 {vdata1.16b, vdata2.16b}, [src], #32 + neg tmp1, tmp1 + cmeq vhas_nul1.16b, vdata1.16b, #0 + cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b + cmeq vhas_nul2.16b, vdata2.16b, #0 + cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b + orr vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b + orr vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b + and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b + and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b + lsl tmp1, tmp1, #1 + addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 + mov tmp3, #~0 + addp vend1.16b, vend1.16b, vend1.16b // 128->64 + lsr tmp1, tmp3, tmp1 + + mov tmp3, vend1.2d[0] + bic tmp1, tmp3, tmp1 // Mask padding bits. + cbnz tmp1, L(tail) + +L(loop): + ld1 {vdata1.16b, vdata2.16b}, [src], #32 + cmeq vhas_nul1.16b, vdata1.16b, #0 + cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b + cmeq vhas_nul2.16b, vdata2.16b, #0 + cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b + /* Use a fast check for the termination condition. */ + orr vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b + orr vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b + orr vend1.16b, vhas_chr1.16b, vhas_chr2.16b + addp vend1.2d, vend1.2d, vend1.2d + mov tmp1, vend1.2d[0] + cbz tmp1, L(loop) + + /* Termination condition found. Now need to establish exactly why + we terminated. */ + and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b + and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b + addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 + addp vend1.16b, vend1.16b, vend1.16b // 128->64 + + mov tmp1, vend1.2d[0] +L(tail): + /* Count the trailing zeros, by bit reversing... */ + rbit tmp1, tmp1 + /* Re-bias source. */ + sub src, src, #32 + clz tmp1, tmp1 /* ... and counting the leading zeros. */ + /* tmp1 is twice the offset into the fragment. 
*/ + add result, src, tmp1, lsr #1 + ret + +END(__strchrnul) +weak_alias (__strchrnul, strchrnul) diff --git a/REORG.TODO/sysdeps/aarch64/strcmp.S b/REORG.TODO/sysdeps/aarch64/strcmp.S new file mode 100644 index 0000000000..e99d6625b7 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/strcmp.S @@ -0,0 +1,157 @@ +/* Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Assumptions: + * + * ARMv8-a, AArch64 + */ + +#include <sysdep.h> + +#define REP8_01 0x0101010101010101 +#define REP8_7f 0x7f7f7f7f7f7f7f7f +#define REP8_80 0x8080808080808080 + +/* Parameters and result. */ +#define src1 x0 +#define src2 x1 +#define result x0 + +/* Internal variables. */ +#define data1 x2 +#define data1w w2 +#define data2 x3 +#define data2w w3 +#define has_nul x4 +#define diff x5 +#define syndrome x6 +#define tmp1 x7 +#define tmp2 x8 +#define tmp3 x9 +#define zeroones x10 +#define pos x11 + + /* Start of performance-critical section -- one 64B cache line. 
*/ +ENTRY_ALIGN(strcmp, 6) + + DELOUSE (0) + DELOUSE (1) + eor tmp1, src1, src2 + mov zeroones, #REP8_01 + tst tmp1, #7 + b.ne L(misaligned8) + ands tmp1, src1, #7 + b.ne L(mutual_align) + /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 + (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + can be done in parallel across the entire word. */ +L(loop_aligned): + ldr data1, [src1], #8 + ldr data2, [src2], #8 +L(start_realigned): + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + eor diff, data1, data2 /* Non-zero if differences found. */ + bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ + orr syndrome, diff, has_nul + cbz syndrome, L(loop_aligned) + /* End of performance-critical section -- one 64B cache line. */ + +#ifndef __AARCH64EB__ + rev syndrome, syndrome + rev data1, data1 + /* The MS-non-zero bit of the syndrome marks either the first bit + that is different, or the top bit of the first zero byte. + Shifting left now will bring the critical information into the + top bits. */ + clz pos, syndrome + rev data2, data2 + lsl data1, data1, pos + lsl data2, data2, pos + /* But we need to zero-extend (char is unsigned) the value and then + perform a signed 32-bit subtraction. */ + lsr data1, data1, #56 + sub result, data1, data2, lsr #56 + RET +#else + /* For big-endian we cannot use the trick with the syndrome value + as carry-propagation can corrupt the upper bits if the trailing + bytes in the string contain 0x01. */ + /* However, if there is no NUL byte in the dword, we can generate + the result directly. We can't just subtract the bytes as the + MSB might be significant. */ + cbnz has_nul, 1f + cmp data1, data2 + cset result, ne + cneg result, result, lo + RET +1: + /* Re-compute the NUL-byte detection, using a byte-reversed value. 
*/ + rev tmp3, data1 + sub tmp1, tmp3, zeroones + orr tmp2, tmp3, #REP8_7f + bic has_nul, tmp1, tmp2 + rev has_nul, has_nul + orr syndrome, diff, has_nul + clz pos, syndrome + /* The MS-non-zero bit of the syndrome marks either the first bit + that is different, or the top bit of the first zero byte. + Shifting left now will bring the critical information into the + top bits. */ + lsl data1, data1, pos + lsl data2, data2, pos + /* But we need to zero-extend (char is unsigned) the value and then + perform a signed 32-bit subtraction. */ + lsr data1, data1, #56 + sub result, data1, data2, lsr #56 + RET +#endif + +L(mutual_align): + /* Sources are mutually aligned, but are not currently at an + alignment boundary. Round down the addresses and then mask off + the bytes that precede the start point. */ + bic src1, src1, #7 + bic src2, src2, #7 + lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ + ldr data1, [src1], #8 + neg tmp1, tmp1 /* Bits to alignment -64. */ + ldr data2, [src2], #8 + mov tmp2, #~0 +#ifdef __AARCH64EB__ + /* Big-endian. Early bytes are at MSB. */ + lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ +#else + /* Little-endian. Early bytes are at LSB. */ + lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ +#endif + orr data1, data1, tmp2 + orr data2, data2, tmp2 + b L(start_realigned) + +L(misaligned8): + /* We can do better than this. */ + ldrb data1w, [src1], #1 + ldrb data2w, [src2], #1 + cmp data1w, #1 + ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ + b.eq L(misaligned8) + sub result, data1, data2 + RET +END(strcmp) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/aarch64/strcpy.S b/REORG.TODO/sysdeps/aarch64/strcpy.S new file mode 100644 index 0000000000..4ef49816fb --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/strcpy.S @@ -0,0 +1,328 @@ +/* strcpy/stpcpy - copy a string returning pointer to start/end. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* To build as stpcpy, define BUILD_STPCPY before compiling this file. + + To test the page crossing code path more thoroughly, compile with + -DSTRCPY_TEST_PAGE_CROSS - this will force all unaligned copies through + the slower entry path. This option is not intended for production use. */ + +#include <sysdep.h> + +/* Assumptions: + * + * ARMv8-a, AArch64, unaligned accesses, min page size 4k. + */ + +/* Arguments and results. */ +#define dstin x0 +#define srcin x1 + +/* Locals and temporaries. */ +#define src x2 +#define dst x3 +#define data1 x4 +#define data1w w4 +#define data2 x5 +#define data2w w5 +#define has_nul1 x6 +#define has_nul2 x7 +#define tmp1 x8 +#define tmp2 x9 +#define tmp3 x10 +#define tmp4 x11 +#define zeroones x12 +#define data1a x13 +#define data2a x14 +#define pos x15 +#define len x16 +#define to_align x17 + +#ifdef BUILD_STPCPY +#define STRCPY __stpcpy +#else +#define STRCPY strcpy +#endif + + /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 + (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + can be done in parallel across the entire word. */ + +#define REP8_01 0x0101010101010101 +#define REP8_7f 0x7f7f7f7f7f7f7f7f +#define REP8_80 0x8080808080808080 + + /* AArch64 systems have a minimum page size of 4k. 
We can do a quick + page size check for crossing this boundary on entry and if we + do not, then we can short-circuit much of the entry code. We + expect early page-crossing strings to be rare (probability of + 16/MIN_PAGE_SIZE ~= 0.4%), so the branch should be quite + predictable, even with random strings. + + We don't bother checking for larger page sizes, the cost of setting + up the correct page size is just not worth the extra gain from + a small reduction in the cases taking the slow path. Note that + we only care about whether the first fetch, which may be + misaligned, crosses a page boundary - after that we move to aligned + fetches for the remainder of the string. */ + +#ifdef STRCPY_TEST_PAGE_CROSS + /* Make everything that isn't Qword aligned look like a page cross. */ +#define MIN_PAGE_P2 4 +#else +#define MIN_PAGE_P2 12 +#endif + +#define MIN_PAGE_SIZE (1 << MIN_PAGE_P2) + +ENTRY_ALIGN (STRCPY, 6) + DELOUSE (0) + DELOUSE (1) + /* For moderately short strings, the fastest way to do the copy is to + calculate the length of the string in the same way as strlen, then + essentially do a memcpy of the result. This avoids the need for + multiple byte copies and further means that by the time we + reach the bulk copy loop we know we can always use DWord + accesses. We expect strcpy to rarely be called repeatedly + with the same source string, so branch prediction is likely to + always be difficult - we mitigate against this by preferring + conditional select operations over branches whenever this is + feasible. */ + and tmp2, srcin, #(MIN_PAGE_SIZE - 1) + mov zeroones, #REP8_01 + and to_align, srcin, #15 + cmp tmp2, #(MIN_PAGE_SIZE - 16) + neg tmp1, to_align + /* The first fetch will straddle a (possible) page boundary iff + srcin + 15 causes bit[MIN_PAGE_P2] to change value. A 16-byte + aligned string will never fail the page align check, so will + always take the fast path. 
*/ + b.gt L(page_cross) + +L(page_cross_ok): + ldp data1, data2, [srcin] +#ifdef __AARCH64EB__ + /* Because we expect the end to be found within 16 characters + (profiling shows this is the most common case), it's worth + swapping the bytes now to save having to recalculate the + termination syndrome later. We preserve data1 and data2 + so that we can re-use the values later on. */ + rev tmp2, data1 + sub tmp1, tmp2, zeroones + orr tmp2, tmp2, #REP8_7f + bics has_nul1, tmp1, tmp2 + b.ne L(fp_le8) + rev tmp4, data2 + sub tmp3, tmp4, zeroones + orr tmp4, tmp4, #REP8_7f +#else + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + bics has_nul1, tmp1, tmp2 + b.ne L(fp_le8) + sub tmp3, data2, zeroones + orr tmp4, data2, #REP8_7f +#endif + bics has_nul2, tmp3, tmp4 + b.eq L(bulk_entry) + + /* The string is short (<=16 bytes). We don't know exactly how + short though, yet. Work out the exact length so that we can + quickly select the optimal copy strategy. */ +L(fp_gt8): + rev has_nul2, has_nul2 + clz pos, has_nul2 + mov tmp2, #56 + add dst, dstin, pos, lsr #3 /* Bits to bytes. */ + sub pos, tmp2, pos +#ifdef __AARCH64EB__ + lsr data2, data2, pos +#else + lsl data2, data2, pos +#endif + str data2, [dst, #1] + str data1, [dstin] +#ifdef BUILD_STPCPY + add dstin, dst, #8 +#endif + ret + +L(fp_le8): + rev has_nul1, has_nul1 + clz pos, has_nul1 + add dst, dstin, pos, lsr #3 /* Bits to bytes. */ + subs tmp2, pos, #24 /* Pos in bits. */ + b.lt L(fp_lt4) +#ifdef __AARCH64EB__ + mov tmp2, #56 + sub pos, tmp2, pos + lsr data2, data1, pos + lsr data1, data1, #32 +#else + lsr data2, data1, tmp2 +#endif + /* 4->7 bytes to copy. */ + str data2w, [dst, #-3] + str data1w, [dstin] +#ifdef BUILD_STPCPY + mov dstin, dst +#endif + ret +L(fp_lt4): + cbz pos, L(fp_lt2) + /* 2->3 bytes to copy. */ +#ifdef __AARCH64EB__ + lsr data1, data1, #48 +#endif + strh data1w, [dstin] + /* Fall-through, one byte (max) to go. */ +L(fp_lt2): + /* Null-terminated string. Last character must be zero! 
*/ + strb wzr, [dst] +#ifdef BUILD_STPCPY + mov dstin, dst +#endif + ret + + .p2align 6 + /* Aligning here ensures that the entry code and main loop all lies + within one 64-byte cache line. */ +L(bulk_entry): + sub to_align, to_align, #16 + stp data1, data2, [dstin] + sub src, srcin, to_align + sub dst, dstin, to_align + b L(entry_no_page_cross) + + /* The inner loop deals with two Dwords at a time. This has a + slightly higher start-up cost, but we should win quite quickly, + especially on cores with a high number of issue slots per + cycle, as we get much better parallelism out of the operations. */ +L(main_loop): + stp data1, data2, [dst], #16 +L(entry_no_page_cross): + ldp data1, data2, [src], #16 + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + sub tmp3, data2, zeroones + orr tmp4, data2, #REP8_7f + bic has_nul1, tmp1, tmp2 + bics has_nul2, tmp3, tmp4 + ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */ + b.eq L(main_loop) + + /* Since we know we are copying at least 16 bytes, the fastest way + to deal with the tail is to determine the location of the + trailing NUL, then (re)copy the 16 bytes leading up to that. */ + cmp has_nul1, #0 +#ifdef __AARCH64EB__ + /* For big-endian, carry propagation (if the final byte in the + string is 0x01) means we cannot use has_nul directly. The + easiest way to get the correct byte is to byte-swap the data + and calculate the syndrome a second time. 
*/ + csel data1, data1, data2, ne + rev data1, data1 + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + bic has_nul1, tmp1, tmp2 +#else + csel has_nul1, has_nul1, has_nul2, ne +#endif + rev has_nul1, has_nul1 + clz pos, has_nul1 + add tmp1, pos, #72 + add pos, pos, #8 + csel pos, pos, tmp1, ne + add src, src, pos, lsr #3 + add dst, dst, pos, lsr #3 + ldp data1, data2, [src, #-32] + stp data1, data2, [dst, #-16] +#ifdef BUILD_STPCPY + sub dstin, dst, #1 +#endif + ret + +L(page_cross): + bic src, srcin, #15 + /* Start by loading two words at [srcin & ~15], then forcing the + bytes that precede srcin to 0xff. This means they never look + like termination bytes. */ + ldp data1, data2, [src] + lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ + tst to_align, #7 + csetm tmp2, ne +#ifdef __AARCH64EB__ + lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ +#else + lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ +#endif + orr data1, data1, tmp2 + orr data2a, data2, tmp2 + cmp to_align, #8 + csinv data1, data1, xzr, lt + csel data2, data2, data2a, lt + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + sub tmp3, data2, zeroones + orr tmp4, data2, #REP8_7f + bic has_nul1, tmp1, tmp2 + bics has_nul2, tmp3, tmp4 + ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */ + b.eq L(page_cross_ok) + /* We now need to make data1 and data2 look like they've been + loaded directly from srcin. Do a rotate on the 128-bit value. */ + lsl tmp1, to_align, #3 /* Bytes->bits. 
*/ + neg tmp2, to_align, lsl #3 +#ifdef __AARCH64EB__ + lsl data1a, data1, tmp1 + lsr tmp4, data2, tmp2 + lsl data2, data2, tmp1 + orr tmp4, tmp4, data1a + cmp to_align, #8 + csel data1, tmp4, data2, lt + rev tmp2, data1 + rev tmp4, data2 + sub tmp1, tmp2, zeroones + orr tmp2, tmp2, #REP8_7f + sub tmp3, tmp4, zeroones + orr tmp4, tmp4, #REP8_7f +#else + lsr data1a, data1, tmp1 + lsl tmp4, data2, tmp2 + lsr data2, data2, tmp1 + orr tmp4, tmp4, data1a + cmp to_align, #8 + csel data1, tmp4, data2, lt + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + sub tmp3, data2, zeroones + orr tmp4, data2, #REP8_7f +#endif + bic has_nul1, tmp1, tmp2 + cbnz has_nul1, L(fp_le8) + bic has_nul2, tmp3, tmp4 + b L(fp_gt8) +END (STRCPY) + +#ifdef BUILD_STPCPY +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) +#else +libc_hidden_builtin_def (strcpy) +#endif diff --git a/REORG.TODO/sysdeps/aarch64/string_private.h b/REORG.TODO/sysdeps/aarch64/string_private.h new file mode 100644 index 0000000000..09dedbf3db --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/string_private.h @@ -0,0 +1,20 @@ +/* Define _STRING_ARCH_unaligned. AArch64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +/* AArch64 implementations support efficient unaligned access. */ +#define _STRING_ARCH_unaligned 1 diff --git a/REORG.TODO/sysdeps/aarch64/strlen.S b/REORG.TODO/sysdeps/aarch64/strlen.S new file mode 100644 index 0000000000..4daf8b8500 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/strlen.S @@ -0,0 +1,220 @@ +/* Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Assumptions: + * + * ARMv8-a, AArch64, unaligned accesses, min page size 4k. + */ + +/* To test the page crossing code path more thoroughly, compile with + -DTEST_PAGE_CROSS - this will force all calls through the slower + entry path. This option is not intended for production use. */ + +/* Arguments and results. */ +#define srcin x0 +#define len x0 + +/* Locals and temporaries. */ +#define src x1 +#define data1 x2 +#define data2 x3 +#define has_nul1 x4 +#define has_nul2 x5 +#define tmp1 x4 +#define tmp2 x5 +#define tmp3 x6 +#define tmp4 x7 +#define zeroones x8 + + /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 + (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + can be done in parallel across the entire word. A faster check + (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives + false hits for characters 129..255. 
*/ + +#define REP8_01 0x0101010101010101 +#define REP8_7f 0x7f7f7f7f7f7f7f7f +#define REP8_80 0x8080808080808080 + +#ifdef TEST_PAGE_CROSS +# define MIN_PAGE_SIZE 15 +#else +# define MIN_PAGE_SIZE 4096 +#endif + + /* Since strings are short on average, we check the first 16 bytes + of the string for a NUL character. In order to do an unaligned ldp + safely we have to do a page cross check first. If there is a NUL + byte we calculate the length from the 2 8-byte words using + conditional select to reduce branch mispredictions (it is unlikely + strlen will be repeatedly called on strings with the same length). + + If the string is longer than 16 bytes, we align src so don't need + further page cross checks, and process 32 bytes per iteration + using the fast NUL check. If we encounter non-ASCII characters, + fallback to a second loop using the full NUL check. + + If the page cross check fails, we read 16 bytes from an aligned + address, remove any characters before the string, and continue + in the main loop using aligned loads. Since strings crossing a + page in the first 16 bytes are rare (probability of + 16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized. + + AArch64 systems have a minimum page size of 4k. We don't bother + checking for larger page sizes - the cost of setting up the correct + page size is just not worth the extra gain from a small reduction in + the cases taking the slow path. Note that we only care about + whether the first fetch, which may be misaligned, crosses a page + boundary. */ + +ENTRY_ALIGN (__strlen, 6) + DELOUSE (0) + DELOUSE (1) + and tmp1, srcin, MIN_PAGE_SIZE - 1 + mov zeroones, REP8_01 + cmp tmp1, MIN_PAGE_SIZE - 16 + b.gt L(page_cross) + ldp data1, data2, [srcin] +#ifdef __AARCH64EB__ + /* For big-endian, carry propagation (if the final byte in the + string is 0x01) means we cannot use has_nul1/2 directly. 
+ Since we expect strings to be small and early-exit, + byte-swap the data now so has_nul1/2 will be correct. */ + rev data1, data1 + rev data2, data2 +#endif + sub tmp1, data1, zeroones + orr tmp2, data1, REP8_7f + sub tmp3, data2, zeroones + orr tmp4, data2, REP8_7f + bics has_nul1, tmp1, tmp2 + bic has_nul2, tmp3, tmp4 + ccmp has_nul2, 0, 0, eq + beq L(main_loop_entry) + + /* Enter with C = has_nul1 == 0. */ + csel has_nul1, has_nul1, has_nul2, cc + mov len, 8 + rev has_nul1, has_nul1 + clz tmp1, has_nul1 + csel len, xzr, len, cc + add len, len, tmp1, lsr 3 + ret + + /* The inner loop processes 32 bytes per iteration and uses the fast + NUL check. If we encounter non-ASCII characters, use a second + loop with the accurate NUL check. */ + .p2align 4 +L(main_loop_entry): + bic src, srcin, 15 + sub src, src, 16 +L(main_loop): + ldp data1, data2, [src, 32]! +L(page_cross_entry): + sub tmp1, data1, zeroones + sub tmp3, data2, zeroones + orr tmp2, tmp1, tmp3 + tst tmp2, zeroones, lsl 7 + bne 1f + ldp data1, data2, [src, 16] + sub tmp1, data1, zeroones + sub tmp3, data2, zeroones + orr tmp2, tmp1, tmp3 + tst tmp2, zeroones, lsl 7 + beq L(main_loop) + add src, src, 16 +1: + /* The fast check failed, so do the slower, accurate NUL check. */ + orr tmp2, data1, REP8_7f + orr tmp4, data2, REP8_7f + bics has_nul1, tmp1, tmp2 + bic has_nul2, tmp3, tmp4 + ccmp has_nul2, 0, 0, eq + beq L(nonascii_loop) + + /* Enter with C = has_nul1 == 0. */ +L(tail): +#ifdef __AARCH64EB__ + /* For big-endian, carry propagation (if the final byte in the + string is 0x01) means we cannot use has_nul1/2 directly. The + easiest way to get the correct byte is to byte-swap the data + and calculate the syndrome a second time. 
*/ + csel data1, data1, data2, cc + rev data1, data1 + sub tmp1, data1, zeroones + orr tmp2, data1, REP8_7f + bic has_nul1, tmp1, tmp2 +#else + csel has_nul1, has_nul1, has_nul2, cc +#endif + sub len, src, srcin + rev has_nul1, has_nul1 + add tmp2, len, 8 + clz tmp1, has_nul1 + csel len, len, tmp2, cc + add len, len, tmp1, lsr 3 + ret + +L(nonascii_loop): + ldp data1, data2, [src, 16]! + sub tmp1, data1, zeroones + orr tmp2, data1, REP8_7f + sub tmp3, data2, zeroones + orr tmp4, data2, REP8_7f + bics has_nul1, tmp1, tmp2 + bic has_nul2, tmp3, tmp4 + ccmp has_nul2, 0, 0, eq + bne L(tail) + ldp data1, data2, [src, 16]! + sub tmp1, data1, zeroones + orr tmp2, data1, REP8_7f + sub tmp3, data2, zeroones + orr tmp4, data2, REP8_7f + bics has_nul1, tmp1, tmp2 + bic has_nul2, tmp3, tmp4 + ccmp has_nul2, 0, 0, eq + beq L(nonascii_loop) + b L(tail) + + /* Load 16 bytes from [srcin & ~15] and force the bytes that precede + srcin to 0x7f, so we ignore any NUL bytes before the string. + Then continue in the aligned loop. */ +L(page_cross): + bic src, srcin, 15 + ldp data1, data2, [src] + lsl tmp1, srcin, 3 + mov tmp4, -1 +#ifdef __AARCH64EB__ + /* Big-endian. Early bytes are at MSB. */ + lsr tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */ +#else + /* Little-endian. Early bytes are at LSB. */ + lsl tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */ +#endif + orr tmp1, tmp1, REP8_80 + orn data1, data1, tmp1 + orn tmp2, data2, tmp1 + tst srcin, 8 + csel data1, data1, tmp4, eq + csel data2, data2, tmp2, eq + b L(page_cross_entry) +END (__strlen) +weak_alias (__strlen, strlen) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/aarch64/strncmp.S b/REORG.TODO/sysdeps/aarch64/strncmp.S new file mode 100644 index 0000000000..3e4d88a5d7 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/strncmp.S @@ -0,0 +1,207 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Assumptions: + * + * ARMv8-a, AArch64 + */ + +#define REP8_01 0x0101010101010101 +#define REP8_7f 0x7f7f7f7f7f7f7f7f +#define REP8_80 0x8080808080808080 + +/* Parameters and result. */ +#define src1 x0 +#define src2 x1 +#define limit x2 +#define result x0 + +/* Internal variables. */ +#define data1 x3 +#define data1w w3 +#define data2 x4 +#define data2w w4 +#define has_nul x5 +#define diff x6 +#define syndrome x7 +#define tmp1 x8 +#define tmp2 x9 +#define tmp3 x10 +#define zeroones x11 +#define pos x12 +#define limit_wd x13 +#define mask x14 +#define endloop x15 + +ENTRY_ALIGN_AND_PAD (strncmp, 6, 7) + DELOUSE (0) + DELOUSE (1) + DELOUSE (2) + cbz limit, L(ret0) + eor tmp1, src1, src2 + mov zeroones, #REP8_01 + tst tmp1, #7 + b.ne L(misaligned8) + ands tmp1, src1, #7 + b.ne L(mutual_align) + /* Calculate the number of full and partial words -1. */ + sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ + lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ + + /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 + (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + can be done in parallel across the entire word. */ + /* Start of performance-critical section -- one 64B cache line. 
*/ +L(loop_aligned): + ldr data1, [src1], #8 + ldr data2, [src2], #8 +L(start_realigned): + subs limit_wd, limit_wd, #1 + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + eor diff, data1, data2 /* Non-zero if differences found. */ + csinv endloop, diff, xzr, pl /* Last Dword or differences. */ + bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ + ccmp endloop, #0, #0, eq + b.eq L(loop_aligned) + /* End of performance-critical section -- one 64B cache line. */ + + /* Not reached the limit, must have found the end or a diff. */ + tbz limit_wd, #63, L(not_limit) + + /* Limit % 8 == 0 => all bytes significant. */ + ands limit, limit, #7 + b.eq L(not_limit) + + lsl limit, limit, #3 /* Bytes -> bits. */ + mov mask, #~0 +#ifdef __AARCH64EB__ + lsr mask, mask, limit +#else + lsl mask, mask, limit +#endif + bic data1, data1, mask + bic data2, data2, mask + + /* Make sure that the NUL byte is marked in the syndrome. */ + orr has_nul, has_nul, mask + +L(not_limit): + orr syndrome, diff, has_nul + +#ifndef __AARCH64EB__ + rev syndrome, syndrome + rev data1, data1 + /* The MS-non-zero bit of the syndrome marks either the first bit + that is different, or the top bit of the first zero byte. + Shifting left now will bring the critical information into the + top bits. */ + clz pos, syndrome + rev data2, data2 + lsl data1, data1, pos + lsl data2, data2, pos + /* But we need to zero-extend (char is unsigned) the value and then + perform a signed 32-bit subtraction. */ + lsr data1, data1, #56 + sub result, data1, data2, lsr #56 + RET +#else + /* For big-endian we cannot use the trick with the syndrome value + as carry-propagation can corrupt the upper bits if the trailing + bytes in the string contain 0x01. */ + /* However, if there is no NUL byte in the dword, we can generate + the result directly. We can't just subtract the bytes as the + MSB might be significant. 
*/ + cbnz has_nul, 1f + cmp data1, data2 + cset result, ne + cneg result, result, lo + RET +1: + /* Re-compute the NUL-byte detection, using a byte-reversed value. */ + rev tmp3, data1 + sub tmp1, tmp3, zeroones + orr tmp2, tmp3, #REP8_7f + bic has_nul, tmp1, tmp2 + rev has_nul, has_nul + orr syndrome, diff, has_nul + clz pos, syndrome + /* The MS-non-zero bit of the syndrome marks either the first bit + that is different, or the top bit of the first zero byte. + Shifting left now will bring the critical information into the + top bits. */ + lsl data1, data1, pos + lsl data2, data2, pos + /* But we need to zero-extend (char is unsigned) the value and then + perform a signed 32-bit subtraction. */ + lsr data1, data1, #56 + sub result, data1, data2, lsr #56 + RET +#endif + +L(mutual_align): + /* Sources are mutually aligned, but are not currently at an + alignment boundary. Round down the addresses and then mask off + the bytes that precede the start point. + We also need to adjust the limit calculations, but without + overflowing if the limit is near ULONG_MAX. */ + bic src1, src1, #7 + bic src2, src2, #7 + ldr data1, [src1], #8 + neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */ + ldr data2, [src2], #8 + mov tmp2, #~0 + sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ +#ifdef __AARCH64EB__ + /* Big-endian. Early bytes are at MSB. */ + lsl tmp2, tmp2, tmp3 /* Shift (tmp3 & 63). */ +#else + /* Little-endian. Early bytes are at LSB. */ + lsr tmp2, tmp2, tmp3 /* Shift (tmp3 & 63). */ +#endif + and tmp3, limit_wd, #7 + lsr limit_wd, limit_wd, #3 + /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */ + add limit, limit, tmp1 + add tmp3, tmp3, tmp1 + orr data1, data1, tmp2 + orr data2, data2, tmp2 + add limit_wd, limit_wd, tmp3, lsr #3 + b L(start_realigned) + +L(ret0): + mov result, #0 + RET + + .p2align 6 +L(misaligned8): + sub limit, limit, #1 +1: + /* Perhaps we can do better than this. 
*/ + ldrb data1w, [src1], #1 + ldrb data2w, [src2], #1 + subs limit, limit, #1 + ccmp data1w, #1, #0, cs /* NZCV = 0b0000. */ + ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ + b.eq 1b + sub result, data1, data2 + RET +END (strncmp) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/aarch64/strnlen.S b/REORG.TODO/sysdeps/aarch64/strnlen.S new file mode 100644 index 0000000000..8a4767b75e --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/strnlen.S @@ -0,0 +1,165 @@ +/* strnlen - calculate the length of a string with limit. + + Copyright (C) 2013-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Assumptions: + * + * ARMv8-a, AArch64 + */ + +/* Arguments and results. */ +#define srcin x0 +#define len x0 +#define limit x1 + +/* Locals and temporaries. 
*/ +#define src x2 +#define data1 x3 +#define data2 x4 +#define data2a x5 +#define has_nul1 x6 +#define has_nul2 x7 +#define tmp1 x8 +#define tmp2 x9 +#define tmp3 x10 +#define tmp4 x11 +#define zeroones x12 +#define pos x13 +#define limit_wd x14 + +#define REP8_01 0x0101010101010101 +#define REP8_7f 0x7f7f7f7f7f7f7f7f +#define REP8_80 0x8080808080808080 + +/* __strnlen: in x0 = srcin (start of string), x1 = limit; out x0 = len, + the smaller of the distance to the first NUL byte and limit. + Uses x2-x14 as scratch and modifies the condition flags. The + string is read in aligned 16-byte Qwords, so bytes before srcin and + after the terminator that share those Qwords are loaded as well. + NOTE(review): x2 only serves as the `src' scratch register here, so + DELOUSE (2) looks redundant -- confirm before removing it, since it + changes the instruction count between the entry padding and L(loop) + on ILP32. */ +ENTRY_ALIGN_AND_PAD (__strnlen, 6, 9) + DELOUSE (0) + DELOUSE (1) + DELOUSE (2) + cbz limit, L(hit_limit) + mov zeroones, #REP8_01 + bic src, srcin, #15 + ands tmp1, srcin, #15 + b.ne L(misaligned) + /* Calculate the number of full and partial words -1. */ + sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */ + lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */ + + /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 + (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + can be done in parallel across the entire word. */ + /* The inner loop deals with two Dwords at a time. This has a + slightly higher start-up cost, but we should win quite quickly, + especially on cores with a high number of issue slots per + cycle, as we get much better parallelism out of the operations. */ + + /* Start of critical section -- keep to one 64Byte cache line. */ +L(loop): + ldp data1, data2, [src], #16 +L(realigned): + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + sub tmp3, data2, zeroones + orr tmp4, data2, #REP8_7f + bic has_nul1, tmp1, tmp2 + bic has_nul2, tmp3, tmp4 + subs limit_wd, limit_wd, #1 + orr tmp1, has_nul1, has_nul2 + ccmp tmp1, #0, #0, pl /* NZCV = 0000 */ + b.eq L(loop) + /* End of critical section -- keep to one 64Byte cache line. */ + + orr tmp1, has_nul1, has_nul2 + cbz tmp1, L(hit_limit) /* No null in final Qword. */ + + /* We know there's a null in the final Qword. The easiest thing + to do now is work out the length of the string and return + MIN (len, limit). 
*/ + + sub len, src, srcin /* src is already past the Qword just tested. */ + cbz has_nul1, L(nul_in_data2) +#ifdef __AARCH64EB__ + mov data2, data1 +#endif + sub len, len, #8 /* NUL is in data1: step back over data2. */ + mov has_nul2, has_nul1 +L(nul_in_data2): +#ifdef __AARCH64EB__ + /* For big-endian, carry propagation (if the final byte in the + string is 0x01) means we cannot use has_nul directly. The + easiest way to get the correct byte is to byte-swap the data + and calculate the syndrome a second time. */ + rev data2, data2 + sub tmp1, data2, zeroones + orr tmp2, data2, #REP8_7f + bic has_nul2, tmp1, tmp2 +#endif + sub len, len, #8 /* len = offset of the Dword that holds the NUL. */ + rev has_nul2, has_nul2 + clz pos, has_nul2 + add len, len, pos, lsr #3 /* Bits to bytes. */ + cmp len, limit + csel len, len, limit, ls /* Return the lower value. */ + RET + +L(misaligned): + /* Deal with a partial first word. + We're doing two things in parallel here; + 1) Calculate the number of words (but avoiding overflow if + limit is near ULONG_MAX) - to do this we need to work out + limit + tmp1 - 1 as a 65-bit value before shifting it; + 2) Load and mask the initial data words - we force the bytes + before the ones we are interested in to 0xff - this ensures + early bytes will not hit any zero detection. */ + sub limit_wd, limit, #1 + neg tmp4, tmp1 + cmp tmp1, #8 + + and tmp3, limit_wd, #15 + lsr limit_wd, limit_wd, #4 + mov tmp2, #~0 + + ldp data1, data2, [src], #16 + lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */ + add tmp3, tmp3, tmp1 + +#ifdef __AARCH64EB__ + /* Big-endian. Early bytes are at MSB. */ + lsl tmp2, tmp2, tmp4 /* Shift (tmp1 & 63). */ +#else + /* Little-endian. Early bytes are at LSB. */ + lsr tmp2, tmp2, tmp4 /* Shift (tmp1 & 63). 
*/ +#endif + add limit_wd, limit_wd, tmp3, lsr #4 + + orr data1, data1, tmp2 + orr data2a, data2, tmp2 + + /* The flags are still those of "cmp tmp1, #8" (nothing in between + sets them). With more than 8 bytes of padding the whole first + Dword is padding, so it is forced to all-ones and the masked copy + of the second Dword is used; otherwise only data1 is masked. */ + csinv data1, data1, xzr, le + csel data2, data2, data2a, le + b L(realigned) + +L(hit_limit): + mov len, limit + RET +END (__strnlen) +libc_hidden_def (__strnlen) +weak_alias (__strnlen, strnlen) +libc_hidden_def (strnlen) diff --git a/REORG.TODO/sysdeps/aarch64/strrchr.S b/REORG.TODO/sysdeps/aarch64/strrchr.S new file mode 100644 index 0000000000..f8e5e8e036 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/strrchr.S @@ -0,0 +1,166 @@ +/* strrchr: find the last instance of a character in a string. + + Copyright (C) 2014-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Assumptions: + * + * ARMv8-a, AArch64 + * Neon Available. + */ + +/* Arguments and results. 
*/ +#define srcin x0 +#define chrin w1 + +#define result x0 + +#define src x2 +#define tmp1 x3 +#define wtmp2 w4 +#define tmp3 x5 +#define src_match x6 +#define src_offset x7 +#define const_m1 x8 +#define tmp4 x9 +#define nul_match x10 +#define chr_match x11 + +#define vrepchr v0 +#define vdata1 v1 +#define vdata2 v2 +#define vhas_nul1 v3 +#define vhas_nul2 v4 +#define vhas_chr1 v5 +#define vhas_chr2 v6 +#define vrepmask_0 v7 +#define vrepmask_c v16 +#define vend1 v17 +#define vend2 v18 + +/* Core algorithm. + + For each 32-byte hunk we calculate a 64-bit syndrome value, with + two bits per byte (LSB is always in bits 0 and 1, for both big + and little-endian systems). For each tuple, bit 0 is set iff + the relevant byte matched the requested character; bit 1 is set + iff the relevant byte matched the NUL end of string (we trigger + off bit0 for the special case of looking for NUL). Since the bits + in the syndrome reflect exactly the order in which things occur + in the original string a count_trailing_zeros() operation will + identify exactly which byte is causing the termination, and why. */ + +/* strrchr: in x0 = srcin (start of string), w1 = chrin (only its low + byte is replicated and compared); out x0 = address of the last + matching byte, or 0 when there is none. + Uses x2-x11, v0-v7, v16 and v17 as scratch and modifies the + condition flags. The string is read in aligned 32-byte hunks. */ +ENTRY(strrchr) + DELOUSE (0) + /* The character is a 32-bit argument, so test chrin (w1) rather than + x1: the AAPCS64 leaves bits 63:32 of x1 unspecified, and testing + all 64 bits could miss this fast path. The general path below + also copes with a NUL search, so the result is the same. */ + cbz chrin, L(null_search) + /* Magic constant 0x40100401 to allow us to identify which lane + matches the requested byte. Magic constant 0x80200802 used + similarly for NUL termination. */ + mov wtmp2, #0x0401 + movk wtmp2, #0x4010, lsl #16 + dup vrepchr.16b, chrin + bic src, srcin, #31 /* Work with aligned 32-byte hunks. */ + dup vrepmask_c.4s, wtmp2 + mov src_offset, #0 + ands tmp1, srcin, #31 + add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */ + b.eq L(aligned) + + /* Input string is not 32-byte aligned. Rather than forcing + the padding bytes to a safe value, we calculate the syndrome + for all the bytes, but then mask off those bits of the + syndrome that are related to the padding. 
*/ + ld1 {vdata1.16b, vdata2.16b}, [src], #32 + neg tmp1, tmp1 + cmeq vhas_nul1.16b, vdata1.16b, #0 + cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b + cmeq vhas_nul2.16b, vdata2.16b, #0 + cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b + and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b + and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b + and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b + and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b + addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128 + addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 + addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b // 128->64 + addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64 + mov nul_match, vhas_nul1.2d[0] + lsl tmp1, tmp1, #1 + mov const_m1, #~0 + mov chr_match, vhas_chr1.2d[0] + lsr tmp3, const_m1, tmp1 + + bic nul_match, nul_match, tmp3 // Mask padding bits. + bic chr_match, chr_match, tmp3 // Mask padding bits. + cbnz nul_match, L(tail) + +L(loop): + cmp chr_match, #0 + csel src_match, src, src_match, ne + csel src_offset, chr_match, src_offset, ne +L(aligned): + ld1 {vdata1.16b, vdata2.16b}, [src], #32 + cmeq vhas_nul1.16b, vdata1.16b, #0 + cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b + cmeq vhas_nul2.16b, vdata2.16b, #0 + cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b + addp vend1.16b, vhas_nul1.16b, vhas_nul2.16b // 256->128 + and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b + and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b + addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 + addp vend1.16b, vend1.16b, vend1.16b // 128->64 + addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64 + mov nul_match, vend1.2d[0] + mov chr_match, vhas_chr1.2d[0] + cbz nul_match, L(loop) + + and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b + and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b + addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b + addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b + mov nul_match, vhas_nul1.2d[0] + +L(tail): + /* Work out exactly where the 
string ends. */ + sub tmp4, nul_match, #1 + eor tmp4, tmp4, nul_match + ands chr_match, chr_match, tmp4 + /* And pick the values corresponding to the last match. */ + csel src_match, src, src_match, ne + csel src_offset, chr_match, src_offset, ne + + /* Count down from the top of the syndrome to find the last match. */ + clz tmp3, src_offset + /* Src_match points beyond the word containing the match, so we can + simply subtract half the bit-offset into the syndrome. Because + we are counting down, we need to go back one more character. */ + add tmp3, tmp3, #2 + sub result, src_match, tmp3, lsr #1 + /* But if the syndrome shows no match was found, then return NULL. */ + cmp src_offset, #0 + csel result, result, xzr, ne + + ret +L(null_search): + /* Looking for NUL: the only possible match is the terminator itself, + which is what __strchrnul is expected to return for a zero + character. x0 and w1 are passed through unchanged (tail call). */ + b __strchrnul + +END(strrchr) +weak_alias (strrchr, rindex) +libc_hidden_builtin_def (strrchr) diff --git a/REORG.TODO/sysdeps/aarch64/sysdep.h b/REORG.TODO/sysdeps/aarch64/sysdep.h new file mode 100644 index 0000000000..a749a707ec --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/sysdep.h @@ -0,0 +1,148 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _AARCH64_SYSDEP_H +#define _AARCH64_SYSDEP_H + +#include <sysdeps/generic/sysdep.h> + +/* ABI-dependent helpers: the relocation name prefix, the pointer-sized + view of register n, and log2 of the pointer size. DELOUSE (n) + expands to `mov wN, wN' when __LP64__ is not defined, which clears + bits 63:32 of register n, and to nothing otherwise. */ +#ifdef __LP64__ +# define AARCH64_R(NAME) R_AARCH64_ ## NAME +# define PTR_REG(n) x##n +# define PTR_LOG_SIZE 3 +# define DELOUSE(n) +#else +# define AARCH64_R(NAME) R_AARCH64_P32_ ## NAME +# define PTR_REG(n) w##n +# define PTR_LOG_SIZE 2 +# define DELOUSE(n) mov w##n, w##n +#endif + +#define PTR_SIZE (1<<PTR_LOG_SIZE) + +#ifdef __ASSEMBLER__ + +/* Syntactic details of assembler. */ + +#define ASM_SIZE_DIRECTIVE(name) .size name,.-name + +/* Define an entry point visible from C. */ +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(name); \ + .type C_SYMBOL_NAME(name),%function; \ + .align 4; \ + C_LABEL(name) \ + cfi_startproc; \ + CALL_MCOUNT + +/* Define an entry point visible from C with a specified alignment; + ALIGN is handed to .p2align. */ +#define ENTRY_ALIGN(name, align) \ + .globl C_SYMBOL_NAME(name); \ + .type C_SYMBOL_NAME(name),%function; \ + .p2align align; \ + C_LABEL(name) \ + cfi_startproc; \ + CALL_MCOUNT + +/* Define an entry point visible from C with a specified alignment and + pre-padding with NOPs. This can be used to ensure that a critical + loop within a function is cache line aligned. Note this version + does not adjust the padding if CALL_MCOUNT is defined. */ + +#define ENTRY_ALIGN_AND_PAD(name, align, padding) \ + .globl C_SYMBOL_NAME(name); \ + .type C_SYMBOL_NAME(name),%function; \ + .p2align align; \ + .rep padding; \ + nop; \ + .endr; \ + C_LABEL(name) \ + cfi_startproc; \ + CALL_MCOUNT + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(name) + +/* If compiled for profiling, call `mcount' at the start of each function. + x0-x7 and x30 are saved in an 80-byte stack frame around the call and + restored afterwards; x0 is loaded from x30 before the call. 
*/ +#ifdef PROF +# define CALL_MCOUNT \ + str x30, [sp, #-80]!; \ + cfi_adjust_cfa_offset (80); \ + cfi_rel_offset (x30, 0); \ + stp x0, x1, [sp, #16]; \ + cfi_rel_offset (x0, 16); \ + cfi_rel_offset (x1, 24); \ + stp x2, x3, [sp, #32]; \ + cfi_rel_offset (x2, 32); \ + cfi_rel_offset (x3, 40); \ + stp x4, x5, [sp, #48]; \ + cfi_rel_offset (x4, 48); \ + cfi_rel_offset (x5, 56); \ + stp x6, x7, [sp, #64]; \ + cfi_rel_offset (x6, 64); \ + cfi_rel_offset (x7, 72); \ + mov x0, x30; \ + bl mcount; \ + ldp x0, x1, [sp, #16]; \ + cfi_restore (x0); \ + cfi_restore (x1); \ + ldp x2, x3, [sp, #32]; \ + cfi_restore (x2); \ + cfi_restore (x3); \ + ldp x4, x5, [sp, #48]; \ + cfi_restore (x4); \ + cfi_restore (x5); \ + ldp x6, x7, [sp, #64]; \ + cfi_restore (x6); \ + cfi_restore (x7); \ + ldr x30, [sp], #80; \ + cfi_adjust_cfa_offset (-80); \ + cfi_restore (x30); +#else +# define CALL_MCOUNT /* Do nothing. */ +#endif + +/* Local label name for asm code. */ +#ifndef L +# define L(name) .L##name +#endif + +/* Load or store to/from a pc-relative EXPR into/from R, using T. + Note R and T are register numbers and not register names. + The last line deliberately carries no line continuation, so the + macro does not depend on a blank line following it. */ +#define LDST_PCREL(OP, R, T, EXPR) \ + adrp x##T, EXPR; \ + OP PTR_REG (R), [x##T, #:lo12:EXPR]; + +/* Load or store to/from a got-relative EXPR into/from R, using T. + Note R and T are register numbers and not register names. */ +#define LDST_GLOBAL(OP, R, T, EXPR) \ + adrp x##T, :got:EXPR; \ + ldr PTR_REG (T), [x##T, #:got_lo12:EXPR]; \ + OP PTR_REG (R), [x##T]; + +/* Since C identifiers are not normally prefixed with an underscore + on this system, the asm identifier `syscall_error' intrudes on the + C name space. Make sure we use an innocuous name. + `mcount' is renamed to `_mcount' in the same way. 
*/ +#define syscall_error __syscall_error +#define mcount _mcount + +#endif /* __ASSEMBLER__ */ + +#endif /* _AARCH64_SYSDEP_H */ diff --git a/REORG.TODO/sysdeps/aarch64/tls-macros.h b/REORG.TODO/sysdeps/aarch64/tls-macros.h new file mode 100644 index 0000000000..358ccbc490 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/tls-macros.h @@ -0,0 +1,51 @@ +/* Copyright (C) 2009-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +/* Each macro below evaluates to an int * for the thread-local + variable x, using one particular TLS access sequence. */ + +/* Local-dynamic access simply reuses the general-dynamic sequence. */ +#define TLS_LD(x) TLS_GD(x) + +/* General-dynamic: form the :tlsgd: address for x in x0 and call + __tls_get_addr; the result is read back from x0. x1-x18 and x30 + are declared clobbered because of the call. + NOTE(review): the trailing nop presumably keeps the sequence at the + length the linker expects for TLS relaxation -- confirm. */ +#define TLS_GD(x) \ + ({ register unsigned long __result asm ("x0"); \ + asm ("adrp %0, :tlsgd:" #x "; " \ + "add %0, %0, #:tlsgd_lo12:" #x "; " \ + "bl __tls_get_addr;" \ + "nop" \ + : "=r" (__result) \ + : \ + : "x1", "x2", "x3", "x4", "x5", "x6", \ + "x7", "x8", "x9", "x10", "x11", "x12", \ + "x13", "x14", "x15", "x16", "x17", "x18", \ + "x30", "memory", "cc"); \ + (int *) (__result); }) + +/* Initial-exec: load the value stored at x's :gottprel: GOT slot and + add the thread pointer read from tpidr_el0 into __t. */ +#define TLS_IE(x) \ + ({ register unsigned long __result asm ("x0"); \ + register unsigned long __t; \ + asm ("mrs %1, tpidr_el0; " \ + "adrp %0, :gottprel:" #x "; " \ + "ldr %0, [%0, #:gottprel_lo12:" #x "]; " \ + "add %0, %0, %1" \ + : "=r" (__result), "=r" (__t)); \ + (int *) (__result); }) + +/* Local-exec: add x's :tprel_hi12: and :tprel_lo12_nc: parts directly + to the thread pointer read from tpidr_el0. */ +#define TLS_LE(x) \ + ({ register unsigned long __result asm ("x0"); \ + asm ("mrs %0, tpidr_el0; " \ + "add %0, %0, :tprel_hi12:" #x "; " \ + "add %0, %0, :tprel_lo12_nc:" #x \ + : "=r" (__result)); \ + (int *) (__result); }) diff --git a/REORG.TODO/sysdeps/aarch64/tlsdesc.c b/REORG.TODO/sysdeps/aarch64/tlsdesc.c new file mode 100644 index 0000000000..0b57031126 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/tlsdesc.c @@ -0,0 +1,166 @@ +/* Manage TLS descriptors. AArch64 version. + + Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <link.h> +#include <ldsodefs.h> +#include <elf/dynamic-link.h> +#include <tls.h> +#include <dl-tlsdesc.h> +#include <dl-unmap-segments.h> +#include <tlsdeschtab.h> +#include <atomic.h> + +/* The following functions take an entry_check_offset argument. It's + computed by the caller as an offset between its entry point and the + call site, such that by adding the built-in return address that is + implicitly passed to the function with this offset, we can easily + obtain the caller's entry point to compare with the entry point + given in the TLS descriptor. If it's changed, we want to return + immediately. + + NOTE(review): neither fixup function below has a parameter named + entry_check_offset; they receive the descriptor plus either the + link map or the caller's entry point. This paragraph looks stale + -- confirm and update. */ + +/* This function is used to lazily resolve TLS_DESC RELA relocations. + The argument location is used to hold a pointer to the relocation. + + TD is the descriptor being resolved and L the link map it belongs + to. On completion td->arg is stored first (relaxed) and td->entry + second (release), with td->entry set to one of _dl_tlsdesc_undefweak, + _dl_tlsdesc_dynamic or _dl_tlsdesc_return_lazy. */ + +void +attribute_hidden +_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc *td, struct link_map *l) +{ + const ElfW(Rela) *reloc = atomic_load_relaxed (&td->arg); + + /* After GL(dl_load_lock) is grabbed only one caller can see td->entry in + initial state in _dl_tlsdesc_resolve_early_return_p, other concurrent + callers will return and retry calling td->entry. The updated td->entry + synchronizes with the single writer so all read accesses here can use + relaxed order. */ + if (_dl_tlsdesc_resolve_early_return_p + (td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr))) + return; + + /* The code below was borrowed from _dl_fixup(), + except for checking for STB_LOCAL. */ + const ElfW(Sym) *const symtab + = (const void *) D_PTR (l, l_info[DT_SYMTAB]); + const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]); + const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)]; + lookup_t result; + + /* Look up the target symbol. If the normal lookup rules are not + used don't look in the global scope. 
*/ + if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL + && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0) + { + const struct r_found_version *version = NULL; + + if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL) + { + const ElfW(Half) *vernum = + (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]); + ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff; + version = &l->l_versions[ndx]; + if (version->hash == 0) + version = NULL; + } + + result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym, + l->l_scope, version, ELF_RTYPE_CLASS_PLT, + DL_LOOKUP_ADD_DEPENDENCY, NULL); + } + else + { + /* We already found the symbol. The module (and therefore its load + address) is also known. */ + result = l; + } + + /* sym may have been updated through &sym by the lookup above; a null + sym is routed to _dl_tlsdesc_undefweak (presumably the undefined + weak symbol case -- confirm). */ + if (!sym) + { + atomic_store_relaxed (&td->arg, (void *) reloc->r_addend); + /* This release store synchronizes with the ldar acquire load + instruction in _dl_tlsdesc_undefweak. */ + atomic_store_release (&td->entry, _dl_tlsdesc_undefweak); + } + else + { +# ifndef SHARED + CHECK_STATIC_TLS (l, result); +# else + if (!TRY_STATIC_TLS (l, result)) + { + void *p = _dl_make_tlsdesc_dynamic (result, sym->st_value + + reloc->r_addend); + atomic_store_relaxed (&td->arg, p); + /* This release store synchronizes with the ldar acquire load + instruction in _dl_tlsdesc_dynamic. */ + atomic_store_release (&td->entry, _dl_tlsdesc_dynamic); + } + else +# endif + { + void *p = (void*) (sym->st_value + result->l_tls_offset + + reloc->r_addend); + atomic_store_relaxed (&td->arg, p); + /* This release store synchronizes with the ldar acquire load + instruction in _dl_tlsdesc_return_lazy. */ + atomic_store_release (&td->entry, _dl_tlsdesc_return_lazy); + } + } + + _dl_tlsdesc_wake_up_held_fixups (); +} + +/* This function is used to avoid busy waiting for other threads to + complete the lazy relocation. 
Once another thread wins the race to + relocate a TLS descriptor, it sets the descriptor up such that this + function is called to wait until the resolver releases the + lock. + + CALLER is compared with the current td->entry; when they differ the + function returns at once without touching the lock. */ + +void +attribute_hidden +_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc *td, void *caller) +{ + /* Maybe we're lucky and can return early. */ + if (caller != atomic_load_relaxed (&td->entry)) + return; + + /* Locking here will stop execution until the running resolver runs + _dl_tlsdesc_wake_up_held_fixups(), releasing the lock. + + FIXME: We'd be better off waiting on a condition variable, such + that we didn't have to hold the lock throughout the relocation + processing. */ + __rtld_lock_lock_recursive (GL(dl_load_lock)); + __rtld_lock_unlock_recursive (GL(dl_load_lock)); +} + + +/* Unmap the dynamic object, but also release its TLS descriptor table + if there is one. The table is only released when SHARED is + defined. */ + +void +internal_function +_dl_unmap (struct link_map *map) +{ + _dl_unmap_segments (map); + +#ifdef SHARED + if (map->l_mach.tlsdesc_table) + htab_delete (map->l_mach.tlsdesc_table); +#endif +} diff --git a/REORG.TODO/sysdeps/aarch64/tlsdesc.sym b/REORG.TODO/sysdeps/aarch64/tlsdesc.sym new file mode 100644 index 0000000000..a06a719779 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/tlsdesc.sym @@ -0,0 +1,18 @@ +#include <stddef.h> +#include <sysdep.h> +#include <tls.h> +#include <link.h> +#include <dl-tlsdesc.h> + +-- + +-- Abuse tls.h macros to derive offsets relative to the thread register. 
+ +TLSDESC_ARG offsetof(struct tlsdesc, arg) + +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) +TCBHEAD_DTV offsetof(tcbhead_t, dtv) +DTV_COUNTER offsetof(dtv_t, counter) +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED diff --git a/REORG.TODO/sysdeps/aarch64/tst-audit.h b/REORG.TODO/sysdeps/aarch64/tst-audit.h new file mode 100644 index 0000000000..ea3ed55f11 --- /dev/null +++ b/REORG.TODO/sysdeps/aarch64/tst-audit.h @@ -0,0 +1,25 @@ +/* Definitions for testing PLT entry/exit auditing. AArch64 version. + + Copyright (C) 2005-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Map the generic names onto their AArch64-specific la_aarch64_* / + La_aarch64_* counterparts; int_retval selects element 0 of lrv_xreg. + NOTE(review): presumably consumed by the generic audit test sources + -- confirm against the files that include this header. */ +#define pltenter la_aarch64_gnu_pltenter +#define pltexit la_aarch64_gnu_pltexit +#define La_regs La_aarch64_regs +#define La_retval La_aarch64_retval +#define int_retval lrv_xreg[0] |