diff options
Diffstat (limited to 'REORG.TODO/sysdeps/tile')
95 files changed, 7519 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/tile/Implies b/REORG.TODO/sysdeps/tile/Implies new file mode 100644 index 0000000000..5b29b26128 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/Implies @@ -0,0 +1,2 @@ +ieee754/dbl-64 +ieee754/flt-32 diff --git a/REORG.TODO/sysdeps/tile/Makefile b/REORG.TODO/sysdeps/tile/Makefile new file mode 100644 index 0000000000..3cd4d1e752 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/Makefile @@ -0,0 +1,13 @@ +# We don't support long doubles as a distinct type. We don't need to set +# this variable; it's here mostly for documentational purposes. + +long-double-fcts = no + +ifeq ($(subdir),gmon) +sysdep_routines += _mcount +endif + +ifeq ($(subdir),elf) +# Extra shared linker files to link only into dl-allobjs.so. +sysdep-rtld-routines += dl-start __tls_get_addr +endif diff --git a/REORG.TODO/sysdeps/tile/Versions b/REORG.TODO/sysdeps/tile/Versions new file mode 100644 index 0000000000..b275d7ff3b --- /dev/null +++ b/REORG.TODO/sysdeps/tile/Versions @@ -0,0 +1,6 @@ +libc { + GLIBC_2.12 { + # name requested by gcc community. + __mcount; + } +} diff --git a/REORG.TODO/sysdeps/tile/__longjmp.S b/REORG.TODO/sysdeps/tile/__longjmp.S new file mode 100644 index 0000000000..c9902b1b31 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/__longjmp.S @@ -0,0 +1,57 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <jmpbuf-offsets.h> +#include <asm-syntax.h> +#include <arch/spr_def.h> + +/* PL to return to via iret in longjmp */ +#define RETURN_PL 0 + + .text +ENTRY (__longjmp) + FEEDBACK_ENTER(__longjmp) + +#define RESTORE(r) { LD r, r0 ; ADDI_PTR r0, r0, REGSIZE } + FOR_EACH_CALLEE_SAVED_REG(RESTORE) + + /* Make longjmp(buf, 0) return "1" instead. + At the same time, construct our iret context; we set ICS so + we can validly load EX_CONTEXT for iret without being + interrupted halfway through. */ + { + LD r2, r0 /* retrieve ICS bit from jmp_buf */ + movei r3, 1 + CMPEQI r0, r1, 0 + } + { + mtspr INTERRUPT_CRITICAL_SECTION, r3 + shli r2, r2, SPR_EX_CONTEXT_0_1__ICS_SHIFT + } + { + mtspr EX_CONTEXT_0_0, lr + ori r2, r2, RETURN_PL + } + { + or r0, r1, r0 + mtspr EX_CONTEXT_0_1, r2 + } + iret + jrp lr /* Keep the backtracer happy. */ +END (__longjmp) diff --git a/REORG.TODO/sysdeps/tile/__tls_get_addr.S b/REORG.TODO/sysdeps/tile/__tls_get_addr.S new file mode 100644 index 0000000000..3b2b95522f --- /dev/null +++ b/REORG.TODO/sysdeps/tile/__tls_get_addr.S @@ -0,0 +1,152 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <tls.h> +#include <bits/wordsize.h> + +#if __WORDSIZE == 64 +#define LOG_SIZEOF_DTV_T 4 +#else +#define LOG_SIZEOF_DTV_T 3 +#endif + +/* On entry, r0 points to two words, the module and the offset. + On return, r0 holds the pointer to the relevant TLS memory. + Only registers r25..r29 are clobbered by the call. */ + + .text +ENTRY (__tls_get_addr) + { + lnk r25 + ADDI_PTR r27, tp, DTV_OFFSET + } +.Llnk: +#ifdef __tilegx__ + { + LD_PTR r27, r27 /* r27 = THREAD_DTV() */ + moveli r26, hw1_last(_rtld_local + TLS_GENERATION_OFFSET - .Llnk) + } + shl16insli r26, r26, hw0(_rtld_local + TLS_GENERATION_OFFSET - .Llnk) + { + ADD_PTR r25, r25, r26 + LD_PTR r26, r0 /* r26 = ti_module */ + } +#else + { + LD_PTR r27, r27 /* r27 = THREAD_DTV() */ + addli r25, r25, lo16(_rtld_local + TLS_GENERATION_OFFSET - .Llnk) + } + { + auli r25, r25, ha16(_rtld_local + TLS_GENERATION_OFFSET - .Llnk) + LD_PTR r26, r0 /* r26 = ti_module */ + } +#endif + LD_PTR r25, r25 /* r25 = DL(dl_tls_generation) */ + { + LD_PTR r28, r27 /* r28 = THREAD_DTV()->counter */ + ADDI_PTR r29, r0, __SIZEOF_POINTER__ + } + { + LD_PTR r29, r29 /* r29 = ti_offset */ + CMPEQ r25, r28, r25 /* r25 nonzero if generation OK */ + shli r28, r26, LOG_SIZEOF_DTV_T /* byte index into dtv array */ + } + { + BEQZ r25, .Lslowpath + CMPEQI r25, r26, -1 /* r25 nonzero if ti_module invalid */ + } + { + BNEZ r25, .Lslowpath + ADD_PTR r28, r28, r27 /* pointer into module array */ + } + LD_PTR r26, r28 /* r26 = module TLS pointer */ + CMPEQI r25, r26, -1 /* check r26 == TLS_DTV_UNALLOCATED */ + BNEZ r25, .Lslowpath + { + ADD_PTR r0, r26, r29 + jrp lr + } + +.Lslowpath: + { + ST sp, lr + ADDLI_PTR r29, sp, - (25 * REGSIZE) + } + cfi_offset (lr, 0) + { + ST r29, sp + ADDLI_PTR sp, sp, - (26 * REGSIZE) + } + cfi_def_cfa_offset (26 * REGSIZE) + ADDI_PTR r29, sp, (2 * REGSIZE) + { ST r29, r1; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r2; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r3; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r4; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r5; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r6; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r7; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r8; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r9; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r10; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r11; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r12; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r13; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r14; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r15; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r16; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r17; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r18; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r19; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r20; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r21; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r22; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r23; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r24; ADDI_PTR r29, r29, REGSIZE } + .hidden __tls_get_addr_slow + jal __tls_get_addr_slow + ADDI_PTR r29, sp, (2 * REGSIZE) + { LD r1, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r2, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r3, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r4, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r5, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r6, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r7, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r8, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r9, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r10, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r11, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r12, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r13, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r14, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r15, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r16, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r17, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r18, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r19, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r20, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r21, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r22, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r23, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r24, r29; ADDLI_PTR sp, sp, (26 * REGSIZE) } + cfi_def_cfa_offset (0) + LD lr, sp + jrp lr +END (__tls_get_addr) diff --git a/REORG.TODO/sysdeps/tile/_mcount.S b/REORG.TODO/sysdeps/tile/_mcount.S new file mode 100644 index 0000000000..c350084630 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/_mcount.S @@ -0,0 +1,87 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + Based on work contributed by David Mosberger (davidm@cs.arizona.edu). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Assembly stub to invoke __mcount_internal(). Compiler-generated + code calls mcount after executing a function's prologue, placing + the "lr" register in "r10" for the call. As a result "lr" is the + function that invoked mcount, and "r10" is mcount's caller's + caller. However, we have to save all the parameter registers here + before invoking _mcount_internal. Callee-save and temporary + registers need no special attention. We save r10 and restore it to + lr on the way out, to properly handle the case of ENTRY() in + assembly code, before lr is saved. We use the name __mcount since + the gcc community prefers using the reserved namespace. */ + +#include <sysdep.h> + + .text +ENTRY(__mcount) + { + ST sp, lr + ADDI_PTR r29, sp, - (12 * REGSIZE) + } + cfi_offset (lr, 0) + { + ADDI_PTR sp, sp, - (13 * REGSIZE) + ST r29, sp + ADDI_PTR r29, r29, REGSIZE + } + cfi_def_cfa_offset (13 * REGSIZE) + { ST r29, r0; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r1; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r2; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r3; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r4; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r5; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r6; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r7; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r8; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r9; ADDI_PTR r29, r29, REGSIZE } + { ST r29, r10; ADDI_PTR r29, r29, REGSIZE; move r0, r10 } + { + move r1, lr + jal __mcount_internal + } + { + ADDI_PTR r29, sp, (2 * REGSIZE) + } + { LD r0, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r1, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r2, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r3, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r4, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r5, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r6, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r7, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r8, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r9, r29; ADDI_PTR r29, r29, REGSIZE } + { LD r10, r29; ADDI_PTR sp, sp, (13 * REGSIZE) } + cfi_def_cfa_offset (0) + { + LD lr, sp + } + { + move lr, r10 + jrp lr + } +END(__mcount) + +#undef mcount +weak_alias (__mcount, _mcount) /* exported in gmon/Versions */ +weak_alias (__mcount, mcount) /* exported in stdlib/Versions */ diff --git a/REORG.TODO/sysdeps/tile/abort-instr.h b/REORG.TODO/sysdeps/tile/abort-instr.h new file mode 100644 index 0000000000..0f57da5198 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/abort-instr.h @@ -0,0 +1,2 @@ +/* An instruction which should crash any program is `hlt'. */ +#define ABORT_INSTRUCTION asm ("ill") diff --git a/REORG.TODO/sysdeps/tile/atomic-machine.h b/REORG.TODO/sysdeps/tile/atomic-machine.h new file mode 100644 index 0000000000..32277c960f --- /dev/null +++ b/REORG.TODO/sysdeps/tile/atomic-machine.h @@ -0,0 +1,81 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* The sub-architecture headers provide definitions for these macros + that work for "int" and "long" size values only: + + atomic_compare_and_exchange_val_acq() + atomic_exchange_acq() + atomic_exchange_and_add() + atomic_and_val() + atomic_or_val() + atomic_decrement_if_positive() [tilegx only] + + Here we provide generic definitions true for all Tilera chips. */ + +#include <stdint.h> +#include <features.h> + +typedef int32_t atomic32_t; +typedef uint32_t uatomic32_t; +typedef int_fast32_t atomic_fast32_t; +typedef uint_fast32_t uatomic_fast32_t; + +typedef int64_t atomic64_t; +typedef uint64_t uatomic64_t; +typedef int_fast64_t atomic_fast64_t; +typedef uint_fast64_t uatomic_fast64_t; + +typedef intptr_t atomicptr_t; +typedef uintptr_t uatomicptr_t; +typedef intmax_t atomic_max_t; +typedef uintmax_t uatomic_max_t; + +/* Barrier macro. */ +#define atomic_full_barrier() __sync_synchronize() + +/* APIs with "release" semantics. */ +#define atomic_compare_and_exchange_val_rel(mem, n, o) \ + ({ \ + atomic_full_barrier (); \ + atomic_compare_and_exchange_val_acq ((mem), (n), (o)); \ + }) +#define atomic_exchange_rel(mem, n) \ + ({ \ + atomic_full_barrier (); \ + atomic_exchange_acq ((mem), (n)); \ + }) + +/* Various macros that should just be synonyms. */ +#define catomic_exchange_and_add atomic_exchange_and_add +#define atomic_and(mem, mask) ((void) atomic_and_val ((mem), (mask))) +#define catomic_and atomic_and +#define atomic_or(mem, mask) ((void) atomic_or_val ((mem), (mask))) +#define catomic_or atomic_or + +/* atomic_bit_test_set in terms of atomic_or_val. */ +#define atomic_bit_test_set(mem, bit) \ + ({ __typeof (*(mem)) __att0_mask = ((__typeof (*(mem))) 1 << (bit)); \ + atomic_or_val ((mem), __att0_mask) & __att0_mask; }) + +/* + * This non-existent symbol is called for unsupporrted sizes, + * indicating a bug in the caller. + */ +extern int __atomic_error_bad_argument_size(void) + __attribute__ ((warning ("bad sizeof atomic argument"))); diff --git a/REORG.TODO/sysdeps/tile/backtrace.c b/REORG.TODO/sysdeps/tile/backtrace.c new file mode 100644 index 0000000000..27ce597b39 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/backtrace.c @@ -0,0 +1 @@ +#include <sysdeps/x86_64/backtrace.c> diff --git a/REORG.TODO/sysdeps/tile/bits/byteswap.h b/REORG.TODO/sysdeps/tile/bits/byteswap.h new file mode 100644 index 0000000000..ffbbceaa11 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/bits/byteswap.h @@ -0,0 +1,37 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _BYTESWAP_H && !defined _NETINET_IN_H && !defined _ENDIAN_H +# error "Never use <bits/byteswap.h> directly; include <byteswap.h> instead." +#endif + +#ifndef _BITS_BYTESWAP_H +#define _BITS_BYTESWAP_H 1 + +#include <bits/types.h> + +/* gcc __builtin_bswap64() can constant-fold, etc, so always use it. */ +#define __bswap_16(x) ((unsigned short)(__builtin_bswap32(x) >> 16)) +#define __bswap_32(x) ((unsigned int)__builtin_bswap32(x)) +#define __bswap_64(x) ((__uint64_t)__builtin_bswap64(x)) + +#define __bswap_constant_16(x) __bswap_16(x) +#define __bswap_constant_32(x) __bswap_32(x) +#define __bswap_constant_64(x) __bswap_64(x) + +#endif /* _BITS_BYTESWAP_H */ diff --git a/REORG.TODO/sysdeps/tile/bits/endian.h b/REORG.TODO/sysdeps/tile/bits/endian.h new file mode 100644 index 0000000000..835042a7ac --- /dev/null +++ b/REORG.TODO/sysdeps/tile/bits/endian.h @@ -0,0 +1,11 @@ +/* Set endianness for tile. */ + +#ifndef _ENDIAN_H +# error "Never use <bits/endian.h> directly; include <endian.h> instead." +#endif + +#if defined __BIG_ENDIAN__ +# define __BYTE_ORDER __BIG_ENDIAN +#else +# define __BYTE_ORDER __LITTLE_ENDIAN +#endif diff --git a/REORG.TODO/sysdeps/tile/bits/fenv.h b/REORG.TODO/sysdeps/tile/bits/fenv.h new file mode 100644 index 0000000000..6bbbffc3e4 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/bits/fenv.h @@ -0,0 +1,53 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FENV_H +# error "Never use <bits/fenv.h> directly; include <fenv.h> instead." +#endif + +/* The TILE-Gx hardware does not provide floating-point exception + handling, and TILEPro does not support any floating-point operations. */ +#define FE_ALL_EXCEPT 0 + +/* TILE-Gx supports only round-to-nearest. The software + floating-point support also acts this way. */ +enum + { + __FE_UNDEFINED = 0, + + FE_TONEAREST = +#define FE_TONEAREST 1 + FE_TONEAREST, + }; + +/* Type representing exception flags (if there were any). */ +typedef unsigned int fexcept_t; + +/* Type representing floating-point environment. */ +typedef unsigned int fenv_t; + +/* If the default argument is used we use this value. */ +#define FE_DFL_ENV ((const fenv_t *) -1l) + +#if __GLIBC_USE (IEC_60559_BFP_EXT) +/* Type representing floating-point control modes. */ +typedef unsigned int femode_t; + +/* Default floating-point control modes. */ +# define FE_DFL_MODE ((const femode_t *) -1L) +#endif diff --git a/REORG.TODO/sysdeps/tile/bits/link.h b/REORG.TODO/sysdeps/tile/bits/link.h new file mode 100644 index 0000000000..d29725892e --- /dev/null +++ b/REORG.TODO/sysdeps/tile/bits/link.h @@ -0,0 +1,57 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _LINK_H +# error "Never include <bits/link.h> directly; use <link.h> instead." +#endif + +#define __need_int_reg_t +#include <arch/abi.h> + + +/* Registers for entry into PLT. */ +typedef struct La_tile_regs +{ + __uint_reg_t lr_reg[10]; +} La_tile_regs; + +/* Return values for calls from PLT. */ +typedef struct La_tile_retval +{ + /* Up to ten registers can be used for a return value (e.g. small struct). */ + __uint_reg_t lrv_reg[10]; +} La_tile_retval; + + +__BEGIN_DECLS + +extern ElfW(Addr) la_tile_gnu_pltenter (ElfW(Sym) *__sym, unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_tile_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); +extern unsigned int la_tile_gnu_pltexit (ElfW(Sym) *__sym, unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_tile_regs *__inregs, + La_tile_retval *__outregs, + const char *__symname); + +__END_DECLS diff --git a/REORG.TODO/sysdeps/tile/bits/mathinline.h b/REORG.TODO/sysdeps/tile/bits/mathinline.h new file mode 100644 index 0000000000..5692d91004 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/bits/mathinline.h @@ -0,0 +1,44 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never use <bits/mathinline.h> directly; include <math.h> instead." +#endif + +#ifndef __extern_always_inline +# define __MATH_INLINE __inline +#else +# define __MATH_INLINE __extern_always_inline +#endif + + +#if defined __USE_ISOC99 && defined __GNUC__ + +/* Test for negative number. Used in the signbit() macro. */ +__MATH_INLINE int +__NTH (__signbitf (float __x)) +{ + return __builtin_signbitf (__x); +} +__MATH_INLINE int +__NTH (__signbit (double __x)) +{ + return __builtin_signbit (__x); +} + +#endif diff --git a/REORG.TODO/sysdeps/tile/bits/setjmp.h b/REORG.TODO/sysdeps/tile/bits/setjmp.h new file mode 100644 index 0000000000..e9efc3b5ef --- /dev/null +++ b/REORG.TODO/sysdeps/tile/bits/setjmp.h @@ -0,0 +1,36 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Define the machine-dependent type `jmp_buf'. TILE version. */ +#ifndef _BITS_SETJMP_H +#define _BITS_SETJMP_H 1 + +#if !defined _SETJMP_H && !defined _PTHREAD_H +# error "Never include <bits/setjmp.h> directly; use <setjmp.h> instead." +#endif + +#ifndef _ASM + +#define __need_int_reg_t +#include <arch/abi.h> + +typedef __uint_reg_t __jmp_buf[32]; + +#endif + +#endif /* bits/setjmp.h */ diff --git a/REORG.TODO/sysdeps/tile/bsd-_setjmp.S b/REORG.TODO/sysdeps/tile/bsd-_setjmp.S new file mode 100644 index 0000000000..4e6a2da560 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/bsd-_setjmp.S @@ -0,0 +1 @@ +/* _setjmp is in setjmp.S */ diff --git a/REORG.TODO/sysdeps/tile/bsd-setjmp.S b/REORG.TODO/sysdeps/tile/bsd-setjmp.S new file mode 100644 index 0000000000..1da848d2f1 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/bsd-setjmp.S @@ -0,0 +1 @@ +/* setjmp is in setjmp.S */ diff --git a/REORG.TODO/sysdeps/tile/bzero.S b/REORG.TODO/sysdeps/tile/bzero.S new file mode 100644 index 0000000000..a2c3bd4661 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/bzero.S @@ -0,0 +1,30 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .text +ENTRY(__bzero) + FEEDBACK_ENTER(__bzero) + { + move r2, r1 + move r1, zero + } + j __memset +END(__bzero) +weak_alias (__bzero, bzero) diff --git a/REORG.TODO/sysdeps/tile/configure b/REORG.TODO/sysdeps/tile/configure new file mode 100644 index 0000000000..534c7dbc5d --- /dev/null +++ b/REORG.TODO/sysdeps/tile/configure @@ -0,0 +1,7 @@ +# This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/tile. + +# We can always access static and hidden symbols in a position independent way. +$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h + +# work around problem with autoconf and empty lines at the end of files diff --git a/REORG.TODO/sysdeps/tile/configure.ac b/REORG.TODO/sysdeps/tile/configure.ac new file mode 100644 index 0000000000..6e246a3bc9 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/configure.ac @@ -0,0 +1,6 @@ +GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. +# Local configure fragment for sysdeps/tile. + +# We can always access static and hidden symbols in a position independent way. +AC_DEFINE(PI_STATIC_AND_HIDDEN) +# work around problem with autoconf and empty lines at the end of files diff --git a/REORG.TODO/sysdeps/tile/crti.S b/REORG.TODO/sysdeps/tile/crti.S new file mode 100644 index 0000000000..522373aff5 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/crti.S @@ -0,0 +1,124 @@ +/* Special .init and .fini section support for tile. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crti.S puts a function prologue at the beginning of the .init and + .fini sections and defines global symbols for those addresses, so + they can be called as functions. The symbols _init and _fini are + magic and cause the linker to emit DT_INIT and DT_FINI. */ + +#include <libc-symbols.h> +#include <sysdep.h> + +#ifndef PREINIT_FUNCTION +# define PREINIT_FUNCTION __gmon_start__ +#endif + +#ifndef PREINIT_FUNCTION_WEAK +# define PREINIT_FUNCTION_WEAK 1 +#endif + +#if PREINIT_FUNCTION_WEAK + weak_extern (PREINIT_FUNCTION) +#else + .hidden PREINIT_FUNCTION +#endif + + .section .init,"ax",@progbits + .align 8 + .globl _init + .type _init, @function +_init: + { + move r29, sp + ADDI_PTR r28, sp, -REGSIZE + ST sp, lr + } + ADDI_PTR sp, sp, -(2 * REGSIZE) + ST r28, r29 +#if PREINIT_FUNCTION_WEAK + lnk r2 +0: +# ifdef __tilegx__ + moveli r1, hw2_last(_GLOBAL_OFFSET_TABLE_ - 0b) + { + shl16insli r1, r1, hw1(_GLOBAL_OFFSET_TABLE_ - 0b) + moveli r0, hw1_last_got(PREINIT_FUNCTION) + } + { + shl16insli r1, r1, hw0(_GLOBAL_OFFSET_TABLE_ - 0b) + shl16insli r0, r0, hw0_got(PREINIT_FUNCTION) + } +# else + { + moveli r1, lo16(_GLOBAL_OFFSET_TABLE_ - 0b) + moveli r0, got_lo16(PREINIT_FUNCTION) + } + { + auli r1, r1, ha16(_GLOBAL_OFFSET_TABLE_ - 0b) + auli r0, r0, got_ha16(PREINIT_FUNCTION) + } +# endif + ADD_PTR r0, r0, r1 + ADD_PTR r0, r0, r2 + LD_PTR r0, r0 + BEQZ r0, .Lno_weak_fn + jalr r0 +#elif defined(__tilegx__) && !defined(NO_PLT_PCREL) + /* Since we are calling from the start of the object to the PLT, + call by loading the full address into a register. */ + lnk r2 +0: + moveli r0, hw2_last_plt(PREINIT_FUNCTION - 0b) + shl16insli r0, r0, hw1_plt(PREINIT_FUNCTION - 0b) + shl16insli r0, r0, hw0_plt(PREINIT_FUNCTION - 0b) + add r0, r0, r2 + jalr r0 +#else + jal plt(PREINIT_FUNCTION) +#endif +.Lno_weak_fn: + + .section .fini,"ax",@progbits + .align 8 + .globl _fini + .type _fini, @function +_fini: + { + move r29, sp + ADDI_PTR r28, sp, -REGSIZE + ST sp, lr + } + ADDI_PTR sp, sp, -(2 * REGSIZE) + ST r28, r29 diff --git a/REORG.TODO/sysdeps/tile/crtn.S b/REORG.TODO/sysdeps/tile/crtn.S new file mode 100644 index 0000000000..53c6e40d06 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/crtn.S @@ -0,0 +1,55 @@ +/* Special .init and .fini section support for tile. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crtn.S puts function epilogues in the .init and .fini sections + corresponding to the prologues in crti.S. */ + +#include <sysdep.h> + + .section .init,"ax",@progbits + ADDI_PTR r29, sp, (2 * REGSIZE) + { + ADDI_PTR sp, sp, (2 * REGSIZE) + LD lr, r29 + } + jrp lr + + .section .fini,"ax",@progbits + ADDI_PTR r29, sp, (2 * REGSIZE) + { + ADDI_PTR sp, sp, (2 * REGSIZE) + LD lr, r29 + } + jrp lr diff --git a/REORG.TODO/sysdeps/tile/dl-lookupcfg.h b/REORG.TODO/sysdeps/tile/dl-lookupcfg.h new file mode 100644 index 0000000000..18a816ec89 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/dl-lookupcfg.h @@ -0,0 +1,27 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define DL_UNMAP_IS_SPECIAL + +#include_next <dl-lookupcfg.h> + +struct link_map; + +void internal_function _dl_unmap (struct link_map *map); + +#define DL_UNMAP(map) _dl_unmap (map) diff --git a/REORG.TODO/sysdeps/tile/dl-machine.h b/REORG.TODO/sysdeps/tile/dl-machine.h new file mode 100644 index 0000000000..c1d784548c --- /dev/null +++ b/REORG.TODO/sysdeps/tile/dl-machine.h @@ -0,0 +1,919 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + Based on work contributed by by Carl Pederson & Martin Schwidefsky. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef dl_machine_h +#define dl_machine_h + +#ifdef __tilegx__ +#define ELF_MACHINE_NAME "tilegx" +#else +#define ELF_MACHINE_NAME "tilepro" +#endif + +#include <sys/param.h> +#include <string.h> +#include <link.h> +#include <bits/wordsize.h> +#include <arch/icache.h> +#include <arch/opcode.h> + +/* Return nonzero iff ELF header is compatible with the running host. */ +static inline int +elf_machine_matches_host (const ElfW(Ehdr) *ehdr) +{ +#if defined __tilegx__ + if (ehdr->e_machine != EM_TILEGX) + return 0; +# if __WORDSIZE == 32 + return (ehdr->e_ident[EI_CLASS] == ELFCLASS32); +# else + return (ehdr->e_ident[EI_CLASS] == ELFCLASS64); +# endif +#elif defined __tilepro__ + return ehdr->e_machine == EM_TILEPRO; +#else +# error "Unknown tile architecture." +#endif +} + + +/* Return the link-time address of _DYNAMIC. Conveniently, this is the + first element of the GOT. This must be inlined in a function which + uses global data. */ + +static inline ElfW(Addr) +elf_machine_dynamic (void) +{ + ElfW(Addr) *got; + +#ifdef __tilegx__ + ElfW(Addr) tmp; + asm( " { lnk %0; moveli %1, hw2_last(_GLOBAL_OFFSET_TABLE_ - 1f) }\n" + "1: shl16insli %1, %1, hw1(_GLOBAL_OFFSET_TABLE_ - 1b)\n" + " shl16insli %1, %1, hw0(_GLOBAL_OFFSET_TABLE_ - 1b)\n" + " add %0, %0, %1" + : "=r" (got), "=r" (tmp)); +#else + asm( " lnk %0\n" + "1: addli %0, %0, lo16(_GLOBAL_OFFSET_TABLE_ - 1b)\n" + " auli %0, %0, ha16(_GLOBAL_OFFSET_TABLE_ - 1b)" + : "=r" (got)); +#endif + + return *got; +} + + +/* Return the run-time load address of the shared object. */ +static inline ElfW(Addr) +elf_machine_load_address (void) +{ + ElfW(Addr) *got; + ElfW(Addr) dynamic; + +#ifdef __tilegx__ + ElfW(Addr) tmp; + asm( " lnk %2\n" + "1: {\n" + " moveli %0, hw2_last(_GLOBAL_OFFSET_TABLE_ - 1b)\n" + " moveli %1, hw2_last(_DYNAMIC - 1b)\n" + " }\n" + " {\n" + " shl16insli %0, %0, hw1(_GLOBAL_OFFSET_TABLE_ - 1b)\n" + " shl16insli %1, %1, hw1(_DYNAMIC - 1b)\n" + " }\n" + " {\n" + " shl16insli %0, %0, hw0(_GLOBAL_OFFSET_TABLE_ - 1b)\n" + " shl16insli %1, %1, hw0(_DYNAMIC - 1b)\n" + " }\n" + " {\n" + " add %0, %0, %2\n" + " add %1, %1, %2\n" + " }" + : "=r" (got), "=r" (dynamic), "=r" (tmp)); +#else + asm( " lnk %0\n" + "1: {\n" + " addli %0, %0, lo16(_GLOBAL_OFFSET_TABLE_ - 1b)\n" + " addli %1, %0, lo16(_DYNAMIC - 1b)\n" + " }\n" + " {\n" + " auli %0, %0, ha16(_GLOBAL_OFFSET_TABLE_ - 1b)\n" + " auli %1, %1, ha16(_DYNAMIC - 1b)\n" + " }\n" + : "=r" (got), "=r" (dynamic)); +#endif + + return dynamic - *got; +} + +/* Flush some range of the instruction cache. If invoked prior to + actually setting dl_pagesize, we conservatively use 4KB, which + is the smallest page size we could plausibly be running with. */ +static inline void +_dl_flush_icache (const void *addr, unsigned long size) +{ + invalidate_icache (addr, size, GLRO(dl_pagesize) ? : 4096); +} + +/* Set up the loaded object described by L so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. */ + +static inline int __attribute__ ((unused)) +elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) +{ + ElfW(Addr) *gotplt; + extern void _dl_runtime_resolve (ElfW(Word)); + extern void _dl_runtime_profile (ElfW(Word)); + + if (l->l_info[DT_JMPREL] && lazy) + { + gotplt = (ElfW(Addr) *) D_PTR (l, l_info[DT_PLTGOT]); + + /* The GOT entries for functions in the PLT have not yet been filled + in. Their initial contents will arrange when called to put in + registers an offset into the .rel.plt section, and gotplt[0], then + jump to gotplt[1]. */ + + /* Identify this shared object. */ + gotplt[0] = (ElfW(Addr)) l; + + /* The gotplt[1] entry contains the address of a function which gets + called to get the address of a so far unresolved function and jump + to it. The profiling extension of the dynamic linker allows to + intercept the calls to collect information. In this case we don't + store the address in the GOTPLT so that all future calls also end + in this function. */ + if (__builtin_expect (profile, 0)) + { + gotplt[1] = (ElfW(Addr)) &_dl_runtime_profile; + + if (GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), l)) + /* This is the object we are looking for. Say that we really + want profiling and the timers are started. */ + GL(dl_profile_map) = l; + } + else + /* This function will get called to fix up the GOTPLT entry + indicated by the offset on the stack, and then jump to the + resolved address. */ + gotplt[1] = (ElfW(Addr)) &_dl_runtime_resolve; + } + + return lazy; +} + +#if __WORDSIZE == 32 +/* Mask identifying addresses reserved for the user program, + where the dynamic linker should not map anything. */ +#define ELF_MACHINE_USER_ADDRESS_MASK 0xf8000000UL +#endif + +/* Initial entry point code for the dynamic linker. + The C function `_dl_start' is the real entry point; + its return value is the user program's entry point. */ + +#define RTLD_START asm (".globl _dl_start"); + +#ifndef RTLD_START_SPECIAL_INIT +#define RTLD_START_SPECIAL_INIT /* nothing */ +#endif + +/* Wrap a generic Tilera relocation type. */ +#ifdef __tilegx__ +#define R_TILE(x) R_TILEGX_##x +#define __R_TILE_TLS(x,c) R_TILEGX_TLS_##x##c +#define _R_TILE_TLS(x,c) __R_TILE_TLS(x,c) +#define R_TILE_TLS(x) _R_TILE_TLS(x,__ELF_NATIVE_CLASS) +#else +#define R_TILE(x) R_TILEPRO_##x +#define R_TILE_TLS(x) R_TILEPRO_TLS_##x##32 +#endif + +/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry or + TLS variable, so undefined references should not be allowed to + define the value. + ELF_RTYPE_CLASS_COPY iff TYPE should not be allowed to resolve to one + of the main executable's symbols, as for a COPY reloc. */ +#define elf_machine_type_class(type) \ + ((((type) == R_TILE(JMP_SLOT) || (type) == R_TILE_TLS(DTPMOD) \ + || (type) == R_TILE_TLS(DTPOFF) || (type) == R_TILE_TLS(TPOFF)) \ + * ELF_RTYPE_CLASS_PLT) \ + | (((type) == R_TILE(COPY)) * ELF_RTYPE_CLASS_COPY)) + +/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ +#define ELF_MACHINE_JMP_SLOT R_TILE(JMP_SLOT) + +/* TILE never uses Elf32_Rel relocations. */ +#define ELF_MACHINE_NO_REL 1 +#define ELF_MACHINE_NO_RELA 0 + +/* We define an initialization functions. This is called very early in + _dl_sysdep_start. */ +#define DL_PLATFORM_INIT dl_platform_init () + +static inline void __attribute__ ((unused)) +dl_platform_init (void) +{ + if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') + /* Avoid an empty string which would disturb us. */ + GLRO(dl_platform) = NULL; +} + +static inline ElfW(Addr) +elf_machine_fixup_plt (struct link_map *map, lookup_t t, + const ElfW(Rela) *reloc, + ElfW(Addr) *reloc_addr, ElfW(Addr) value) +{ + return *reloc_addr = value; +} + +/* Return the final value of a plt relocation. */ +static inline ElfW(Addr) +elf_machine_plt_value (struct link_map *map, const ElfW(Rela) *reloc, + ElfW(Addr) value) +{ + return value; +} + +/* Support notifying the simulator about new objects. */ +void internal_function _dl_after_load (struct link_map *l); +#define DL_AFTER_LOAD _dl_after_load + +/* Names of the architecture-specific auditing callback functions. */ +#define ARCH_LA_PLTENTER tile_gnu_pltenter +#define ARCH_LA_PLTEXIT tile_gnu_pltexit + +#endif /* !dl_machine_h */ + + +#ifdef RESOLVE_MAP + +struct reloc_howto +{ + /* Right shift operand by this number of bits. */ + unsigned char right_shift; + +#ifdef __tilegx__ + /* If nonzero, this is updating a code bundle. */ + unsigned char is_bundle_update; +#else + /* If nonzero, add 0x8000 to the value. */ + unsigned char add_0x8000; +#endif + + /* If nonzero, subtract the containing address from the address. */ + unsigned char is_pcrel; + + /* Size in bytes, or 0 if this table entry should be ignored. */ + unsigned char byte_size; +}; + +/* Relocation information. Cannot contain create_* function pointers + because then the table would not be position-independent. */ +static const struct reloc_howto howto[] = +{ +#ifdef __tilegx__ + +# if __WORDSIZE == 32 + /* The GX -m32 loader only handles 32-bit types, so it will be confused + by shifts larger than that. We convert them to just sign-extend; + they usually indicate a program bug or missed optimization, but we + have to handle them correctly anyway. */ +# define S32 31 +# define S48 31 +# else +# define S32 32 +# define S48 48 +# endif + + /* R_TILEGX_NONE */ { 0, 0, 0, 0 }, + /* R_TILEGX_64 */ { 0, 0, 0, 8 }, + /* R_TILEGX_32 */ { 0, 0, 0, 4 }, + /* R_TILEGX_16 */ { 0, 0, 0, 2 }, + /* R_TILEGX_8 */ { 0, 0, 0, 1 }, + /* R_TILEGX_64_PCREL */ { 0, 0, 1, 8 }, + /* R_TILEGX_32_PCREL */ { 0, 0, 1, 4 }, + /* R_TILEGX_16_PCREL */ { 0, 0, 1, 2 }, + /* R_TILEGX_8_PCREL */ { 0, 0, 1, 1 }, + /* R_TILEGX_HW0 */ { 0, 0, 0, 0 }, + /* R_TILEGX_HW1 */ { 16, 0, 0, 0 }, + /* R_TILEGX_HW2 */ { S32, 0, 0, 0 }, + /* R_TILEGX_HW3 */ { S48, 0, 0, 0 }, + /* R_TILEGX_HW0_LAST */ { 0, 0, 0, 0 }, + /* R_TILEGX_HW1_LAST */ { 16, 0, 0, 0 }, + /* R_TILEGX_HW2_LAST */ { S32, 0, 0, 0 }, + /* R_TILEGX_COPY */ { 0, 0, 0, 0 }, + /* R_TILEGX_GLOB_DAT */ { 0, 0, 0, 8 }, + /* R_TILEGX_JMP_SLOT */ { 0, 0, 0, 0 }, + /* R_TILEGX_RELATIVE */ { 0, 0, 0, 0 }, + /* R_TILEGX_BROFF_X1 */ { 3, 1, 1, 8 }, + /* R_TILEGX_JUMPOFF_X1 */ { 3, 1, 1, 8 }, + /* R_TILEGX_JUMPOFF_X1_PLT */ { 3, 1, 1, 8 }, + /* R_TILEGX_IMM8_X0 */ { 0, 1, 0, 8 }, + /* R_TILEGX_IMM8_Y0 */ { 0, 1, 0, 8 }, + /* R_TILEGX_IMM8_X1 */ { 0, 1, 0, 8 }, + /* R_TILEGX_IMM8_Y1 */ { 0, 1, 0, 8 }, + /* R_TILEGX_DEST_IMM8_X1 */ { 0, 1, 0, 8 }, + /* R_TILEGX_MT_IMM14_X1 */ { 0, 1, 0, 8 }, + /* R_TILEGX_MF_IMM14_X1 */ { 0, 1, 0, 8 }, + /* R_TILEGX_MMSTART_X0 */ { 0, 1, 0, 8 }, + /* R_TILEGX_MMEND_X0 */ { 0, 1, 0, 8 }, + /* R_TILEGX_SHAMT_X0 */ { 0, 1, 0, 8 }, + /* R_TILEGX_SHAMT_X1 */ { 0, 1, 0, 8 }, + /* R_TILEGX_SHAMT_Y0 */ { 0, 1, 0, 8 }, + /* R_TILEGX_SHAMT_Y1 */ { 0, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW0 */ { 0, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW0 */ { 0, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW1 */ { 16, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW1 */ { 16, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW2 */ { S32, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW2 */ { S32, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW3 */ { S48, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW3 */ { S48, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW0_LAST */ { 0, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW0_LAST */ { 0, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW1_LAST */ { 16, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW1_LAST */ { 16, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW2_LAST */ { S32, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW2_LAST */ { S32, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW0_PCREL */ { 0, 1, 1, 8 }, + /* R_TILEGX_IMM16_X1_HW0_PCREL */ { 0, 1, 1, 8 }, + /* R_TILEGX_IMM16_X0_HW1_PCREL */ { 16, 1, 1, 8 }, + /* R_TILEGX_IMM16_X1_HW1_PCREL */ { 16, 1, 1, 8 }, + /* R_TILEGX_IMM16_X0_HW2_PCREL */ { S32, 1, 1, 8 }, + /* R_TILEGX_IMM16_X1_HW2_PCREL */ { S32, 1, 1, 8 }, + /* R_TILEGX_IMM16_X0_HW3_PCREL */ { S48, 1, 1, 8 }, + /* R_TILEGX_IMM16_X1_HW3_PCREL */ { S48, 1, 1, 8 }, + /* R_TILEGX_IMM16_X0_HW0_LAST_PCREL */ { 0, 1, 1, 8 }, + /* R_TILEGX_IMM16_X1_HW0_LAST_PCREL */ { 0, 1, 1, 8 }, + /* R_TILEGX_IMM16_X0_HW1_LAST_PCREL */ { 16, 1, 1, 8 }, + /* R_TILEGX_IMM16_X1_HW1_LAST_PCREL */ { 16, 1, 1, 8 }, + /* R_TILEGX_IMM16_X0_HW2_LAST_PCREL */ { S32, 1, 1, 8 }, + /* R_TILEGX_IMM16_X1_HW2_LAST_PCREL */ { S32, 1, 1, 8 }, + /* R_TILEGX_IMM16_X0_HW0_GOT */ { 0, 1, 0, 0 }, + /* R_TILEGX_IMM16_X1_HW0_GOT */ { 0, 1, 0, 0 }, + /* R_TILEGX_IMM16_X0_HW1_GOT */ { 16, 1, 0, 0 }, + /* R_TILEGX_IMM16_X1_HW1_GOT */ { 16, 1, 0, 0 }, + /* R_TILEGX_IMM16_X0_HW2_GOT */ { S32, 1, 0, 0 }, + /* R_TILEGX_IMM16_X1_HW2_GOT */ { S32, 1, 0, 0 }, + /* R_TILEGX_IMM16_X0_HW3_GOT */ { S48, 1, 0, 0 }, + /* R_TILEGX_IMM16_X1_HW3_GOT */ { S48, 1, 0, 0 }, + /* R_TILEGX_IMM16_X0_HW0_LAST_GOT */ { 0, 1, 0, 0 }, + /* R_TILEGX_IMM16_X1_HW0_LAST_GOT */ { 0, 1, 0, 0 }, + /* R_TILEGX_IMM16_X0_HW1_LAST_GOT */ { 16, 1, 0, 0 }, + /* R_TILEGX_IMM16_X1_HW1_LAST_GOT */ { 16, 1, 0, 0 }, + /* R_TILEGX_IMM16_X0_HW2_LAST_GOT */ { S32, 1, 0, 0 }, + /* R_TILEGX_IMM16_X1_HW2_LAST_GOT */ { S32, 1, 0, 0 }, + /* R_TILEGX_IMM16_X0_HW0_TLS_GD */ { 0, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW0_TLS_GD */ { 0, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW1_TLS_GD */ { 16, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW1_TLS_GD */ { 16, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW2_TLS_GD */ { S32, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW2_TLS_GD */ { S32, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW3_TLS_GD */ { S48, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW3_TLS_GD */ { S48, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW0_LAST_TLS_GD */{ 0, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW0_LAST_TLS_GD */{ 0, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW1_LAST_TLS_GD */{ 16, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW1_LAST_TLS_GD */{ 16, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW2_LAST_TLS_GD */{ S32, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW2_LAST_TLS_GD */{ S32, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW0_TLS_IE */ { 0, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW0_TLS_IE */ { 0, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW1_TLS_IE */ { 16, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW1_TLS_IE */ { 16, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW2_TLS_IE */ { S32, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW2_TLS_IE */ { S32, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW3_TLS_IE */ { S48, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW3_TLS_IE */ { S48, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW0_LAST_TLS_IE */{ 0, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW0_LAST_TLS_IE */{ 0, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW1_LAST_TLS_IE */{ 16, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW1_LAST_TLS_IE */{ 16, 1, 0, 8 }, + /* R_TILEGX_IMM16_X0_HW2_LAST_TLS_IE */{ S32, 1, 0, 8 }, + /* R_TILEGX_IMM16_X1_HW2_LAST_TLS_IE */{ S32, 1, 0, 8 }, + /* R_TILEGX_TLS_DTPMOD64 */ { 0, 0, 0, 0 }, + /* R_TILEGX_TLS_DTPOFF64 */ { 0, 0, 0, 0 }, + /* R_TILEGX_TLS_TPOFF64 */ { 0, 0, 0, 0 }, + /* R_TILEGX_TLS_DTPMOD32 */ { 0, 0, 0, 0 }, + /* R_TILEGX_TLS_DTPOFF32 */ { 0, 0, 0, 0 }, + /* R_TILEGX_TLS_TPOFF32 */ { 0, 0, 0, 0 } +#else + /* R_TILEPRO_NONE */ { 0, 0, 0, 0 }, + /* R_TILEPRO_32 */ { 0, 0, 0, 4 }, + /* R_TILEPRO_16 */ { 0, 0, 0, 2 }, + /* R_TILEPRO_8 */ { 0, 0, 0, 1 }, + /* R_TILEPRO_32_PCREL */ { 0, 0, 1, 4 }, + /* R_TILEPRO_16_PCREL */ { 0, 0, 1, 2 }, + /* R_TILEPRO_8_PCREL */ { 0, 0, 1, 1 }, + /* R_TILEPRO_LO16 */ { 0, 0, 0, 2 }, + /* R_TILEPRO_HI16 */ { 16, 0, 0, 2 }, + /* R_TILEPRO_HA16 */ { 16, 1, 0, 2 }, + /* R_TILEPRO_COPY */ { 0, 0, 0, 0 }, + /* R_TILEPRO_GLOB_DAT */ { 0, 0, 0, 4 }, + /* R_TILEPRO_JMP_SLOT */ { 0, 0, 0, 0 }, + /* R_TILEPRO_RELATIVE */ { 0, 0, 0, 0 }, + /* R_TILEPRO_BROFF_X1 */ { 3, 0, 1, 8 }, + /* R_TILEPRO_JOFFLONG_X1 */ { 3, 0, 1, 8 }, + /* R_TILEPRO_JOFFLONG_X1_PLT */ { 3, 0, 1, 8 }, + /* R_TILEPRO_IMM8_X0 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM8_Y0 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM8_X1 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM8_Y1 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_MT_IMM15_X1 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_MF_IMM15_X1 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X0 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X1 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X0_LO */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X1_LO */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X0_HI */ { 16, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X1_HI */ { 16, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X0_HA */ { 16, 1, 0, 8 }, + /* R_TILEPRO_IMM16_X1_HA */ { 16, 1, 0, 8 }, + /* R_TILEPRO_IMM16_X0_PCREL */ { 0, 0, 1, 8 }, + /* R_TILEPRO_IMM16_X1_PCREL */ { 0, 0, 1, 8 }, + /* R_TILEPRO_IMM16_X0_LO_PCREL */ { 0, 0, 1, 8 }, + /* R_TILEPRO_IMM16_X1_LO_PCREL */ { 0, 0, 1, 8 }, + /* R_TILEPRO_IMM16_X0_HI_PCREL */ { 16, 0, 1, 8 }, + /* R_TILEPRO_IMM16_X1_HI_PCREL */ { 16, 0, 1, 8 }, + /* R_TILEPRO_IMM16_X0_HA_PCREL */ { 16, 1, 1, 8 }, + /* R_TILEPRO_IMM16_X1_HA_PCREL */ { 16, 1, 1, 8 }, + /* R_TILEPRO_IMM16_X0_GOT */ { 0, 0, 0, 0 }, + /* R_TILEPRO_IMM16_X1_GOT */ { 0, 0, 0, 0 }, + /* R_TILEPRO_IMM16_X0_GOT_LO */ { 0, 0, 0, 0 }, + /* R_TILEPRO_IMM16_X1_GOT_LO */ { 0, 0, 0, 0 }, + /* R_TILEPRO_IMM16_X0_GOT_HI */ { 0, 0, 0, 0 }, + /* R_TILEPRO_IMM16_X1_GOT_HI */ { 0, 0, 0, 0 }, + /* R_TILEPRO_IMM16_X0_GOT_HA */ { 0, 0, 0, 0 }, + /* R_TILEPRO_IMM16_X1_GOT_HA */ { 0, 0, 0, 0 }, + /* R_TILEPRO_MMSTART_X0 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_MMEND_X0 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_MMSTART_X1 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_MMEND_X1 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_SHAMT_X0 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_SHAMT_X1 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_SHAMT_Y0 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_SHAMT_Y1 */ { 0, 0, 0, 8 }, + /* R_TILEPRO_SN_BROFF */ { 0, 0, 0, 0 }, + /* R_TILEPRO_SN_IMM8 */ { 0, 0, 0, 0 }, + /* R_TILEPRO_SN_UIMM8 */ { 0, 0, 0, 0 }, + /* R_TILEPRO_SN_BYTE0 */ { 0, 0, 0, 0 }, + /* R_TILEPRO_SN_BYTE1 */ { 0, 0, 0, 0 }, + /* R_TILEPRO_SN_BYTE2 */ { 0, 0, 0, 0 }, + /* R_TILEPRO_SN_BYTE3 */ { 0, 0, 0, 0 }, + /* R_TILEPRO_SN_SPCREL0 */ { 0, 0, 0, 0 }, + /* R_TILEPRO_SN_SPCREL1 */ { 0, 0, 0, 0 }, + /* R_TILEPRO_SN_SPCREL2 */ { 0, 0, 0, 0 }, + /* R_TILEPRO_SN_SPCREL3 */ { 0, 0, 0, 0 }, + /* R_TILEPRO_IMM16_X0_TLS_GD */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X1_TLS_GD */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X0_TLS_GD_LO */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X1_TLS_GD_LO */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X0_TLS_GD_HI */ { 16, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X1_TLS_GD_HI */ { 16, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X0_TLS_GD_HA */ { 16, 1, 0, 8 }, + /* R_TILEPRO_IMM16_X1_TLS_GD_HA */ { 16, 1, 0, 8 }, + /* R_TILEPRO_IMM16_X0_TLS_IE */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X1_TLS_IE */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X0_TLS_IE_LO */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X1_TLS_IE_LO */ { 0, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X0_TLS_IE_HI */ { 16, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X1_TLS_IE_HI */ { 16, 0, 0, 8 }, + /* R_TILEPRO_IMM16_X0_TLS_IE_HA */ { 16, 1, 0, 8 }, + /* R_TILEPRO_IMM16_X1_TLS_IE_HA */ { 16, 1, 0, 8 }, + /* R_TILEPRO_TLS_DTPMOD32 */ { 0, 0, 0, 0 }, + /* R_TILEPRO_TLS_DTPOFF32 */ { 0, 0, 0, 0 }, + /* R_TILEPRO_TLS_TPOFF32 */ { 0, 0, 0, 0 }, +#endif +}; + +#if __ELF_NATIVE_CLASS == 32 +#define ELFW_R_TYPE ELF32_R_TYPE +#define ELFW_ST_TYPE ELF32_ST_TYPE +#else +#define ELFW_R_TYPE ELF64_R_TYPE +#define ELFW_ST_TYPE ELF64_ST_TYPE +#endif + +/* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +auto inline void __attribute__ ((always_inline)) +elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + const ElfW(Sym) *sym, const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) +{ + ElfW(Addr) *const reloc_addr = reloc_addr_arg; + const unsigned int r_type = ELFW_R_TYPE (reloc->r_info); + +#if !defined RTLD_BOOTSTRAP || !defined HAVE_Z_COMBRELOC + if (__builtin_expect (r_type == R_TILE(RELATIVE), 0)) + { +# if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC + /* This is defined in rtld.c, but nowhere in the static libc.a; + make the reference weak so static programs can still link. + This declaration cannot be done when compiling rtld.c + (i.e. #ifdef RTLD_BOOTSTRAP) because rtld.c contains the + common defn for _dl_rtld_map, which is incompatible with a + weak decl in the same file. */ +# ifndef SHARED + weak_extern (GL(dl_rtld_map)); +# endif + if (map != &GL(dl_rtld_map)) /* Already done in rtld itself. */ +# endif + *reloc_addr = map->l_addr + reloc->r_addend; + return; + } +#endif + + if (__builtin_expect (r_type == R_TILE(NONE), 0)) + return; + +#if !defined RTLD_BOOTSTRAP && !defined RESOLVE_CONFLICT_FIND_MAP + const ElfW(Sym) *const refsym = sym; +#endif + struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); + ElfW(Addr) value; + + if (sym == NULL) + value = 0; + else if (ELFW_ST_TYPE (sym->st_info) == STT_SECTION) + value = map->l_addr; /* like a RELATIVE reloc */ + else + value = sym_map->l_addr + sym->st_value; + + if (sym != NULL + && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0) + && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1) + && __builtin_expect (!skip_ifunc, 1)) + value = ((Elf64_Addr (*) (void)) value) (); + + switch (r_type) + { + case R_TILE(JMP_SLOT): + elf_machine_fixup_plt (map, 0, reloc, reloc_addr, + value + reloc->r_addend); + return; + +#ifndef RESOLVE_CONFLICT_FIND_MAP + case R_TILE_TLS(DTPMOD): +# ifdef RTLD_BOOTSTRAP + /* During startup the dynamic linker is always the module + with index 1. + XXX If this relocation is necessary move before RESOLVE + call. */ + *reloc_addr = 1; +# else + /* Get the information from the link map returned by the + resolv function. */ + if (sym_map != NULL) + *reloc_addr = sym_map->l_tls_modid; +# endif + return; + case R_TILE_TLS(DTPOFF): +# ifndef RTLD_BOOTSTRAP + /* During relocation all TLS symbols are defined and used. + Therefore the offset is already correct. */ + if (sym != NULL) + *reloc_addr = sym->st_value + reloc->r_addend; +# endif + return; + case R_TILE_TLS(TPOFF): +# ifdef RTLD_BOOTSTRAP + *reloc_addr = sym->st_value + reloc->r_addend + map->l_tls_offset; +# else + if (sym != NULL) + { + CHECK_STATIC_TLS (map, sym_map); + *reloc_addr = (sym->st_value + reloc->r_addend + + sym_map->l_tls_offset); + } +#endif + return; +#endif /* use TLS */ + +#if !defined RTLD_BOOTSTRAP && !defined RESOLVE_CONFLICT_FIND_MAP + /* Not needed in dl-conflict.c. */ + case R_TILE(COPY): + if (sym == NULL) + /* This can happen in trace mode if an object could not be found. */ + return; + if (__builtin_expect (sym->st_size > refsym->st_size, 0) + || (__builtin_expect (sym->st_size < refsym->st_size, 0) + && __builtin_expect (GLRO(dl_verbose), 0))) + { + const char *strtab; + + strtab = (const char *) D_PTR (map,l_info[DT_STRTAB]); + _dl_error_printf ("%s: Symbol `%s' has different size in shared" + " object, consider re-linking\n", + RTLD_PROGNAME, strtab + refsym->st_name); + } + memcpy (reloc_addr_arg, (void *) value, + MIN (sym->st_size, refsym->st_size)); + return; +#endif + } + + /* All remaining relocations must be in the lookup table. */ + const struct reloc_howto *h = &howto[r_type]; + if ((unsigned int) r_type >= sizeof howto / sizeof howto[0] || + h->byte_size == 0) + { +#if !defined RTLD_BOOTSTRAP || defined _NDEBUG + /* We add these checks in the version to relocate ld.so only + if we are still debugging. */ + _dl_reloc_bad_type (map, r_type, 0); +#endif + return; + } + + value += reloc->r_addend; + + /* The lookup table entry knows how to perform this reloc. */ + if (h->is_pcrel) + value -= (ElfW(Addr)) reloc_addr; + +#ifndef __tilegx__ + if (h->add_0x8000) + value += 0x8000; +#endif + + value = ((long) value) >> h->right_shift; + + switch (h->byte_size) + { + case 1: + *(char *) reloc_addr = value; + return; + case 2: + *(short *) reloc_addr = value; + return; + case 4: + *(int *) reloc_addr = value; + return; +#ifdef __tilegx__ + case 8: + if (!h->is_bundle_update) + { + *(ElfW(Addr) *) reloc_addr = value; + return; + } +#endif + } + + /* We are updating a bundle, so use the function pointer that + swizzles the operand bits into the right location. */ + + tile_bundle_bits *p = (tile_bundle_bits *) reloc_addr; + tile_bundle_bits bits = *p; + +#define MUNGE_SIGNED(func, length) do { \ + bits = ((bits & ~create_##func (-1)) | create_##func (value)); \ + ElfW(Addr) result = get_##func (bits); \ + int signbits = __WORDSIZE - length; \ + result = (long) (result << signbits) >> signbits; \ + if (result != value) \ + _dl_signal_error (0, map->l_name, NULL, \ + "relocation value too large for " #func); \ + } while (0) + +#define MUNGE(func) MUNGE_SIGNED(func, __WORDSIZE) + +#define MUNGE_NOCHECK(func) \ + bits = ((bits & ~create_##func (-1)) | create_##func (value)) + + switch (r_type) + { +#ifdef __tilegx__ + case R_TILEGX_BROFF_X1: + MUNGE_SIGNED (BrOff_X1, 17); + break; + case R_TILEGX_JUMPOFF_X1: + case R_TILEGX_JUMPOFF_X1_PLT: + MUNGE_SIGNED (JumpOff_X1, 27); + break; + case R_TILEGX_IMM8_X0: + MUNGE_SIGNED (Imm8_X0, 8); + break; + case R_TILEGX_IMM8_Y0: + MUNGE_SIGNED (Imm8_Y0, 8); + break; + case R_TILEGX_IMM8_X1: + MUNGE_SIGNED (Imm8_X1, 8); + break; + case R_TILEGX_IMM8_Y1: + MUNGE_SIGNED (Imm8_Y1, 8); + break; + case R_TILEGX_MT_IMM14_X1: + MUNGE (MT_Imm14_X1); + break; + case R_TILEGX_MF_IMM14_X1: + MUNGE (MF_Imm14_X1); + break; + case R_TILEGX_IMM16_X0_HW0: + case R_TILEGX_IMM16_X0_HW1: + case R_TILEGX_IMM16_X0_HW2: + case R_TILEGX_IMM16_X0_HW3: + case R_TILEGX_IMM16_X0_HW0_PCREL: + case R_TILEGX_IMM16_X0_HW1_PCREL: + case R_TILEGX_IMM16_X0_HW2_PCREL: + case R_TILEGX_IMM16_X0_HW3_PCREL: + case R_TILEGX_IMM16_X0_HW0_TLS_GD: + case R_TILEGX_IMM16_X0_HW0_TLS_IE: + MUNGE_NOCHECK (Imm16_X0); + break; + case R_TILEGX_IMM16_X0_HW0_LAST: + case R_TILEGX_IMM16_X0_HW1_LAST: + case R_TILEGX_IMM16_X0_HW2_LAST: + case R_TILEGX_IMM16_X0_HW0_LAST_PCREL: + case R_TILEGX_IMM16_X0_HW1_LAST_PCREL: + case R_TILEGX_IMM16_X0_HW2_LAST_PCREL: + case R_TILEGX_IMM16_X0_HW0_LAST_TLS_GD: + case R_TILEGX_IMM16_X0_HW1_LAST_TLS_GD: + case R_TILEGX_IMM16_X0_HW0_LAST_TLS_IE: + case R_TILEGX_IMM16_X0_HW1_LAST_TLS_IE: + MUNGE_SIGNED (Imm16_X0, 16); + break; + case R_TILEGX_IMM16_X1_HW0: + case R_TILEGX_IMM16_X1_HW1: + case R_TILEGX_IMM16_X1_HW2: + case R_TILEGX_IMM16_X1_HW3: + case R_TILEGX_IMM16_X1_HW0_PCREL: + case R_TILEGX_IMM16_X1_HW1_PCREL: + case R_TILEGX_IMM16_X1_HW2_PCREL: + case R_TILEGX_IMM16_X1_HW3_PCREL: + case R_TILEGX_IMM16_X1_HW0_TLS_GD: + case R_TILEGX_IMM16_X1_HW0_TLS_IE: + MUNGE_NOCHECK (Imm16_X1); + break; + case R_TILEGX_IMM16_X1_HW0_LAST: + case R_TILEGX_IMM16_X1_HW1_LAST: + case R_TILEGX_IMM16_X1_HW2_LAST: + case R_TILEGX_IMM16_X1_HW0_LAST_PCREL: + case R_TILEGX_IMM16_X1_HW1_LAST_PCREL: + case R_TILEGX_IMM16_X1_HW2_LAST_PCREL: + case R_TILEGX_IMM16_X1_HW0_LAST_TLS_GD: + case R_TILEGX_IMM16_X1_HW1_LAST_TLS_GD: + case R_TILEGX_IMM16_X1_HW0_LAST_TLS_IE: + case R_TILEGX_IMM16_X1_HW1_LAST_TLS_IE: + MUNGE_SIGNED (Imm16_X1, 16); + break; + case R_TILEGX_MMSTART_X0: + MUNGE (BFStart_X0); + break; + case R_TILEGX_MMEND_X0: + MUNGE (BFEnd_X0); + break; + case R_TILEGX_SHAMT_X0: + MUNGE (ShAmt_X0); + break; + case R_TILEGX_SHAMT_X1: + MUNGE (ShAmt_X1); + break; + case R_TILEGX_SHAMT_Y0: + MUNGE (ShAmt_Y0); + break; + case R_TILEGX_SHAMT_Y1: + MUNGE (ShAmt_Y1); + break; +#else + case R_TILEPRO_BROFF_X1: + MUNGE_SIGNED (BrOff_X1, 17); + break; + case R_TILEPRO_JOFFLONG_X1: + case R_TILEPRO_JOFFLONG_X1_PLT: + MUNGE_NOCHECK (JOffLong_X1); /* holds full 32-bit value */ + break; + case R_TILEPRO_IMM8_X0: + MUNGE_SIGNED (Imm8_X0, 8); + break; + case R_TILEPRO_IMM8_Y0: + MUNGE_SIGNED (Imm8_Y0, 8); + break; + case R_TILEPRO_IMM8_X1: + MUNGE_SIGNED (Imm8_X1, 8); + break; + case R_TILEPRO_IMM8_Y1: + MUNGE_SIGNED (Imm8_Y1, 8); + break; + case R_TILEPRO_MT_IMM15_X1: + MUNGE (MT_Imm15_X1); + break; + case R_TILEPRO_MF_IMM15_X1: + MUNGE (MF_Imm15_X1); + break; + case R_TILEPRO_IMM16_X0_LO: + case R_TILEPRO_IMM16_X0_HI: + case R_TILEPRO_IMM16_X0_HA: + case R_TILEPRO_IMM16_X0_LO_PCREL: + case R_TILEPRO_IMM16_X0_HI_PCREL: + case R_TILEPRO_IMM16_X0_HA_PCREL: + case R_TILEPRO_IMM16_X0_TLS_GD_LO: + case R_TILEPRO_IMM16_X0_TLS_GD_HI: + case R_TILEPRO_IMM16_X0_TLS_GD_HA: + case R_TILEPRO_IMM16_X0_TLS_IE_LO: + case R_TILEPRO_IMM16_X0_TLS_IE_HI: + case R_TILEPRO_IMM16_X0_TLS_IE_HA: + MUNGE_NOCHECK (Imm16_X0); + break; + case R_TILEPRO_IMM16_X0: + case R_TILEPRO_IMM16_X0_PCREL: + case R_TILEPRO_IMM16_X0_TLS_GD: + case R_TILEPRO_IMM16_X0_TLS_IE: + MUNGE_SIGNED (Imm16_X0, 16); + break; + case R_TILEPRO_IMM16_X1_LO: + case R_TILEPRO_IMM16_X1_HI: + case R_TILEPRO_IMM16_X1_HA: + case R_TILEPRO_IMM16_X1_LO_PCREL: + case R_TILEPRO_IMM16_X1_HI_PCREL: + case R_TILEPRO_IMM16_X1_HA_PCREL: + case R_TILEPRO_IMM16_X1_TLS_GD_LO: + case R_TILEPRO_IMM16_X1_TLS_GD_HI: + case R_TILEPRO_IMM16_X1_TLS_GD_HA: + case R_TILEPRO_IMM16_X1_TLS_IE_LO: + case R_TILEPRO_IMM16_X1_TLS_IE_HI: + case R_TILEPRO_IMM16_X1_TLS_IE_HA: + MUNGE_NOCHECK (Imm16_X1); + break; + case R_TILEPRO_IMM16_X1: + case R_TILEPRO_IMM16_X1_PCREL: + case R_TILEPRO_IMM16_X1_TLS_GD: + case R_TILEPRO_IMM16_X1_TLS_IE: + MUNGE_SIGNED (Imm16_X1, 16); + break; + case R_TILEPRO_MMSTART_X0: + MUNGE (MMStart_X0); + break; + case R_TILEPRO_MMEND_X0: + MUNGE (MMEnd_X0); + break; + case R_TILEPRO_MMSTART_X1: + MUNGE (MMStart_X1); + break; + case R_TILEPRO_MMEND_X1: + MUNGE (MMEnd_X1); + break; + case R_TILEPRO_SHAMT_X0: + MUNGE (ShAmt_X0); + break; + case R_TILEPRO_SHAMT_X1: + MUNGE (ShAmt_X1); + break; + case R_TILEPRO_SHAMT_Y0: + MUNGE (ShAmt_Y0); + break; + case R_TILEPRO_SHAMT_Y1: + MUNGE (ShAmt_Y1); + break; +#endif + } +#undef MUNGE + *p = bits; + _dl_flush_icache (p, sizeof (*p)); +} + +auto inline void __attribute__ ((always_inline)) +elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + void *const reloc_addr_arg) +{ + ElfW(Addr) *const reloc_addr = reloc_addr_arg; + *reloc_addr = l_addr + reloc->r_addend; +} + +auto inline void __attribute__ ((always_inline)) +elf_machine_lazy_rel (struct link_map *map, + ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + int skip_ifunc) +{ + const unsigned int r_type = ELF32_R_TYPE (reloc->r_info); + + /* Check for unexpected PLT reloc type. */ + if (__builtin_expect (r_type == R_TILE(JMP_SLOT), 1)) + { + *(ElfW(Addr) *) (l_addr + reloc->r_offset) += l_addr; + } + else + _dl_reloc_bad_type (map, r_type, 1); +} + +#endif /* RESOLVE_MAP */ diff --git a/REORG.TODO/sysdeps/tile/dl-runtime.c b/REORG.TODO/sysdeps/tile/dl-runtime.c new file mode 100644 index 0000000000..3aced41643 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/dl-runtime.c @@ -0,0 +1,159 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Like x86_64, we pass the index of the relocation and not its offset. + In _dl_profile_fixup and _dl_call_pltexit we also use the index. + Therefore it is wasteful to compute the offset in the trampoline + just to reverse the operation immediately afterwards. */ +#define reloc_offset reloc_arg * sizeof (PLTREL) +#define reloc_index reloc_arg + +#include <elf/dl-runtime.c> + +#include <sys/mman.h> +#include <arch/sim.h> +#include <dl-unmap-segments.h> + +/* Like realpath(), but simplified: no dynamic memory use, no lstat(), + no set_errno(), no valid "rpath" on error, etc. This handles some + simple cases where the simulator might not have a valid entry for + a loaded Elf object, in particular dlopen() with a relative path. + For this relatively rare case, one could also imagine using + link_map.l_origin to avoid the getcwd() here, but the simpler code + here seems like a better solution. */ +static char * +dl_realpath (const char *name, char *rpath) +{ + char *dest; + const char *start, *end; + + if (name[0] != '/') + { + if (!__getcwd (rpath, PATH_MAX)) + return NULL; + dest = __rawmemchr (rpath, '\0'); + } + else + { + rpath[0] = '/'; + dest = rpath + 1; + } + + for (start = end = name; *start; start = end) + { + /* Skip sequence of multiple path-separators. */ + while (*start == '/') + ++start; + + /* Find end of path component. */ + for (end = start; *end && *end != '/'; ++end) + /* Nothing. */; + + if (end - start == 0) + break; + else if (end - start == 1 && start[0] == '.') + /* nothing */; + else if (end - start == 2 && start[0] == '.' && start[1] == '.') + { + /* Back up to previous component, ignore if at root already. */ + if (dest > rpath + 1) + while ((--dest)[-1] != '/'); + } + else + { + if (dest[-1] != '/') + *dest++ = '/'; + + if (dest + (end - start) >= rpath + PATH_MAX) + return NULL; + + dest = __mempcpy (dest, start, end - start); + *dest = '\0'; + } + } + if (dest > rpath + 1 && dest[-1] == '/') + --dest; + *dest = '\0'; + + return rpath; +} + +/* Support notifying the simulator about new objects. */ +void internal_function +_dl_after_load (struct link_map *l) +{ + int shift; + char pathbuf[PATH_MAX]; + char *path; + + /* Don't bother if not in the simulator. */ + if (__insn_mfspr (SPR_SIM_CONTROL) == 0) + return; + +#define DLPUTC(c) __insn_mtspr (SPR_SIM_CONTROL, \ + (SIM_CONTROL_DLOPEN \ + | ((c) << _SIM_CONTROL_OPERATOR_BITS))) + + /* Write the library address in hex. */ + DLPUTC ('0'); + DLPUTC ('x'); + for (shift = (int) sizeof (unsigned long) * 8 - 4; shift >= 0; shift -= 4) + DLPUTC ("0123456789abcdef"[(l->l_map_start >> shift) & 0xF]); + DLPUTC (':'); + + /* Write the library path, including the terminating '\0'. */ + path = dl_realpath (l->l_name, pathbuf) ?: l->l_name; + for (size_t i = 0;; i++) + { + DLPUTC (path[i]); + if (path[i] == '\0') + break; + } +#undef DLPUTC +} + +/* Support notifying the simulator about removed objects prior to munmap(). */ +static void +sim_dlclose (ElfW(Addr) map_start) +{ + int shift; + + /* Don't bother if not in the simulator. */ + if (__insn_mfspr (SPR_SIM_CONTROL) == 0) + return; + +#define DLPUTC(c) __insn_mtspr (SPR_SIM_CONTROL, \ + (SIM_CONTROL_DLCLOSE \ + | ((c) << _SIM_CONTROL_OPERATOR_BITS))) + + /* Write the library address in hex. */ + DLPUTC ('0'); + DLPUTC ('x'); + for (shift = (int) sizeof (unsigned long) * 8 - 4; shift >= 0; shift -= 4) + DLPUTC ("0123456789abcdef"[(map_start >> shift) & 0xF]); + DLPUTC ('\0'); + +#undef DLPUTC +} + +void internal_function +_dl_unmap (struct link_map *map) +{ + sim_dlclose (map->l_map_start); + _dl_unmap_segments (map); +} diff --git a/REORG.TODO/sysdeps/tile/dl-start.S b/REORG.TODO/sysdeps/tile/dl-start.S new file mode 100644 index 0000000000..4f807a9e64 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/dl-start.S @@ -0,0 +1,114 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* Get address of "sym" in "reg" assuming r51 holds ".Llink". */ + .macro pic_addr reg, sym +#ifdef __tilegx__ + moveli \reg, hw1_last(\sym - .Llink) + shl16insli \reg, \reg, hw0(\sym - .Llink) + ADD_PTR \reg, r51, \reg +#else + ADDLI_PTR \reg, r51, lo16(\sym - .Llink) + auli \reg, \reg, ha16(\sym - .Llink) +#endif + .endm + + .text +ENTRY (_start) + /* Linux starts us with sp pointing at the conventional Elf layout, + but we need to allow two 'caller' words for our ABI convention. */ + { + move r52, sp + andi sp, sp, -8 + } + cfi_def_cfa_register (r52) + { + /* Point sp at base of ABI area; point r4 to the caller-sp word. */ + ADDI_PTR sp, sp, -(2 * REGSIZE) + ADDI_PTR r4, sp, -REGSIZE + } + { + /* Save zero for caller sp in our 'caller' save area, and make + sure lr has a zero value, to limit backtraces. */ + move lr, zero + ST r4, zero + } + { + move r0, r52 + jal _dl_start + } + /* Save returned start of user program address for later. */ + move r50, r0 + + /* See if we were invoked explicitly with the dynamic loader, + in which case we have to adjust the argument vector. */ + lnk r51; .Llink: + pic_addr r4, _dl_skip_args + LD4U r4, r4 + BEQZT r4, .Lno_skip + + /* Load the argc word at the initial sp and adjust it. + We basically jump "sp" up over the first few argv entries + and write "argc" a little higher up in memory, to be the + base of the new kernel-initialized stack area. */ + LD_PTR r0, r52 + { + sub r0, r0, r4 + SHL_PTR_ADD r52, r4, r52 + } + { + ST_PTR r52, r0 + SHL_PTR_ADD sp, r4, sp + } + andi sp, sp, -8 + +.Lno_skip: + /* Call_dl_init (_dl_loaded, argc, argv, envp). See elf/start.s + for the layout of memory here; r52 is pointing to "+0". */ + pic_addr r0, _rtld_local + { + LD_PTR r1, r52 /* load argc in r1 */ + ADDLI_PTR r2, r52, __SIZEOF_POINTER__ /* point r2 at argv */ + } + { + LD_PTR r0, r0 /* yields _rtld_global._ns_loaded */ + addi r3, r1, 1 + move lr, zero + } + { + SHL_PTR_ADD r3, r3, r2 /* point r3 at envp */ + jal _dl_init + } + + /* Call user program whose address we saved in r50. + We invoke it just like a static binary, but with _dl_fini + in r0 so we can distinguish. */ + + pic_addr r0, _dl_fini + move lr, zero + { + move sp, r52 + jr r50 + } + + /* Tell backtracer to give up (_start has no caller). */ + info 2 /* INFO_OP_CANNOT_BACKTRACE */ + +END (_start) diff --git a/REORG.TODO/sysdeps/tile/dl-tls.c b/REORG.TODO/sysdeps/tile/dl-tls.c new file mode 100644 index 0000000000..eed83e2bb1 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/dl-tls.c @@ -0,0 +1,27 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef SHARED +/* We provide a fast-path version of __tls_get_addr to allow for + the normal case to be fast, both by coding the function tightly, + and more importantly by fixing its register clobber API so the + compiler can avoid having to set up frames, etc., unnecessarily. */ +#define __tls_get_addr __tls_get_addr_slow +#endif + +#include <elf/dl-tls.c> diff --git a/REORG.TODO/sysdeps/tile/dl-tls.h b/REORG.TODO/sysdeps/tile/dl-tls.h new file mode 100644 index 0000000000..6456aa9319 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/dl-tls.h @@ -0,0 +1,42 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + + +/* Type used for the representation of TLS information in the GOT. */ +typedef struct +{ + unsigned long int ti_module; + unsigned long int ti_offset; +} tls_index; + +/* Fast-path function to get a TLS pointer. */ +extern void *__tls_get_addr (tls_index *ti); + +/* The thread pointer points to the first static TLS block. */ +#define TLS_TP_OFFSET 0 + +/* Dynamic thread vector pointers at the start of each TLS block. */ +#define TLS_DTV_OFFSET 0 + +/* Compute the value for a GOTTPREL reloc. */ +#define TLS_TPREL_VALUE(sym_map, sym) \ + ((sym_map)->l_tls_offset + (sym)->st_value - TLS_TP_OFFSET) + +/* Compute the value for a DTPREL reloc. */ +#define TLS_DTPREL_VALUE(sym) \ + ((sym)->st_value - TLS_DTV_OFFSET) diff --git a/REORG.TODO/sysdeps/tile/dl-trampoline.S b/REORG.TODO/sysdeps/tile/dl-trampoline.S new file mode 100644 index 0000000000..fd7d63d76b --- /dev/null +++ b/REORG.TODO/sysdeps/tile/dl-trampoline.S @@ -0,0 +1,193 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <arch/abi.h> + +/* This function is called via the PLT header, which is called + from an individual PLT entry. + + At this point we have several values passed in: + + lr: return address to original user code + r28: the tpnt value to pass to _dl_runtime_resolver + r29: the PLT index of the invoked jump table entry. + + We set up a frame entry that looks like this (in int_reg_t units): + + +57: r25 return values from function... + +56: r24 + [...] + +33: r1 + +32: r0 + +31: PLT index + +30: tpnt + +29: stackframe + +28: caller lr + +27: r25 arguments to function... + +26: r24 + [...] + +3: r1 + +2: r0 + +1: standard ABI slot (sp) + +0: standard ABI slot (callee lr) + + The entries from "stackframe" up are only used in _dl_profile_resolve. + We save and restore r0 through r25, rather than the strictly + architected r0 through r9, to support unusual calling conventions; + for example, __tls_get_addr takes r0 and returns r0, but promises + not to clobber r1 through r24 to support its usual fast path. */ + +#define FRAME_SP (1 * REGSIZE) +#define FRAME_REGS (2 * REGSIZE) +#define FRAME_LR (28 * REGSIZE) /* Must follow FRAME_REGS */ +#define FRAME_STACKFRAME (29 * REGSIZE) +#define FRAME_TPNT (30 * REGSIZE) +#define FRAME_INDEX (31 * REGSIZE) +#define FRAME_RETVAL (32 * REGSIZE) + +#define FRAME_SIZE_SMALL (30 * REGSIZE) +#define FRAME_SIZE_LARGE (58 * REGSIZE) + +#define FOR_EACH_REG(f) \ + f(r0); f(r1); f(r2); f(r3); \ + f(r4); f(r5); f(r6); f(r7); \ + f(r8); f(r9); f(r10); f(r11); \ + f(r12); f(r13); f(r14); f(r15); \ + f(r16); f(r17); f(r18); f(r19); \ + f(r20); f(r21); f(r22); f(r23); \ + f(r24); f(r25) + +#define SAVE(REG) { ST r27, REG; ADDI_PTR r27, r27, REGSIZE } +#define RESTORE(REG) { LD REG, r27; ADDI_PTR r27, r27, REGSIZE } + + .macro dl_resolve, name, profile, framesize +.text +.global \name +.hidden \name +/* Note that cpp expands ENTRY(\name) incorrectly. */ +.type \name,@function +.align 8 +\name: + cfi_startproc + { + ST sp, lr + move r26, sp + } + { + ADDLI_PTR sp, sp, -\framesize + ADDLI_PTR r27, sp, FRAME_SP - \framesize + } + cfi_def_cfa_offset (\framesize) + { + ST r27, r26 + ADDI_PTR r27, r27, FRAME_REGS - FRAME_SP + } + FOR_EACH_REG(SAVE) + { + ST r27, lr + ADDLI_PTR r27, sp, FRAME_TPNT + } + cfi_offset (lr, FRAME_LR - \framesize) + .if \profile + { + move r0, r28 /* tpnt value */ + ST r27, r28 + ADDI_PTR r27, r27, FRAME_INDEX - FRAME_TPNT + } + { + move r1, r29 /* PLT index */ + ST r27, r29 + } + { + move r2, lr /* retaddr */ + ADDI_PTR r3, sp, FRAME_REGS /* La_tile_regs pointer */ + } + { + ADDLI_PTR r4, sp, FRAME_STACKFRAME /* framesize pointer */ + jal _dl_profile_fixup + } + ADDLI_PTR r28, sp, FRAME_STACKFRAME + LD_PTR r28, r28 + BGTZ r28, 1f + .else + { + move r0, r28 /* tpnt value 1 */ + move r1, r29 /* PLT index 2 */ + } + jal _dl_fixup + .endif + { + /* Copy aside the return value so we can restore r0 below. */ + move r29, r0 + /* Set up r27 to let us start restoring registers. */ + ADDLI_PTR r27, sp, FRAME_REGS + } + FOR_EACH_REG(RESTORE) + .if \profile + ADDLI_PTR r28, sp, FRAME_STACKFRAME + LD r28, r28 + BGTZ r28, 1f + .endif + { + /* Restore original user return address. */ + LD lr, r27 + /* Pop off our stack frame. */ + ADDLI_PTR sp, sp, \framesize + } + cfi_def_cfa_offset (0) + jr r29 /* Transfer control to freshly loaded code. */ + jrp lr /* Keep backtracer happy. */ + + .if \profile +1: jalr r29 /* Call resolved function. */ + { + ADDLI_PTR r28, sp, FRAME_TPNT + ADDLI_PTR r27, sp, FRAME_RETVAL + } + FOR_EACH_REG(SAVE) + { + LD r0, r28 + ADDI_PTR r28, r28, FRAME_INDEX - FRAME_TPNT + } + { + LD r1, r28 + ADDLI_PTR r2, sp, FRAME_REGS + } + { + ADDLI_PTR r3, sp, FRAME_RETVAL + jal _dl_call_pltexit + } + { + ADDLI_PTR lr, sp, FRAME_LR + ADDLI_PTR r27, sp, FRAME_RETVAL + } + FOR_EACH_REG(RESTORE) + { + LD lr, lr + ADDLI_PTR sp, sp, \framesize + } + jrp lr + .endif +END (\name) + .endm + + dl_resolve _dl_runtime_resolve, 0, FRAME_SIZE_SMALL +#ifndef PROF + dl_resolve _dl_runtime_profile, 1, FRAME_SIZE_LARGE +#endif diff --git a/REORG.TODO/sysdeps/tile/ffs.c b/REORG.TODO/sysdeps/tile/ffs.c new file mode 100644 index 0000000000..90e65d799c --- /dev/null +++ b/REORG.TODO/sysdeps/tile/ffs.c @@ -0,0 +1,36 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <limits.h> +#define ffsl __something_else +#include <string.h> + +#undef ffs +int +__ffs (int x) +{ + return __builtin_ffs (x); +} +weak_alias (__ffs, ffs) +libc_hidden_def (__ffs) +libc_hidden_builtin_def (ffs) + +#if ULONG_MAX == UINT_MAX +#undef ffsl +weak_alias (__ffs, ffsl) +#endif diff --git a/REORG.TODO/sysdeps/tile/ffsll.c b/REORG.TODO/sysdeps/tile/ffsll.c new file mode 100644 index 0000000000..5d2b8a0129 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/ffsll.c @@ -0,0 +1,32 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <limits.h> +#define ffsl __something_else +#include <string.h> + +#undef ffsll +int +ffsll (long long x) +{ + return __builtin_ffsll (x); +} + +#if ULONG_MAX > UINT_MAX +#undef ffsl +weak_alias (ffsll, ffsl) +#endif diff --git a/REORG.TODO/sysdeps/tile/gccframe.h b/REORG.TODO/sysdeps/tile/gccframe.h new file mode 100644 index 0000000000..de9b2b1c9b --- /dev/null +++ b/REORG.TODO/sysdeps/tile/gccframe.h @@ -0,0 +1,21 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define FIRST_PSEUDO_REGISTER 64 + +#include <sysdeps/generic/gccframe.h> diff --git a/REORG.TODO/sysdeps/tile/jmpbuf-offsets.h b/REORG.TODO/sysdeps/tile/jmpbuf-offsets.h new file mode 100644 index 0000000000..b84dd05fa4 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/jmpbuf-offsets.h @@ -0,0 +1,62 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* We don't use most of these symbols; they are here for documentation. */ +#define JB_R30 0 +#define JB_R31 1 +#define JB_R32 2 +#define JB_R33 3 +#define JB_R34 4 +#define JB_R35 5 +#define JB_R36 6 +#define JB_R37 7 +#define JB_R38 8 +#define JB_R39 9 +#define JB_R40 10 +#define JB_R41 11 +#define JB_R42 12 +#define JB_R43 13 +#define JB_R44 14 +#define JB_R45 15 +#define JB_R46 16 +#define JB_R47 17 +#define JB_R48 18 +#define JB_R49 19 +#define JB_R50 20 +#define JB_R51 21 +#define JB_FP 22 /* r52 */ +#define JB_TP 23 /* r53 */ +#define JB_SP 24 /* r54 */ +#define JB_PC 25 /* normally LR, r55 */ +#define JB_ICS 26 /* interrupt critical section bit */ + +/* We save space for some extra state to accommodate future changes. */ +#define JB_LEN 32 /* number of words */ + +#define JB_SIZE (JB_LEN * REGSIZE) + +/* Helper macro used by all the setjmp/longjmp assembly code. */ +#define FOR_EACH_CALLEE_SAVED_REG(f) \ + .no_require_canonical_reg_names; f(r30); f(r31); \ + f(r32); f(r33); f(r34); f(r35); f(r36); f(r37); f(r38); f(r39); \ + f(r40); f(r41); f(r42); f(r43); f(r44); f(r45); f(r46); f(r47); \ + f(r48); f(r49); f(r50); f(r51); f(r52); f(r53); f(r54); f(r55) + +/* Helper for generic ____longjmp_chk(). */ +#define JB_FRAME_ADDRESS(buf) \ + ((void *) (unsigned long) (buf[JB_SP])) diff --git a/REORG.TODO/sysdeps/tile/jmpbuf-unwind.h b/REORG.TODO/sysdeps/tile/jmpbuf-unwind.h new file mode 100644 index 0000000000..eccb635d16 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/jmpbuf-unwind.h @@ -0,0 +1,48 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + Based on work contributed by Jakub Jelinek <jakub@redhat.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <setjmp.h> +#include <jmpbuf-offsets.h> +#include <stdint.h> +#include <unwind.h> +#include <sysdep.h> + +/* Test if longjmp to JMPBUF would unwind the frame + containing a local variable at ADDRESS. */ +#define _JMPBUF_UNWINDS(jmpbuf, address, demangle) \ + ((void *) (address) < (void *) demangle ((jmpbuf)[JB_SP])) + +#define _JMPBUF_CFA_UNWINDS_ADJ(_jmpbuf, _context, _adj) \ + _JMPBUF_UNWINDS_ADJ (_jmpbuf, (void *) (long) _Unwind_GetCFA (_context), _adj) + +static inline uintptr_t __attribute__ ((unused)) +_jmpbuf_sp (__jmp_buf regs) +{ + uintptr_t sp = regs[JB_SP]; +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (sp); +#endif + return sp; +} + +#define _JMPBUF_UNWINDS_ADJ(_jmpbuf, _address, _adj) \ + ((uintptr_t) (_address) - (_adj) < _jmpbuf_sp (_jmpbuf) - (_adj)) + +/* We use the normal longjmp for unwinding. */ +#define __libc_unwind_longjmp(buf, val) __libc_longjmp (buf, val) diff --git a/REORG.TODO/sysdeps/tile/ldsodefs.h b/REORG.TODO/sysdeps/tile/ldsodefs.h new file mode 100644 index 0000000000..f7c2a44c80 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/ldsodefs.h @@ -0,0 +1,40 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _TILE_LDSODEFS_H +#define _TILE_LDSODEFS_H 1 + +#include <elf.h> + +struct La_tile_regs; +struct La_tile_retval; + +#define ARCH_PLTENTER_MEMBERS \ + ElfW(Addr) (*tile_gnu_pltenter) (ElfW(Sym) *, unsigned int, uintptr_t *, \ + uintptr_t *, struct La_tile_regs *, \ + unsigned int *, const char *, \ + long int *) + +#define ARCH_PLTEXIT_MEMBERS \ + ElfW(Addr) (*tile_gnu_pltexit) (ElfW(Sym) *, unsigned int, uintptr_t *, \ + uintptr_t *, const struct La_tile_regs *, \ + struct La_tile_retval *, const char *) + +#include_next <ldsodefs.h> + +#endif diff --git a/REORG.TODO/sysdeps/tile/libm-test-ulps b/REORG.TODO/sysdeps/tile/libm-test-ulps new file mode 100644 index 0000000000..f1a01fdb30 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/libm-test-ulps @@ -0,0 +1,394 @@ +# Begin of automatic generation + +# Maximal error of functions: +Function: "acos": +float: 1 +ifloat: 1 + +Function: "acosh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 + +Function: "asin": +float: 1 +ifloat: 1 + +Function: "asinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: "atan": +float: 1 +ifloat: 1 + +Function: "atan2": +float: 1 +ifloat: 1 + +Function: "atanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 + +Function: "cabs": +double: 1 +idouble: 1 + +Function: Real part of "cacos": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 + +Function: Imaginary part of "cacos": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 + +Function: Real part of "cacosh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 + +Function: Imaginary part of "cacosh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 + +Function: "carg": +float: 1 +ifloat: 1 + +Function: Real part of "casin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: Imaginary part of "casin": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 + +Function: Real part of "casinh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 + +Function: Imaginary part of "casinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: Real part of "catan": +float: 1 +ifloat: 1 + +Function: Imaginary part of "catan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: Real part of "catanh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: Imaginary part of "catanh": +float: 1 +ifloat: 1 + +Function: "cbrt": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 + +Function: Real part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: Imaginary part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: Real part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: Imaginary part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: Real part of "cexp": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 + +Function: Imaginary part of "cexp": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 + +Function: Real part of "clog": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 + +Function: Imaginary part of "clog": +float: 1 +ifloat: 1 + +Function: Real part of "clog10": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 + +Function: Imaginary part of "clog10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 + +Function: "cos": +float: 1 +ifloat: 1 + +Function: "cosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: Real part of "cpow": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 + +Function: Imaginary part of "cpow": +float: 2 +ifloat: 2 + +Function: Real part of "csin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: Real part of "csinh": +float: 1 +ifloat: 1 + +Function: Imaginary part of "csinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: Real part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 + +Function: Imaginary part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 + +Function: Real part of "ctan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: Imaginary part of "ctan": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 + +Function: Real part of "ctanh": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 + +Function: Imaginary part of "ctanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 + +Function: "erf": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: "erfc": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 + +Function: "exp": +float: 1 +ifloat: 1 + +Function: "exp10": +double: 2 +idouble: 2 + +Function: "exp2": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: "expm1": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: "gamma": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 + +Function: "hypot": +double: 1 +idouble: 1 + +Function: "j0": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 + +Function: "j1": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 + +Function: "jn": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 + +Function: "lgamma": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 + +Function: "log": +float: 1 +ifloat: 1 + +Function: "log10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 + +Function: "log1p": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 + +Function: "log2": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 + +Function: "pow": +float: 3 +ifloat: 3 + +Function: "pow10": +double: 2 +idouble: 2 + +Function: "sin": +float: 1 +ifloat: 1 + +Function: "sincos": +float: 1 +ifloat: 1 + +Function: "sinh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 + +Function: "tan": +float: 1 +ifloat: 1 + +Function: "tanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 + +Function: "tgamma": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 + +Function: "y0": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 + +Function: "y1": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 + +Function: "yn": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 + +# end of automatic generation diff --git a/REORG.TODO/sysdeps/tile/libm-test-ulps-name b/REORG.TODO/sysdeps/tile/libm-test-ulps-name new file mode 100644 index 0000000000..69673dcaa2 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/libm-test-ulps-name @@ -0,0 +1 @@ +Tile diff --git a/REORG.TODO/sysdeps/tile/machine-gmon.h b/REORG.TODO/sysdeps/tile/machine-gmon.h new file mode 100644 index 0000000000..105afd92e1 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/machine-gmon.h @@ -0,0 +1,25 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define _MCOUNT_DECL(from, self) \ + void __mcount_internal (u_long from, u_long self) + +/* Call __mcount_internal with our the return PC for our caller, and + the return PC our caller will return to. Empty since we use an + assembly stub instead. */ +#define MCOUNT diff --git a/REORG.TODO/sysdeps/tile/math-tests.h b/REORG.TODO/sysdeps/tile/math-tests.h new file mode 100644 index 0000000000..fb43494aa2 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/math-tests.h @@ -0,0 +1,30 @@ +/* Configuration for math tests. Tile version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Tile hardware/softfloat does not support exceptions and rounding modes. */ +#define ROUNDING_TESTS_float(MODE) ((MODE) == FE_TONEAREST) +#define ROUNDING_TESTS_double(MODE) ((MODE) == FE_TONEAREST) +#define ROUNDING_TESTS_long_double(MODE) ((MODE) == FE_TONEAREST) +#define EXCEPTION_TESTS_float 0 +#define EXCEPTION_TESTS_double 0 +#define EXCEPTION_TESTS_long_double 0 + +/* Tile hardware/softfloat floating-point ops do not preserve NaN payloads. */ +#define SNAN_TESTS_PRESERVE_PAYLOAD 0 + +#include_next <math-tests.h> diff --git a/REORG.TODO/sysdeps/tile/math_private.h b/REORG.TODO/sysdeps/tile/math_private.h new file mode 100644 index 0000000000..99daec4093 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/math_private.h @@ -0,0 +1,44 @@ +#ifndef TILE_MATH_PRIVATE_H +#define TILE_MATH_PRIVATE_H 1 + +/* Internally, we suppress any use of exception or rounding other + than what is supported by the hardware. This does mean that some + code will silently fail to report exceptions, set rounding mode + as expected, etc., but it allows math code to compile that otherwise + wouldn't (such as math/s_fma.c) and so is valuable. + + We intentionally ignore the "exception" arguments of functions that + take an exception, since we can't even evaluate the argument + without causing a build failure. The extra level of statement + expression wrapping avoids "statement with no effect" warnings. + Since the callers don't check for errors anyway, we just claim + success in every case. + + The overrides for libc_ functions must happen before we include + the generic math_private.h, and the overrides for regular + <fenv.h> functions must happen afterwards, to avoid clashing with + the declarations of those functions. */ + +#define libc_fesetround(rnd) ({ 0; }) +#define libc_fetestexcept(exc) ({ 0; }) +#define libc_feholdexcept_setround(env, exc) ({ (void) (env); 0; }) +#define libc_feupdateenv_test(env, exc) ({ (void) (env); 0; }) + +#include_next <math_private.h> + +#define feraiseexcept(excepts) ({ 0; }) +#define __feraiseexcept(excepts) ({ 0; }) +#define feclearexcept(exc) ({ 0; }) +#define fetestexcept(exc) ({ 0; }) +extern inline int fegetenv (fenv_t *__e) { return 0; } +extern inline int __fegetenv (fenv_t *__e) { return 0; } +extern inline int fesetenv (const fenv_t *__e) { return 0; } +extern inline int __fesetenv (const fenv_t *__e) { return 0; } +extern inline int feupdateenv (const fenv_t *__e) { return 0; } +extern inline int __feupdateenv (const fenv_t *__e) { return 0; } +extern inline int fegetround (void) { return FE_TONEAREST; } +extern inline int __fegetround (void) { return FE_TONEAREST; } +extern inline int fesetround (int __d) { return 0; } +extern inline int __fesetround (int __d) { return 0; } + +#endif diff --git a/REORG.TODO/sysdeps/tile/memcmp.c b/REORG.TODO/sysdeps/tile/memcmp.c new file mode 100644 index 0000000000..83c62be059 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/memcmp.c @@ -0,0 +1,357 @@ +/* Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#undef __ptr_t +#define __ptr_t void * + +#if defined HAVE_STRING_H || defined _LIBC +# include <string.h> +#endif + +#undef memcmp + +#ifndef MEMCMP +# define MEMCMP memcmp +#endif + +#ifdef _LIBC + +# include <memcopy.h> +# include <endian.h> + +# if __BYTE_ORDER == __BIG_ENDIAN +# define WORDS_BIGENDIAN +# endif + +#else /* Not in the GNU C library. */ + +# include <sys/types.h> + +/* Type to use for aligned memory operations. + This should normally be the biggest type supported by a single load + and store. Must be an unsigned type. */ +# define op_t unsigned long int +# define OPSIZ (sizeof(op_t)) + +/* Threshold value for when to enter the unrolled loops. */ +# define OP_T_THRES 16 + +/* Type to use for unaligned operations. */ +typedef unsigned char byte; + +#endif /* In the GNU C library. */ + +/* Provide the appropriate builtins to shift two registers based on + the alignment of a pointer held in a third register, and to reverse + the bytes in a word. */ +#ifdef __tilegx__ +#define DBLALIGN __insn_dblalign +#define REVBYTES __insn_revbytes +#else +#define DBLALIGN __insn_dword_align +#define REVBYTES __insn_bytex +#endif + +#ifdef WORDS_BIGENDIAN +# define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1) +#else +# define CMP_LT_OR_GT(a, b) (REVBYTES(a) > REVBYTES(b) ? 1 : -1) +#endif + +/* BE VERY CAREFUL IF YOU CHANGE THIS CODE! */ + +/* The strategy of this memcmp is: + + 1. Compare bytes until one of the block pointers is aligned. + + 2. Compare using memcmp_common_alignment or + memcmp_not_common_alignment, regarding the alignment of the other + block after the initial byte operations. The maximum number of + full words (of type op_t) are compared in this way. + + 3. Compare the few remaining bytes. */ + +static int memcmp_common_alignment (long, long, size_t) __THROW; + +/* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t' + objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for + memory operations on `op_t's. */ +static int +memcmp_common_alignment (long int srcp1, long int srcp2, size_t len) +{ + op_t a0, a1; + op_t b0, b1; + + switch (len % 4) + { + default: /* Avoid warning about uninitialized local variables. */ + case 2: + a0 = ((op_t *) srcp1)[0]; + b0 = ((op_t *) srcp2)[0]; + srcp1 += OPSIZ; + srcp2 += OPSIZ; + len += 2; + goto do1; + case 3: + a1 = ((op_t *) srcp1)[0]; + b1 = ((op_t *) srcp2)[0]; + srcp1 += OPSIZ; + srcp2 += OPSIZ; + len += 1; + goto do2; + case 0: + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + return 0; + a0 = ((op_t *) srcp1)[0]; + b0 = ((op_t *) srcp2)[0]; + srcp1 += OPSIZ; + srcp2 += OPSIZ; + goto do3; + case 1: + a1 = ((op_t *) srcp1)[0]; + b1 = ((op_t *) srcp2)[0]; + srcp1 += OPSIZ; + srcp2 += OPSIZ; + len -= 1; + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + goto do0; + /* Fall through. */ + } + + do + { + a0 = ((op_t *) srcp1)[0]; + b0 = ((op_t *) srcp2)[0]; + srcp1 += OPSIZ; + srcp2 += OPSIZ; + if (__glibc_likely (a1 != b1)) + return CMP_LT_OR_GT (a1, b1); + + do3: + a1 = ((op_t *) srcp1)[0]; + b1 = ((op_t *) srcp2)[0]; + srcp1 += OPSIZ; + srcp2 += OPSIZ; + if (__glibc_likely (a0 != b0)) + return CMP_LT_OR_GT (a0, b0); + + do2: + a0 = ((op_t *) srcp1)[0]; + b0 = ((op_t *) srcp2)[0]; + srcp1 += OPSIZ; + srcp2 += OPSIZ; + if (__glibc_likely (a1 != b1)) + return CMP_LT_OR_GT (a1, b1); + + do1: + a1 = ((op_t *) srcp1)[0]; + b1 = ((op_t *) srcp2)[0]; + srcp1 += OPSIZ; + srcp2 += OPSIZ; + if (__glibc_likely (a0 != b0)) + return CMP_LT_OR_GT (a0, b0); + + len -= 4; + } + while (len != 0); + + /* This is the right position for do0. Please don't move + it into the loop. */ + do0: + if (__glibc_likely (a1 != b1)) + return CMP_LT_OR_GT (a1, b1); + return 0; +} + +static int memcmp_not_common_alignment (long, long, size_t) __THROW; + +/* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN + `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory + operations on `op_t', but SRCP1 *should be unaligned*. */ +static int +memcmp_not_common_alignment (long int srcp1, long int srcp2, size_t len) +{ + void * srcp1i; + op_t a0, a1, a2, a3; + op_t b0, b1, b2, b3; + op_t x; + + /* Calculate how to shift a word read at the memory operation + aligned srcp1 to make it aligned for comparison. */ + + srcp1i = (void *) srcp1; + + /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t' + it points in the middle of. */ + srcp1 &= -OPSIZ; + + switch (len % 4) + { + default: /* Avoid warning about uninitialized local variables. */ + case 2: + a1 = ((op_t *) srcp1)[0]; + a2 = ((op_t *) srcp1)[1]; + b2 = ((op_t *) srcp2)[0]; + srcp1 += 2 * OPSIZ; + srcp2 += 1 * OPSIZ; + len += 2; + goto do1; + case 3: + a0 = ((op_t *) srcp1)[0]; + a1 = ((op_t *) srcp1)[1]; + b1 = ((op_t *) srcp2)[0]; + srcp1 += 2 * OPSIZ; + srcp2 += 1 * OPSIZ; + len += 1; + goto do2; + case 0: + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + return 0; + a3 = ((op_t *) srcp1)[0]; + a0 = ((op_t *) srcp1)[1]; + b0 = ((op_t *) srcp2)[0]; + srcp1 += 2 * OPSIZ; + srcp2 += 1 * OPSIZ; + goto do3; + case 1: + a2 = ((op_t *) srcp1)[0]; + a3 = ((op_t *) srcp1)[1]; + b3 = ((op_t *) srcp2)[0]; + srcp1 += 2 * OPSIZ; + srcp2 += 1 * OPSIZ; + len -= 1; + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + goto do0; + /* Fall through. */ + } + + do + { + a0 = ((op_t *) srcp1)[0]; + b0 = ((op_t *) srcp2)[0]; + x = DBLALIGN (a2, a3, srcp1i); + srcp1 += OPSIZ; + srcp2 += OPSIZ; + if (__glibc_likely (x != b3)) + return CMP_LT_OR_GT (x, b3); + + do3: + a1 = ((op_t *) srcp1)[0]; + b1 = ((op_t *) srcp2)[0]; + x = DBLALIGN (a3, a0, srcp1i); + srcp1 += OPSIZ; + srcp2 += OPSIZ; + if (__glibc_likely (x != b0)) + return CMP_LT_OR_GT (x, b0); + + do2: + a2 = ((op_t *) srcp1)[0]; + b2 = ((op_t *) srcp2)[0]; + x = DBLALIGN (a0, a1, srcp1i); + srcp1 += OPSIZ; + srcp2 += OPSIZ; + if (__glibc_likely (x != b1)) + return CMP_LT_OR_GT (x, b1); + + do1: + a3 = ((op_t *) srcp1)[0]; + b3 = ((op_t *) srcp2)[0]; + x = DBLALIGN (a1, a2, srcp1i); + srcp1 += OPSIZ; + srcp2 += OPSIZ; + if (__glibc_likely (x != b2)) + return CMP_LT_OR_GT (x, b2); + + len -= 4; + } + while (len != 0); + + /* This is the right position for do0. Please don't move + it into the loop. */ + do0: + x = DBLALIGN (a2, a3, srcp1i); + if (__glibc_likely (x != b3)) + return CMP_LT_OR_GT (x, b3); + return 0; +} + +int +MEMCMP (const __ptr_t s1, const __ptr_t s2, size_t len) +{ + op_t a0; + op_t b0; + long int srcp1 = (long int) s1; + long int srcp2 = (long int) s2; + int res; + + if (len >= OP_T_THRES) + { + /* There are at least some bytes to compare. No need to test + for LEN == 0 in this alignment loop. */ + while (srcp2 % OPSIZ != 0) + { + a0 = ((byte *) srcp1)[0]; + b0 = ((byte *) srcp2)[0]; + srcp1 += 1; + srcp2 += 1; + res = a0 - b0; + if (__glibc_likely (res != 0)) + return res; + len -= 1; + } + + /* SRCP2 is now aligned for memory operations on `op_t'. + SRCP1 alignment determines if we can do a simple, + aligned compare or need to shuffle bits. */ + + if (srcp1 % OPSIZ == 0) + res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ); + else + res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ); + if (res != 0) + return res; + + /* Number of bytes remaining in the interval [0..OPSIZ-1]. */ + srcp1 += len & -OPSIZ; + srcp2 += len & -OPSIZ; + len %= OPSIZ; + } + + /* There are just a few bytes to compare. Use byte memory operations. */ + while (len != 0) + { + a0 = ((byte *) srcp1)[0]; + b0 = ((byte *) srcp2)[0]; + srcp1 += 1; + srcp2 += 1; + res = a0 - b0; + if (__glibc_likely (res != 0)) + return res; + len -= 1; + } + + return 0; +} +libc_hidden_builtin_def(memcmp) +#ifdef weak_alias +# undef bcmp +weak_alias (memcmp, bcmp) +#endif diff --git a/REORG.TODO/sysdeps/tile/memcopy.h b/REORG.TODO/sysdeps/tile/memcopy.h new file mode 100644 index 0000000000..d934165d98 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/memcopy.h @@ -0,0 +1,33 @@ +/* memcopy.h -- definitions for memory copy functions. Tile version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/generic/memcopy.h> +#include <bits/wordsize.h> + +/* The tilegx implementation of memcpy is safe to use for memmove. */ +#ifdef __tilegx__ +# undef MEMCPY_OK_FOR_FWD_MEMMOVE +# define MEMCPY_OK_FOR_FWD_MEMMOVE 1 +#endif + +/* Support more efficient copying on tilegx32, which supports + long long as a native 64-bit type. */ +#if defined (__tilegx__) && __WORDSIZE == 32 +# undef op_t +# define op_t unsigned long long int +#endif diff --git a/REORG.TODO/sysdeps/tile/nptl/Makefile b/REORG.TODO/sysdeps/tile/nptl/Makefile new file mode 100644 index 0000000000..b013406abb --- /dev/null +++ b/REORG.TODO/sysdeps/tile/nptl/Makefile @@ -0,0 +1,20 @@ +# Copyright (C) 2002-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library. If not, see +# <http://www.gnu.org/licenses/>. + +ifeq ($(subdir),csu) +gen-as-const-headers += tcb-offsets.sym +endif diff --git a/REORG.TODO/sysdeps/tile/nptl/bits/pthreadtypes-arch.h b/REORG.TODO/sysdeps/tile/nptl/bits/pthreadtypes-arch.h new file mode 100644 index 0000000000..145ee42ddb --- /dev/null +++ b/REORG.TODO/sysdeps/tile/nptl/bits/pthreadtypes-arch.h @@ -0,0 +1,79 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Based on work contributed by Ulrich Drepper <drepper@redhat.com>, 2002. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _BITS_PTHREADTYPES_ARCH_H +#define _BITS_PTHREADTYPES_ARCH_H 1 + +#include <bits/wordsize.h> + +#if __WORDSIZE == 64 +# define __SIZEOF_PTHREAD_MUTEX_T 40 +# define __SIZEOF_PTHREAD_ATTR_T 56 +# define __SIZEOF_PTHREAD_RWLOCK_T 56 +# define __SIZEOF_PTHREAD_BARRIER_T 32 +#else +# define __SIZEOF_PTHREAD_MUTEX_T 24 +# define __SIZEOF_PTHREAD_ATTR_T 36 +# define __SIZEOF_PTHREAD_RWLOCK_T 32 +# define __SIZEOF_PTHREAD_BARRIER_T 20 +#endif +#define __SIZEOF_PTHREAD_MUTEXATTR_T 4 +#define __SIZEOF_PTHREAD_COND_T 48 +#define __SIZEOF_PTHREAD_CONDATTR_T 4 +#define __SIZEOF_PTHREAD_RWLOCKATTR_T 8 +#define __SIZEOF_PTHREAD_BARRIERATTR_T 4 + +/* Data structure for mutex handling. */ +#define __PTHREAD_COMPAT_PADDING_MID +#define __PTHREAD_COMPAT_PADDING_END +#define __PTHREAD_MUTEX_LOCK_ELISION 0 + +#define __LOCK_ALIGNMENT +#define __ONCE_ALIGNMENT + +struct __pthread_rwlock_arch_t +{ + unsigned int __readers; + unsigned int __writers; + unsigned int __wrphase_futex; + unsigned int __writers_futex; + unsigned int __pad3; + unsigned int __pad4; +#if __WORDSIZE == 64 + int __cur_writer; + int __shared; + unsigned long int __pad1; + unsigned long int __pad2; + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. */ + unsigned int __flags; +#else + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. */ + unsigned char __flags; + unsigned char __shared; + unsigned char __pad1; + unsigned char __pad2; + int __cur_writer; +#endif +}; + +#define __PTHREAD_RWLOCK_ELISION_EXTRA 0 + +#endif /* bits/pthreadtypes.h */ diff --git a/REORG.TODO/sysdeps/tile/nptl/bits/semaphore.h b/REORG.TODO/sysdeps/tile/nptl/bits/semaphore.h new file mode 100644 index 0000000000..0728a0528a --- /dev/null +++ b/REORG.TODO/sysdeps/tile/nptl/bits/semaphore.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + Based on work contributed by Ulrich Drepper <drepper@redhat.com>, 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _SEMAPHORE_H +# error "Never use <bits/semaphore.h> directly; include <semaphore.h> instead." +#endif + +#include <bits/wordsize.h> + +#if __WORDSIZE == 64 +# define __SIZEOF_SEM_T 32 +#else +# define __SIZEOF_SEM_T 16 +#endif + + +/* Value returned if `sem_open' failed. */ +#define SEM_FAILED ((sem_t *) 0) + + +typedef union +{ + char __size[__SIZEOF_SEM_T]; + long int __align; +} sem_t; diff --git a/REORG.TODO/sysdeps/tile/nptl/pthread_spin_lock.c b/REORG.TODO/sysdeps/tile/nptl/pthread_spin_lock.c new file mode 100644 index 0000000000..16ef021de0 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/nptl/pthread_spin_lock.c @@ -0,0 +1,56 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "pthreadP.h" +#include <arch/spr_def.h> +#include <atomic.h> + +/* Bound point for bounded exponential backoff */ +#define BACKOFF_MAX 2048 + +/* Initial cycle delay for exponential backoff */ +#define BACKOFF_START 32 + +#ifdef __tilegx__ +/* Use cmpexch() after the initial fast-path exch to avoid + invalidating the cache line of the lock holder. */ +# define TNS(p) atomic_exchange_acq((p), 1) +# define CMPTNS(p) atomic_compare_and_exchange_val_acq((p), 1, 0) +#else +# define TNS(p) __insn_tns(p) +# define CMPTNS(p) __insn_tns(p) +# define SPR_CYCLE SPR_CYCLE_LOW /* The low 32 bits are sufficient. */ +#endif + +int +pthread_spin_lock (pthread_spinlock_t *lock) +{ + if (__builtin_expect (TNS (lock) != 0, 0)) + { + unsigned int backoff = BACKOFF_START; + while (CMPTNS (lock) != 0) + { + unsigned int start = __insn_mfspr (SPR_CYCLE); + while (__insn_mfspr (SPR_CYCLE) - start < backoff) + ; + if (backoff < BACKOFF_MAX) + backoff *= 2; + } + } + return 0; +} diff --git a/REORG.TODO/sysdeps/tile/nptl/pthread_spin_trylock.c b/REORG.TODO/sysdeps/tile/nptl/pthread_spin_trylock.c new file mode 100644 index 0000000000..c2d6766fd1 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/nptl/pthread_spin_trylock.c @@ -0,0 +1,32 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "pthreadP.h" +#include <errno.h> + +#ifdef __tilegx__ +#define TNS(p) __insn_exch4((p), 1) +#else +#define TNS(p) __insn_tns(p) +#endif + +int +pthread_spin_trylock (pthread_spinlock_t *lock) +{ + return (TNS (lock) == 0) ? 0 : EBUSY; +} diff --git a/REORG.TODO/sysdeps/tile/nptl/pthread_spin_unlock.c b/REORG.TODO/sysdeps/tile/nptl/pthread_spin_unlock.c new file mode 100644 index 0000000000..63bc814985 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/nptl/pthread_spin_unlock.c @@ -0,0 +1,33 @@ +/* pthread_spin_unlock -- unlock a spin lock. Tile version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "pthreadP.h" +#include <atomic.h> + +int +pthread_spin_unlock (pthread_spinlock_t *lock) +{ +#ifdef __tilegx__ + /* Use exchange() to bypass the write buffer. */ + atomic_exchange_rel (lock, 0); +#else + atomic_full_barrier (); + *lock = 0; +#endif + return 0; +} diff --git a/REORG.TODO/sysdeps/tile/nptl/pthreaddef.h b/REORG.TODO/sysdeps/tile/nptl/pthreaddef.h new file mode 100644 index 0000000000..791fa452e2 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/nptl/pthreaddef.h @@ -0,0 +1,36 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdlib.h> +#include <string.h> + +/* Default stack size. */ +#define ARCH_STACK_DEFAULT_SIZE (2 * 1024 * 1024) + +/* Required stack pointer alignment at beginning. */ +#define STACK_ALIGN 16 + +/* Minimal stack size after allocating thread descriptor and guard size. */ +#define MINIMAL_REST_STACK 2048 + +/* Alignment requirement for TCB. */ +#define TCB_ALIGNMENT 16 + + +/* Location of current stack frame. */ +#define CURRENT_STACK_FRAME __builtin_frame_address (0) diff --git a/REORG.TODO/sysdeps/tile/nptl/tcb-offsets.sym b/REORG.TODO/sysdeps/tile/nptl/tcb-offsets.sym new file mode 100644 index 0000000000..0147ffafb7 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/nptl/tcb-offsets.sym @@ -0,0 +1,16 @@ +#define SHARED /* needed to get struct rtld_global from <ldsodefs.h> */ +#include <sysdep.h> +#include <tls.h> +#include <ldsodefs.h> + +-- + +-- Abuse tls.h macros to derive offsets relative to the thread register. +#define thread_offsetof(mem) (long)(offsetof(struct pthread, mem) - TLS_TCB_OFFSET - TLS_PRE_TCB_SIZE) + +MULTIPLE_THREADS_OFFSET thread_offsetof (header.multiple_threads) +TID_OFFSET thread_offsetof (tid) +POINTER_GUARD (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +FEEDBACK_DATA_OFFSET (offsetof (tcbhead_t, feedback_data) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +DTV_OFFSET (offsetof (tcbhead_t, dtv) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +TLS_GENERATION_OFFSET offsetof (struct rtld_global, _dl_tls_generation) diff --git a/REORG.TODO/sysdeps/tile/nptl/tls.h b/REORG.TODO/sysdeps/tile/nptl/tls.h new file mode 100644 index 0000000000..7314843cfb --- /dev/null +++ b/REORG.TODO/sysdeps/tile/nptl/tls.h @@ -0,0 +1,189 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _TLS_H +#define _TLS_H 1 + +# include <dl-sysdep.h> + +#ifndef __ASSEMBLER__ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <dl-dtv.h> + +#else /* __ASSEMBLER__ */ +# include <tcb-offsets.h> +#endif /* __ASSEMBLER__ */ + + +#ifndef __ASSEMBLER__ + +/* Get system call information. */ +# include <sysdep.h> + +/* The TP points to the start of the thread blocks. */ +# define TLS_DTV_AT_TP 1 +# define TLS_TCB_AT_TP 0 + +/* We use the multiple_threads field in the pthread struct */ +#define TLS_MULTIPLE_THREADS_IN_TCB 1 + +/* Get the thread descriptor definition. */ +# include <nptl/descr.h> + +/* The stack_guard is accessed directly by GCC -fstack-protector code, + so it is a part of public ABI. The dtv and pointer_guard fields + are private. */ +typedef struct +{ + void *feedback_data; + uintptr_t pointer_guard; + uintptr_t stack_guard; + dtv_t *dtv; +} tcbhead_t; + +/* This is the size of the initial TCB. Because our TCB is before the thread + pointer, we don't need this. */ +# define TLS_INIT_TCB_SIZE 0 + +/* Alignment requirements for the initial TCB. */ +# define TLS_INIT_TCB_ALIGN __alignof__ (struct pthread) + +/* This is the size of the TCB. Because our TCB is before the thread + pointer, we don't need this. */ +# define TLS_TCB_SIZE 0 + +/* Alignment requirements for the TCB. */ +# define TLS_TCB_ALIGN __alignof__ (struct pthread) + +/* This is the size we need before TCB - actually, it includes the TCB. */ +# define TLS_PRE_TCB_SIZE \ + (sizeof (struct pthread) \ + + ((sizeof (tcbhead_t) + TLS_TCB_ALIGN - 1) & ~(TLS_TCB_ALIGN - 1))) + +/* Return the thread descriptor (tp) for the current thread. */ +register void *__thread_pointer asm ("tp"); + +/* The thread pointer (in hardware register tp) points to the end of + the TCB. The pthread_descr structure is immediately in front of the TCB. */ +# define TLS_TCB_OFFSET 0 + +/* Install the dtv pointer. The pointer passed is to the element with + index -1 which contain the length. */ +# define INSTALL_DTV(tcbp, dtvp) \ + (((tcbhead_t *) (tcbp))[-1].dtv = (dtvp) + 1) + +/* Install new dtv for current thread. */ +# define INSTALL_NEW_DTV(dtv) (THREAD_DTV() = (dtv)) + +/* Return dtv of given thread descriptor. */ +# define GET_DTV(tcbp) (((tcbhead_t *) (tcbp))[-1].dtv) + +/* Code to initially initialize the thread pointer (tp). */ +# define TLS_INIT_TP(tcbp) \ + (__thread_pointer = (char *)(tcbp) + TLS_TCB_OFFSET, NULL) + +/* Value passed to 'clone' for initialization of the thread register. */ +# define TLS_DEFINE_INIT_TP(tp, pd) \ + void *tp = (void *) (pd) + TLS_TCB_OFFSET + TLS_PRE_TCB_SIZE + +/* Return the address of the dtv for the current thread. */ +# define THREAD_DTV() \ + (((tcbhead_t *) (__thread_pointer - TLS_TCB_OFFSET))[-1].dtv) + +/* Return the thread descriptor for the current thread. */ +# define THREAD_SELF \ + ((struct pthread *) (__thread_pointer \ + - TLS_TCB_OFFSET - TLS_PRE_TCB_SIZE)) + +/* Magic for libthread_db to know how to do THREAD_SELF. */ +#ifdef __tilegx__ +# define DB_THREAD_SELF \ + REGISTER (64, 64, REG_TP * 8, - TLS_TCB_OFFSET - TLS_PRE_TCB_SIZE) +#else +# define DB_THREAD_SELF \ + REGISTER (32, 32, REG_TP * 4, - TLS_TCB_OFFSET - TLS_PRE_TCB_SIZE) +#endif + +/* Read member of the thread descriptor directly. */ +# define THREAD_GETMEM(descr, member) (descr->member) + +/* Same as THREAD_GETMEM, but the member offset can be non-constant. */ +# define THREAD_GETMEM_NC(descr, member, idx) \ + (descr->member[idx]) + +/* Set member of the thread descriptor directly. */ +# define THREAD_SETMEM(descr, member, value) \ + (descr->member = (value)) + +/* Same as THREAD_SETMEM, but the member offset can be non-constant. */ +# define THREAD_SETMEM_NC(descr, member, idx, value) \ + (descr->member[idx] = (value)) + +/* Set the stack guard field in TCB head. */ +# define THREAD_SET_STACK_GUARD(value) \ + (((tcbhead_t *) ((char *) __thread_pointer \ + - TLS_TCB_OFFSET))[-1].stack_guard = (value)) +# define THREAD_COPY_STACK_GUARD(descr) \ + (((tcbhead_t *) ((char *) (descr) \ + + TLS_PRE_TCB_SIZE))[-1].stack_guard \ + = ((tcbhead_t *) ((char *) __thread_pointer \ + - TLS_TCB_OFFSET))[-1].stack_guard) + +/* Set the pointer guard field in TCB head. */ +# define THREAD_GET_POINTER_GUARD() \ + (((tcbhead_t *) ((char *) __thread_pointer \ + - TLS_TCB_OFFSET))[-1].pointer_guard) +# define THREAD_SET_POINTER_GUARD(value) \ + (THREAD_GET_POINTER_GUARD () = (value)) +# define THREAD_COPY_POINTER_GUARD(descr) \ + (((tcbhead_t *) ((char *) (descr) \ + + TLS_PRE_TCB_SIZE))[-1].pointer_guard \ + = THREAD_GET_POINTER_GUARD()) + +/* l_tls_offset == 0 is perfectly valid on Tile, so we have to use some + different value to mean unset l_tls_offset. */ +# define NO_TLS_OFFSET -1 + +/* Get and set the global scope generation counter in struct pthread. */ +#define THREAD_GSCOPE_FLAG_UNUSED 0 +#define THREAD_GSCOPE_FLAG_USED 1 +#define THREAD_GSCOPE_FLAG_WAIT 2 +#define THREAD_GSCOPE_RESET_FLAG() \ + do \ + { int __res \ + = atomic_exchange_rel (&THREAD_SELF->header.gscope_flag, \ + THREAD_GSCOPE_FLAG_UNUSED); \ + if (__res == THREAD_GSCOPE_FLAG_WAIT) \ + lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \ + } \ + while (0) +#define THREAD_GSCOPE_SET_FLAG() \ + do \ + { \ + THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED; \ + atomic_write_barrier (); \ + } \ + while (0) +#define THREAD_GSCOPE_WAIT() \ + GL(dl_wait_lookup_done) () + +#endif /* __ASSEMBLER__ */ + +#endif /* tls.h */ diff --git a/REORG.TODO/sysdeps/tile/preconfigure b/REORG.TODO/sysdeps/tile/preconfigure new file mode 100644 index 0000000000..f3e5d7edae --- /dev/null +++ b/REORG.TODO/sysdeps/tile/preconfigure @@ -0,0 +1,12 @@ +# This is a -*- sh -*- +case "$machine" in + tilepro) + base_machine=tile machine=tile/tilepro ;; + tilegx*) + base_machine=tile + if $CC $CFLAGS $CPPFLAGS -E -dM -xc /dev/null | grep -q __LP64__; then + machine=tile/tilegx/tilegx64 + else + machine=tile/tilegx/tilegx32 + fi ;; +esac diff --git a/REORG.TODO/sysdeps/tile/s_fma.c b/REORG.TODO/sysdeps/tile/s_fma.c new file mode 100644 index 0000000000..d9613fa67c --- /dev/null +++ b/REORG.TODO/sysdeps/tile/s_fma.c @@ -0,0 +1 @@ +#include <soft-fp/fmadf4.c> diff --git a/REORG.TODO/sysdeps/tile/s_fmaf.c b/REORG.TODO/sysdeps/tile/s_fmaf.c new file mode 100644 index 0000000000..aa5c9b2d91 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/s_fmaf.c @@ -0,0 +1 @@ +#include <soft-fp/fmasf4.c> diff --git a/REORG.TODO/sysdeps/tile/setjmp.S b/REORG.TODO/sysdeps/tile/setjmp.S new file mode 100644 index 0000000000..0321c10b43 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/setjmp.S @@ -0,0 +1,47 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <jmpbuf-offsets.h> + + .text + + /* Keep traditional entry points in with sigsetjmp(). */ +ENTRY(setjmp) + { movei r1, 1; j 1f } +END(setjmp) + +ENTRY(_setjmp) + { movei r1, 0; j 1f } +END(_setjmp) +libc_hidden_def (_setjmp) + +ENTRY(__sigsetjmp) + FEEDBACK_ENTER(__sigsetjmp) +1: + move r2, r0 + +#define SAVE(r) { ST r2, r ; ADDI_PTR r2, r2, REGSIZE } + FOR_EACH_CALLEE_SAVED_REG(SAVE) + + mfspr r3, INTERRUPT_CRITICAL_SECTION + ST r2, r3 + j plt(__sigjmp_save) + jrp lr /* Keep the backtracer happy. */ +END(__sigsetjmp) +hidden_def (__sigsetjmp) diff --git a/REORG.TODO/sysdeps/tile/sfp-machine.h b/REORG.TODO/sysdeps/tile/sfp-machine.h new file mode 100644 index 0000000000..1dc92cd4cf --- /dev/null +++ b/REORG.TODO/sysdeps/tile/sfp-machine.h @@ -0,0 +1,99 @@ +/* Machine-dependent software floating-point definitions, tile version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <bits/wordsize.h> + +#define _FP_W_TYPE_SIZE __WORDSIZE +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#if _FP_W_TYPE_SIZE == 64 + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_MUL_MEAT_DW_S(R,X,Y) \ + _FP_MUL_MEAT_DW_1_imm(_FP_WFRACBITS_S,R,X,Y) +#define _FP_MUL_MEAT_DW_D(R,X,Y) \ + _FP_MUL_MEAT_DW_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_DW_Q(R,X,Y) \ + _FP_MUL_MEAT_DW_2_wide_3mul(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S _FP_QNANBIT_S +#define _FP_NANFRAC_D _FP_QNANBIT_D +#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0 + +#else /* _FP_W_TYPE_SIZE == 32 */ + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_MUL_MEAT_DW_S(R,X,Y) \ + _FP_MUL_MEAT_DW_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_DW_D(R,X,Y) \ + _FP_MUL_MEAT_DW_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_DW_Q(R,X,Y) \ + _FP_MUL_MEAT_DW_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S _FP_QNANBIT_S +#define _FP_NANFRAC_D _FP_QNANBIT_D, 0 +#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0 + +#endif + +#define _FP_NANSIGN_S 1 +#define _FP_NANSIGN_D 1 +#define _FP_NANSIGN_Q 1 + +#define _FP_KEEPNANFRACP 1 +#define _FP_QNANNEGATEDP 0 + +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#define _FP_TININESS_AFTER_ROUNDING 0 diff --git a/REORG.TODO/sysdeps/tile/sotruss-lib.c b/REORG.TODO/sysdeps/tile/sotruss-lib.c new file mode 100644 index 0000000000..67fdad84a6 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/sotruss-lib.c @@ -0,0 +1,49 @@ +/* Override generic sotruss-lib.c to define actual functions for tile. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define HAVE_ARCH_PLTENTER +#define HAVE_ARCH_PLTEXIT + +#include <elf/sotruss-lib.c> + +ElfW(Addr) +la_tile_gnu_pltenter (ElfW(Sym) *sym __attribute__ ((unused)), + unsigned int ndx __attribute__ ((unused)), + uintptr_t *refcook, uintptr_t *defcook, + La_tile_regs *regs, unsigned int *flags, + const char *symname, long int *framesizep) +{ + print_enter (refcook, defcook, symname, + regs->lr_reg[0], regs->lr_reg[1], regs->lr_reg[2], + *flags); + + /* No need to copy anything, we will not need the parameters in any case. */ + *framesizep = 0; + + return sym->st_value; +} + +unsigned int +la_tile_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, const struct La_tile_regs *inregs, + struct La_tile_retval *outregs, const char *symname) +{ + print_exit (refcook, defcook, symname, outregs->lrv_reg[0]); + + return 0; +} diff --git a/REORG.TODO/sysdeps/tile/stackguard-macros.h b/REORG.TODO/sysdeps/tile/stackguard-macros.h new file mode 100644 index 0000000000..f2e041b99b --- /dev/null +++ b/REORG.TODO/sysdeps/tile/stackguard-macros.h @@ -0,0 +1,20 @@ +#include <bits/wordsize.h> + +#ifdef __tilegx__ +# if __WORDSIZE == 64 +# define STACK_CHK_GUARD \ + ({ uintptr_t x; asm ("addi %0, tp, -16; ld %0, %0" : "=r" (x)); x; }) +# define POINTER_CHK_GUARD \ + ({ uintptr_t x; asm ("addi %0, tp, -24; ld %0, %0" : "=r" (x)); x; }) +# else +# define STACK_CHK_GUARD \ + ({ uintptr_t x; asm ("addi %0, tp, -8; ld4s %0, %0" : "=r" (x)); x; }) +# define POINTER_CHK_GUARD \ + ({ uintptr_t x; asm ("addi %0, tp, -12; ld4s %0, %0" : "=r" (x)); x; }) +# endif +#else +# define STACK_CHK_GUARD \ + ({ uintptr_t x; asm ("addi %0, tp, -8; lw %0, %0" : "=r" (x)); x; }) +# define POINTER_CHK_GUARD \ + ({ uintptr_t x; asm ("addi %0, tp, -12; lw %0, %0" : "=r" (x)); x; }) +#endif diff --git a/REORG.TODO/sysdeps/tile/stackinfo.h b/REORG.TODO/sysdeps/tile/stackinfo.h new file mode 100644 index 0000000000..0bef407941 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/stackinfo.h @@ -0,0 +1,48 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains a bit of information about the stack allocation + of the processor. */ + +#ifndef _STACKINFO_H +#define _STACKINFO_H 1 + +#include <elf.h> + +/* On tile the stack grows down. */ +#define _STACK_GROWS_DOWN 1 + +/* Default to a non-executable stack. */ +#define DEFAULT_STACK_PERMS (PF_R|PF_W) + +/* Access to the stack pointer. The macros are used in alloca_account + for which they need to act as barriers as well, hence the additional + (unnecessary) parameters. */ +#define stackinfo_get_sp() \ + ({ void *p__; asm volatile ("move %0, sp" : "=r" (p__)); p__; }) +#if defined __tilegx__ && __WORDSIZE == 32 +#define __stackinfo_sub "subx" +#else +#define __stackinfo_sub "sub" +#endif +#define stackinfo_sub_sp(ptr) \ + ({ ptrdiff_t d__; \ + asm volatile (__stackinfo_sub " %0, %0, sp" : "=r" (d__) : "0" (ptr)); \ + d__; }) + +#endif /* stackinfo.h */ diff --git a/REORG.TODO/sysdeps/tile/start.S b/REORG.TODO/sysdeps/tile/start.S new file mode 100644 index 0000000000..c790c265e3 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/start.S @@ -0,0 +1,203 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This is the canonical entry point, usually the first thing in the text + segment. The ELF standard tells us that the stack is set up like this on + entry (the left side is the offset from "sp"), in units of + __SIZEOF_POINTER__ entries: + + +0 argc + +1 argv[0] + ... + +(argc+1) NULL + +(argc+2) envp[0] + ... + NULL + ... ElfInfo + + The ElfInfo is pairs of key/value long words following the envp + pointers and terminated by a zero-valued key. + + Although not mandated by the standard, it happens to be the case + that we store the actual argv and envp strings immediately after + the ElfInfo data on the stack. + + On entry r0 points to the shared library termination function, or 0 + if there isn't one. +*/ + +#include <features.h> +#include <sysdep.h> +#include <arch/abi.h> + +/* Just create no-ops if we don't support PC-relative PLT relocations. */ +#ifdef NO_PLT_PCREL +# define hw2_last_plt(x) 0 +# define hw1_plt(x) 0 +# define hw0_plt(x) 0 +#endif + + .text + .global _start + .type _start,@function + .align 8 +_start: + /* Linux starts us with sp pointing at the conventional Elf layout, + but we need to allow two "caller" words for our ABI convention. */ + { + /* Load argc (stored as a "long", equivalent to a pointer type). */ + LD_PTR r1, sp + + /* Save incoming 'sp', which points to the Elf argument block. */ + move r52, sp + } + + { + /* Allocate stack frame callee space for __libc_start_main. */ + ADDI_PTR r12, sp, -(2 * REGSIZE) + } + + { + /* Get our PC. */ + lnk r13 + + /* sp is not necessarily properly aligned on startup because + of the way ld.so pops off leading argv elements. So align it. */ + andi sp, r12, -8 + } +.Lmy_pc: + + { + /* Pass the address of the shared library termination function. */ + move r5, r0 + + /* Compute location where __libc_start_main's caller is supposed to + store its frame pointer. */ + ADDI_PTR r12, sp, REGSIZE + + /* Zero out callee space for return address. Unnecessary but free. + This is just paranoia to help backtracing not go awry. */ + ST sp, zero + } + { + /* Zero out our frame pointer for __libc_start_main. */ + ST r12, zero + + /* Zero out lr to make __libc_start_main the end of backtrace. */ + move lr, zero + + /* Compute a pointer to argv. envp will be determined + later in __libc_start_main. We set up the first argument + (the address of main) below. */ + ADDI_PTR r2, r52, __SIZEOF_POINTER__ + } + { + /* Pass the highest stack address to user code. */ + ADDI_PTR r6, sp, (2 * REGSIZE) + + /* Pass address of main() in r0, and of our own entry + points to .fini and .init in r3 and r4. */ +#ifdef __tilegx__ + moveli r0, hw2_last(main - .Lmy_pc) + } + { + shl16insli r0, r0, hw1(main - .Lmy_pc) + moveli r3, hw2_last(__libc_csu_init - .Lmy_pc) + } + { + shl16insli r0, r0, hw0(main - .Lmy_pc) + shl16insli r3, r3, hw1(__libc_csu_init - .Lmy_pc) + } + { + ADD_PTR r0, r0, r13 + shl16insli r3, r3, hw0(__libc_csu_init - .Lmy_pc) + } + { + moveli r12, hw2_last_plt(__libc_start_main - .Lmy_pc) + ADD_PTR r3, r3, r13 + } + { + shl16insli r12, r12, hw1_plt(__libc_start_main - .Lmy_pc) + moveli r4, hw2_last(__libc_csu_fini - .Lmy_pc) + } + { + shl16insli r12, r12, hw0_plt(__libc_start_main - .Lmy_pc) + shl16insli r4, r4, hw1(__libc_csu_fini - .Lmy_pc) + } + { + ADD_PTR r12, r12, r13 + shl16insli r4, r4, hw0(__libc_csu_fini - .Lmy_pc) + } + { + ADD_PTR r4, r4, r13 +# ifdef NO_PLT_PCREL + j plt(__libc_start_main) +# else + jr r12 +# endif + } +#else + addli r0, r13, lo16(main - .Lmy_pc) + } + { + auli r0, r0, ha16(main - .Lmy_pc) + addli r3, r13, lo16(__libc_csu_init - .Lmy_pc) + } + { + auli r3, r3, ha16(__libc_csu_init - .Lmy_pc) + addli r4, r13, lo16(__libc_csu_fini - .Lmy_pc) + } + { + auli r4, r4, ha16(__libc_csu_fini - .Lmy_pc) + /* Call the user's main function, and exit with its value. + But let the libc call main. */ + j plt(__libc_start_main) + } +#endif + + { + /* Tell backtracer to give up (_start has no caller). */ + info INFO_OP_CANNOT_BACKTRACE + } +.size _start, .-_start + +/* Define a symbol for the first piece of initialized data. */ + .data + .global __data_start + .align 8 +__data_start: + .long 0 + .weak data_start + data_start = __data_start diff --git a/REORG.TODO/sysdeps/tile/sysdep.h b/REORG.TODO/sysdeps/tile/sysdep.h new file mode 100644 index 0000000000..cd94e7745b --- /dev/null +++ b/REORG.TODO/sysdeps/tile/sysdep.h @@ -0,0 +1,110 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/generic/sysdep.h> +#include <bits/wordsize.h> +#include <arch/abi.h> + +#if defined __ASSEMBLER__ || defined REQUEST_ASSEMBLER_MACROS + +#include <feedback.h> + +/* Make use of .size directive. */ +#define ASM_SIZE_DIRECTIVE(name) .size name,.-name; + +/* Define an entry point visible from C. */ +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(name); \ + .type C_SYMBOL_NAME(name),@function; \ + .align 8; \ + C_LABEL(name) \ + cfi_startproc; \ + CALL_MCOUNT + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(name) + +/* Since C identifiers are not normally prefixed with an underscore + on this system, the asm identifier `syscall_error' intrudes on the + C name space. Make sure we use an innocuous name. */ +#define syscall_error __syscall_error +#define mcount __mcount + +/* If compiled for profiling, call `mcount' at the start of each function. + The mcount code requires the caller PC in r10. The `mcount' function + sets lr back to the value r10 had on entry when it returns. */ +#ifdef PROF +#define CALL_MCOUNT { move r10, lr; jal mcount } +#else +#define CALL_MCOUNT /* Do nothing. */ +#endif + +/* Local label name for asm code. */ +#define L(name) .L##name + +/* Specify the size in bytes of a machine register. */ +#ifdef __tilegx__ +#define REGSIZE 8 +#else +#define REGSIZE 4 +#endif + +/* Support a limited form of shared assembly between tilepro and tilegx. + The presumption is that LD/ST are used for manipulating registers. + Since opcode parsing is case-insensitive, we don't need to provide + definitions for these on tilegx. */ +#ifndef __tilegx__ +#define LD lw +#define LD4U lw +#define ST sw +#define ST4 sw +#define BNEZ bnz +#define BEQZ bz +#define BEQZT bzt +#define BGTZ bgz +#define CMPEQI seqi +#define CMPEQ seq +#define CMOVEQZ mvz +#define CMOVNEZ mvnz +#endif + +/* Provide "pointer-oriented" instruction variants. These differ not + just for tilepro vs tilegx, but also for tilegx -m64 vs -m32. */ +#if defined __tilegx__ && __WORDSIZE == 32 +#define ADD_PTR addx +#define ADDI_PTR addxi +#define ADDLI_PTR addxli +#define LD_PTR ld4s +#define ST_PTR st4 +#define SHL_PTR_ADD shl2add +#else +#define ADD_PTR add +#define ADDI_PTR addi +#define ADDLI_PTR addli +#define LD_PTR LD +#define ST_PTR ST +#ifdef __tilegx__ +#define SHL_PTR_ADD shl3add +#else +#define SHL_PTR_ADD s2a +#endif +#endif + +#endif /* __ASSEMBLER__ */ diff --git a/REORG.TODO/sysdeps/tile/tilegx/Implies b/REORG.TODO/sysdeps/tile/tilegx/Implies new file mode 100644 index 0000000000..ade71c1957 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/Implies @@ -0,0 +1 @@ +ieee754/dbl-64/wordsize-64 diff --git a/REORG.TODO/sysdeps/tile/tilegx/Makefile b/REORG.TODO/sysdeps/tile/tilegx/Makefile new file mode 100644 index 0000000000..4281dd98fc --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/Makefile @@ -0,0 +1,35 @@ +include $(common-objpfx)cflags-mcmodel-large.mk + +# Check for gcc to support the command-line switch, and for +# binutils to support the hwN_plt() assembly operators and relocations. +$(common-objpfx)cflags-mcmodel-large.mk: $(common-objpfx)config.make + mcmodel=no; \ + (echo 'int main() { return getuid(); }' | \ + $(CC) -o /dev/null -xc - -mcmodel=large -fpic) && mcmodel=yes; \ + echo "cflags-mcmodel-large = $$mcmodel" > $@ + +ifeq (yes,$(cflags-mcmodel-large)) + +ifeq ($(subdir),csu) +# elf-init.c is in libc_nonshared.o (the end of the shared object) but +# must reach the _init symbol at the very start of the shared object. +CFLAGS-elf-init.c += -mcmodel=large + +# __gmon_start__ is at the very start of the shared object when linked +# with profiling, but calls to libc.so via the PLT at the very end. +CFLAGS-gmon-start.c += -mcmodel=large +endif + +else + +# Don't try to compile assembly code with hwN_plt() directives if the +# toolchain doesn't support -mcmodel=large. +ifeq ($(subdir),csu) +CPPFLAGS-start.S += -DNO_PLT_PCREL +CPPFLAGS-crti.S += -DNO_PLT_PCREL +endif +ifeq ($(subdir),nptl) +CPPFLAGS-pt-crti.S += -DNO_PLT_PCREL +endif + +endif diff --git a/REORG.TODO/sysdeps/tile/tilegx/atomic-machine.h b/REORG.TODO/sysdeps/tile/tilegx/atomic-machine.h new file mode 100644 index 0000000000..e77f6707b0 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/atomic-machine.h @@ -0,0 +1,61 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _ATOMIC_MACHINE_H +#define _ATOMIC_MACHINE_H 1 + +#include <arch/spr_def.h> + +#ifdef _LP64 +# define __HAVE_64B_ATOMICS 1 +#else +/* tilegx32 does have 64-bit atomics, but assumptions in the semaphore + code mean that unaligned 64-bit atomics will be used if this symbol + is true, and unaligned atomics are not supported on tile. */ +# define __HAVE_64B_ATOMICS 0 +#endif + +#define USE_ATOMIC_COMPILER_BUILTINS 0 +#define ATOMIC_EXCHANGE_USES_CAS 0 + +/* Pick appropriate 8- or 4-byte instruction. */ +#define __atomic_update(mem, v, op) \ + ((__typeof (*(mem))) (__typeof (*(mem) - *(mem))) \ + ((sizeof (*(mem)) == 8) ? \ + __insn_##op ((void *) (mem), (int64_t) (__typeof((v) - (v))) (v)) : \ + (sizeof (*(mem)) == 4) ? \ + __insn_##op##4 ((void *) (mem), (int32_t) (__typeof ((v) - (v))) (v)) : \ + __atomic_error_bad_argument_size())) + +#define atomic_compare_and_exchange_val_acq(mem, n, o) \ + ({ __insn_mtspr (SPR_CMPEXCH_VALUE, (int64_t) (__typeof ((o) - (o))) (o)); \ + __atomic_update (mem, n, cmpexch); }) +#define atomic_exchange_acq(mem, newvalue) \ + __atomic_update (mem, newvalue, exch) +#define atomic_exchange_and_add(mem, value) \ + __atomic_update (mem, value, fetchadd) +#define atomic_and_val(mem, mask) \ + __atomic_update (mem, mask, fetchand) +#define atomic_or_val(mem, mask) \ + __atomic_update (mem, mask, fetchor) +#define atomic_decrement_if_positive(mem) \ + __atomic_update (mem, -1, fetchaddgez) + +#include <sysdeps/tile/atomic-machine.h> + +#endif /* atomic-machine.h */ diff --git a/REORG.TODO/sysdeps/tile/tilegx/bits/wordsize.h b/REORG.TODO/sysdeps/tile/tilegx/bits/wordsize.h new file mode 100644 index 0000000000..9dc4da5de9 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/bits/wordsize.h @@ -0,0 +1,11 @@ +/* Determine the wordsize from the preprocessor defines. */ + +#ifdef __LP64__ +# define __WORDSIZE 64 +# define __WORDSIZE_TIME64_COMPAT32 1 +#else +# define __WORDSIZE 32 +# define __WORDSIZE_TIME64_COMPAT32 0 +# define __WORDSIZE32_SIZE_ULONG 0 +# define __WORDSIZE32_PTRDIFF_LONG 0 +#endif diff --git a/REORG.TODO/sysdeps/tile/tilegx/memchr.c b/REORG.TODO/sysdeps/tile/tilegx/memchr.c new file mode 100644 index 0000000000..7da0f79da2 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/memchr.c @@ -0,0 +1,77 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> +#include "string-endian.h" + +void * +__memchr (const void *s, int c, size_t n) +{ + const uint64_t *last_word_ptr; + const uint64_t *p; + const char *last_byte_ptr; + uintptr_t s_int; + uint64_t goal, before_mask, v, bits; + char *ret; + + if (__builtin_expect (n == 0, 0)) + { + /* Don't dereference any memory if the array is empty. */ + return NULL; + } + + /* Get an aligned pointer. */ + s_int = (uintptr_t) s; + p = (const uint64_t *) (s_int & -8); + + /* Create eight copies of the byte for which we are looking. */ + goal = copy_byte(c); + + /* Read the first word, but munge it so that bytes before the array + will not match goal. */ + before_mask = MASK (s_int); + v = (*p | before_mask) ^ (goal & before_mask); + + /* Compute the address of the last byte. */ + last_byte_ptr = (const char *) s + n - 1; + + /* Handle possible addition overflow. */ + if (__glibc_unlikely ((uintptr_t) last_byte_ptr < (uintptr_t) s)) + last_byte_ptr = (const char *) UINTPTR_MAX; + + /* Compute the address of the word containing the last byte. */ + last_word_ptr = (const uint64_t *) ((uintptr_t) last_byte_ptr & -8); + + while ((bits = __insn_v1cmpeq (v, goal)) == 0) + { + if (__builtin_expect (p == last_word_ptr, 0)) + { + /* We already read the last word in the array, so give up. */ + return NULL; + } + v = *++p; + } + + /* We found a match, but it might be in a byte past the end + of the array. */ + ret = ((char *) p) + (CFZ (bits) >> 3); + return (ret <= last_byte_ptr) ? ret : NULL; +} +weak_alias (__memchr, memchr) +libc_hidden_builtin_def (memchr) diff --git a/REORG.TODO/sysdeps/tile/tilegx/memcpy.c b/REORG.TODO/sysdeps/tile/tilegx/memcpy.c new file mode 100644 index 0000000000..c1a2a29860 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/memcpy.c @@ -0,0 +1,272 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> +#include <stdlib.h> +#include <memcopy.h> +#include <arch/chip.h> + +/* How many cache lines ahead should we prefetch? */ +#define PREFETCH_LINES_AHEAD 3 + +void * inhibit_loop_to_libcall +__memcpy (void *__restrict dstv, const void *__restrict srcv, size_t n) +{ + char *__restrict dst1 = (char *) dstv; + const char *__restrict src1 = (const char *) srcv; + const char *__restrict src1_end; + const char *__restrict prefetch; + op_t *__restrict dst8; /* 8-byte pointer to destination memory. */ + op_t final; /* Final bytes to write to trailing word, if any */ + long i; + + if (n < 16) + { + for (; n; n--) + *dst1++ = *src1++; + return dstv; + } + + /* Locate the end of source memory we will copy. Don't prefetch + past this. */ + src1_end = src1 + n - 1; + + /* Prefetch ahead a few cache lines, but not past the end. */ + prefetch = src1; + for (i = 0; i < PREFETCH_LINES_AHEAD; i++) + { + __insn_prefetch (prefetch); + prefetch += CHIP_L2_LINE_SIZE (); + prefetch = (prefetch < src1_end) ? prefetch : src1; + } + + /* Copy bytes until dst is word-aligned. */ + for (; (uintptr_t) dst1 & (sizeof (op_t) - 1); n--) + *dst1++ = *src1++; + + /* 8-byte pointer to destination memory. */ + dst8 = (op_t *) dst1; + + if (__builtin_expect ((uintptr_t) src1 & (sizeof (op_t) - 1), 0)) + { + /* Misaligned copy. Use glibc's _wordcopy_fwd_dest_aligned, but + inline it to avoid prologue/epilogue. TODO: Consider + prefetching and using wh64 as well. */ + void * srci; + op_t a0, a1, a2, a3; + long int dstp = (long int) dst1; + long int srcp = (long int) src1; + long int len = n / OPSIZ; + + /* Save the initial source pointer so we know the number of + bytes to shift for merging two unaligned results. */ + srci = (void *) srcp; + + /* Make SRCP aligned by rounding it down to the beginning of the + `op_t' it points in the middle of. */ + srcp &= -OPSIZ; + + switch (len % 4) + { + case 2: + a1 = ((op_t *) srcp)[0]; + a2 = ((op_t *) srcp)[1]; + len += 2; + srcp += 2 * OPSIZ; + goto do1; + case 3: + a0 = ((op_t *) srcp)[0]; + a1 = ((op_t *) srcp)[1]; + len += 1; + srcp += 2 * OPSIZ; + goto do2; + case 0: + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + return dstv; + a3 = ((op_t *) srcp)[0]; + a0 = ((op_t *) srcp)[1]; + len += 0; + srcp += 2 * OPSIZ; + goto do3; + case 1: + a2 = ((op_t *) srcp)[0]; + a3 = ((op_t *) srcp)[1]; + srcp += 2 * OPSIZ; + len -= 1; + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + goto do0; + goto do4; /* No-op. */ + } + + do + { + do4: + a0 = ((op_t *) srcp)[0]; + a2 = __insn_dblalign (a2, a3, srci); + ((op_t *) dstp)[0] = a2; + srcp += OPSIZ; + dstp += OPSIZ; + do3: + a1 = ((op_t *) srcp)[0]; + a3 = __insn_dblalign (a3, a0, srci); + ((op_t *) dstp)[0] = a3; + srcp += OPSIZ; + dstp += OPSIZ; + do2: + a2 = ((op_t *) srcp)[0]; + a0 = __insn_dblalign (a0, a1, srci); + ((op_t *) dstp)[0] = a0; + srcp += OPSIZ; + dstp += OPSIZ; + do1: + a3 = ((op_t *) srcp)[0]; + a1 = __insn_dblalign (a1, a2, srci); + ((op_t *) dstp)[0] = a1; + srcp += OPSIZ; + dstp += OPSIZ; + len -= 4; + } + while (len != 0); + + /* This is the right position for do0. Please don't move + it into the loop. */ + do0: + ((op_t *) dstp)[0] = __insn_dblalign (a2, a3, srci); + + n = n % OPSIZ; + if (n == 0) + return dstv; + + a0 = ((const char *) srcp <= src1_end) ? ((op_t *) srcp)[0] : 0; + + final = __insn_dblalign (a3, a0, srci); + dst8 = (op_t *)(dstp + OPSIZ); + } + else + { + /* Aligned copy. */ + + const op_t *__restrict src8 = (const op_t *) src1; + + /* src8 and dst8 are both word-aligned. */ + if (n >= CHIP_L2_LINE_SIZE ()) + { + /* Copy until 'dst' is cache-line-aligned. */ + for (; (uintptr_t) dst8 & (CHIP_L2_LINE_SIZE () - 1); + n -= sizeof (op_t)) + *dst8++ = *src8++; + + for (; n >= CHIP_L2_LINE_SIZE ();) + { + op_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + + /* Prefetch and advance to next line to prefetch, but + don't go past the end. */ + __insn_prefetch (prefetch); + prefetch += CHIP_L2_LINE_SIZE (); + prefetch = (prefetch < src1_end) ? prefetch : + (const char *) src8; + + /* Do all the loads before wh64. This is necessary if + [src8, src8+7] and [dst8, dst8+7] share the same + cache line and dst8 <= src8, as can be the case when + called from memmove, or with code tested on x86 whose + memcpy always works with forward copies. */ + tmp0 = *src8++; + tmp1 = *src8++; + tmp2 = *src8++; + tmp3 = *src8++; + tmp4 = *src8++; + tmp5 = *src8++; + tmp6 = *src8++; + tmp7 = *src8++; + + __insn_wh64 (dst8); + + *dst8++ = tmp0; + *dst8++ = tmp1; + *dst8++ = tmp2; + *dst8++ = tmp3; + *dst8++ = tmp4; + *dst8++ = tmp5; + *dst8++ = tmp6; + *dst8++ = tmp7; + + n -= 64; + } +#if CHIP_L2_LINE_SIZE() != 64 +# error "Fix code that assumes particular L2 cache line size." +#endif + } + + for (; n >= sizeof (op_t); n -= sizeof (op_t)) + *dst8++ = *src8++; + + if (__builtin_expect (n == 0, 1)) + return dstv; + + final = *src8; + } + + /* n != 0 if we get here. Write out any trailing bytes. */ + dst1 = (char *) dst8; +#ifndef __BIG_ENDIAN__ + if (n & 4) + { + *(uint32_t *) dst1 = final; + dst1 += 4; + final >>= 32; + n &= 3; + } + if (n & 2) + { + *(uint16_t *) dst1 = final; + dst1 += 2; + final >>= 16; + n &= 1; + } + if (n) + *(uint8_t *) dst1 = final; +#else + if (n & 4) + { + *(uint32_t *) dst1 = final >> 32; + dst1 += 4; + } + else + { + final >>= 32; + } + if (n & 2) + { + *(uint16_t *) dst1 = final >> 16; + dst1 += 2; + } + else + { + final >>= 16; + } + if (n & 1) + *(uint8_t *) dst1 = final >> 8; +#endif + + return dstv; +} +weak_alias (__memcpy, memcpy) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/tile/tilegx/memset.c b/REORG.TODO/sysdeps/tile/tilegx/memset.c new file mode 100644 index 0000000000..c6804a9dc6 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/memset.c @@ -0,0 +1,151 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <arch/chip.h> +#include <string.h> +#include <stdint.h> +#include "string-endian.h" + +void * inhibit_loop_to_libcall +__memset (void *s, int c, size_t n) +{ + uint64_t *out64; + int n64, to_align64; + uint64_t v64; + uint8_t *out8 = s; + + /* Experimentation shows that a trivial tight loop is a win up until + around a size of 20, where writing a word at a time starts to win. */ +#define BYTE_CUTOFF 20 + +#if BYTE_CUTOFF < 7 + /* This must be at least at least this big, or some code later + on doesn't work. */ +# error "BYTE_CUTOFF is too small." +#endif + + if (n < BYTE_CUTOFF) + { + /* Strangely, this turns out to be the tightest way to write + this loop. */ + if (n != 0) + { + do + { + /* Strangely, combining these into one line performs worse. */ + *out8 = c; + out8++; + } + while (--n != 0); + } + + return s; + } + + /* Align 'out8'. We know n >= 7 so this won't write past the end. */ + while (((uintptr_t) out8 & 7) != 0) + { + *out8++ = c; + --n; + } + + /* Align 'n'. */ + while (n & 7) + out8[--n] = c; + + out64 = (uint64_t *) out8; + n64 = n >> 3; + + /* Tile input byte out to 64 bits. */ + v64 = copy_byte(c); + + /* This must be at least 8 or the following loop doesn't work. */ +#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8) + + /* Determine how many words we need to emit before the 'out32' + pointer becomes aligned modulo the cache line size. */ + to_align64 = (-((uintptr_t) out64 >> 3)) & + (CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1); + + /* Only bother aligning and using wh64 if there is at least + one full cache line to process. This check also prevents + overrunning the end of the buffer with alignment words. */ + if (to_align64 <= n64 - CACHE_LINE_SIZE_IN_DOUBLEWORDS) + { + int lines_left; + + /* Align out64 mod the cache line size so we can use wh64. */ + n64 -= to_align64; + for (; to_align64 != 0; to_align64--) + { + *out64 = v64; + out64++; + } + + /* Use unsigned divide to turn this into a right shift. */ + lines_left = (unsigned) n64 / CACHE_LINE_SIZE_IN_DOUBLEWORDS; + + do + { + /* Only wh64 a few lines at a time, so we don't exceed the + maximum number of victim lines. */ + int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS ()) ? lines_left + : CHIP_MAX_OUTSTANDING_VICTIMS ()); + uint64_t *wh = out64; + int i = x; + int j; + + lines_left -= x; + + do + { + __insn_wh64 (wh); + wh += CACHE_LINE_SIZE_IN_DOUBLEWORDS; + } + while (--i); + + for (j = x * (CACHE_LINE_SIZE_IN_DOUBLEWORDS / 4); j != 0; j--) + { + *out64++ = v64; + *out64++ = v64; + *out64++ = v64; + *out64++ = v64; + } + } + while (lines_left != 0); + + /* We processed all full lines above, so only this many + words remain to be processed. */ + n64 &= CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1; + } + + /* Now handle any leftover values. */ + if (n64 != 0) + { + do + { + *out64 = v64; + out64++; + } + while (--n64 != 0); + } + + return s; +} +weak_alias (__memset, memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/tile/tilegx/memusage.h b/REORG.TODO/sysdeps/tile/tilegx/memusage.h new file mode 100644 index 0000000000..c91371adcd --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/memusage.h @@ -0,0 +1,31 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> +#include <arch/spr_def.h> + +#define GETSP() ({ register uintptr_t stack_ptr asm ("sp"); stack_ptr; }) + +#define GETTIME(low,high) \ + { \ + uint64_t cycles = __insn_mfspr (SPR_CYCLE); \ + low = cycles & 0xffffffff; \ + high = cycles >> 32; \ + } + +#include <sysdeps/generic/memusage.h> diff --git a/REORG.TODO/sysdeps/tile/tilegx/rawmemchr.c b/REORG.TODO/sysdeps/tile/tilegx/rawmemchr.c new file mode 100644 index 0000000000..54b4a5c1b8 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/rawmemchr.c @@ -0,0 +1,45 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> +#include "string-endian.h" + +void * +__rawmemchr (const void *s, int c) +{ + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint64_t *p = (const uint64_t *) (s_int & -8); + + /* Create eight copies of the byte for which we are looking. */ + const uint64_t goal = copy_byte(c); + + /* Read the first word, but munge it so that bytes before the array + will not match goal. */ + const uint64_t before_mask = MASK (s_int); + uint64_t v = (*p | before_mask) ^ (goal & before_mask); + + uint64_t bits; + while ((bits = __insn_v1cmpeq (v, goal)) == 0) + v = *++p; + + return ((char *) p) + (CFZ (bits) >> 3); +} +libc_hidden_def (__rawmemchr) +weak_alias (__rawmemchr, rawmemchr) diff --git a/REORG.TODO/sysdeps/tile/tilegx/strcasestr.c b/REORG.TODO/sysdeps/tile/tilegx/strcasestr.c new file mode 100644 index 0000000000..ecb3e623ca --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/strcasestr.c @@ -0,0 +1,53 @@ +/* Return the offset of one string within another. + Copyright (C) 1994-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if HAVE_CONFIG_H +# include <config.h> +#endif + +/* Specification. */ +#include <string.h> + +#include <ctype.h> +#include <stdbool.h> +#include <strings.h> + +#define USE_AS_STRCASESTR +#define STRSTR __strcasestr +#define STRSTR2 strcasestr2 +#define STRCHR strcasechr +#define STRSTR_SCAN strcasestr_scan + +#undef strcasestr +#undef __strcasestr + +#ifndef STRCASESTR +#define STRCASESTR __strcasestr +#endif + +#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) + +#define CANON_ELEMENT(c) TOLOWER (c) +#define CMP_FUNC(p1, p2, l) \ + __strncasecmp ((const char *) (p1), (const char *) (p2), l) + +#include "strstr.c" + +#ifndef NO_ALIAS +weak_alias (__strcasestr, strcasestr) +#endif diff --git a/REORG.TODO/sysdeps/tile/tilegx/strchr.c b/REORG.TODO/sysdeps/tile/tilegx/strchr.c new file mode 100644 index 0000000000..36dfd31391 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/strchr.c @@ -0,0 +1,67 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> +#include "string-endian.h" + +#undef strchr + +char * +strchr (const char *s, int c) +{ + int z, g; + + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint64_t *p = (const uint64_t *) (s_int & -8); + + /* Create eight copies of the byte for which we are looking. */ + const uint64_t goal = copy_byte(c); + + /* Read the first aligned word, but force bytes before the string to + match neither zero nor goal (we make sure the high bit of each byte + is 1, and the low 7 bits are all the opposite of the goal byte). */ + const uint64_t before_mask = MASK (s_int); + uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1)); + + uint64_t zero_matches, goal_matches; + while (1) + { + /* Look for a terminating '\0'. */ + zero_matches = __insn_v1cmpeqi (v, 0); + + /* Look for the goal byte. */ + goal_matches = __insn_v1cmpeq (v, goal); + + if (__builtin_expect ((zero_matches | goal_matches) != 0, 0)) + break; + + v = *++p; + } + + z = CFZ (zero_matches); + g = CFZ (goal_matches); + + /* If we found c before '\0' we got a match. Note that if c == '\0' + then g == z, and we correctly return the address of the '\0' + rather than NULL. */ + return (g <= z) ? ((char *) p) + (g >> 3) : NULL; +} +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/tile/tilegx/strchrnul.c b/REORG.TODO/sysdeps/tile/tilegx/strchrnul.c new file mode 100644 index 0000000000..e0f13b684e --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/strchrnul.c @@ -0,0 +1,64 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> +#include "string-endian.h" + +char * +__strchrnul (const char *s, int c) +{ + int z, g; + + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint64_t *p = (const uint64_t *) (s_int & -8); + + /* Create eight copies of the byte for which we are looking. */ + const uint64_t goal = copy_byte(c); + + /* Read the first aligned word, but force bytes before the string to + match neither zero nor goal (we make sure the high bit of each byte + is 1, and the low 7 bits are all the opposite of the goal byte). */ + const uint64_t before_mask = MASK (s_int); + uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1)); + + uint64_t zero_matches, goal_matches; + while (1) + { + /* Look for a terminating '\0'. */ + zero_matches = __insn_v1cmpeqi (v, 0); + + /* Look for the goal byte. */ + goal_matches = __insn_v1cmpeq (v, goal); + + if (__builtin_expect ((zero_matches | goal_matches) != 0, 0)) + break; + + v = *++p; + } + + z = CFZ (zero_matches); + g = CFZ (goal_matches); + + /* Return a pointer to the NUL or goal, whichever is first. */ + if (z < g) + g = z; + return ((char *) p) + (g >> 3); +} +weak_alias (__strchrnul, strchrnul) diff --git a/REORG.TODO/sysdeps/tile/tilegx/string-endian.h b/REORG.TODO/sysdeps/tile/tilegx/string-endian.h new file mode 100644 index 0000000000..fe9b073efb --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/string-endian.h @@ -0,0 +1,58 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <endian.h> +#include <stdint.h> + +/* Provide a set of macros to help keep endianness #ifdefs out of + the string functions. + + MASK: Provide a mask based on the pointer alignment that + sets up non-zero bytes before the beginning of the string. + The MASK expression works because shift counts are taken mod 64. + + NULMASK: Clear bytes beyond a given point in the string. + + CFZ: Find the first zero bit in the 8 string bytes in a long. + + REVCZ: Find the last zero bit in the 8 string bytes in a long. + + STRSHIFT: Shift N bits towards the start of the string. */ + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define MASK(x) (__insn_shl(1ULL, (x << 3)) - 1) +#define NULMASK(x) ((2ULL << x) - 1) +#define CFZ(x) __insn_ctz(x) +#define REVCZ(x) __insn_clz(x) +#define STRSHIFT(x,n) ((x) >> n) +#else +#define MASK(x) (__insn_shl(-2LL, ((-x << 3) - 1))) +#define NULMASK(x) (-2LL << (63 - x)) +#define CFZ(x) __insn_clz(x) +#define REVCZ(x) __insn_ctz(x) +#define STRSHIFT(x,n) ((x) << n) +#endif + +/* Create eight copies of the byte in a uint64_t. Byte Shuffle uses + the bytes of srcB as the index into the dest vector to select a + byte. With all indices of zero, the first byte is copied into all + the other bytes. */ +static inline uint64_t copy_byte(uint8_t byte) +{ + return __insn_shufflebytes(byte, 0, 0); +} diff --git a/REORG.TODO/sysdeps/tile/tilegx/strlen.c b/REORG.TODO/sysdeps/tile/tilegx/strlen.c new file mode 100644 index 0000000000..5cd04acc59 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/strlen.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> +#include "string-endian.h" + +size_t +strlen (const char *s) +{ + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint64_t *p = (const uint64_t *) (s_int & -8); + + /* Read and MASK the first word. */ + uint64_t v = *p | MASK (s_int); + + uint64_t bits; + while ((bits = __insn_v1cmpeqi (v, 0)) == 0) + v = *++p; + + return ((const char *) p) + (CFZ (bits) >> 3) - s; +} +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/tile/tilegx/strnlen.c b/REORG.TODO/sysdeps/tile/tilegx/strnlen.c new file mode 100644 index 0000000000..5d73a14926 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/strnlen.c @@ -0,0 +1,57 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> +#include "string-endian.h" + +/* Find the length of S, but scan at most MAXLEN characters. If no + '\0' terminator is found in that many characters, return MAXLEN. */ +size_t +__strnlen (const char *s, size_t maxlen) +{ + /* When maxlen is 0, can't read any bytes or it might cause a page fault. */ + if (maxlen == 0) + return 0; + + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint64_t *p = (const uint64_t *) (s_int & -8); + size_t bytes_read = sizeof (*p) - (s_int & (sizeof (*p) - 1)); + + /* Read and MASK the first word. */ + uint64_t v = *p | MASK (s_int); + + uint64_t bits; + while ((bits = __insn_v1cmpeqi (v, 0)) == 0) + { + if (bytes_read >= maxlen) + { + /* Read maxlen bytes and didn't find the terminator. */ + return maxlen; + } + v = *++p; + bytes_read += sizeof (v); + } + + /* Found '\0', check it is not larger than maxlen */ + size_t len = ((const char *) p) + (CFZ (bits) >> 3) - s; + return (len < maxlen ? len : maxlen); +} +libc_hidden_def (__strnlen) +weak_alias (__strnlen, strnlen) +libc_hidden_def (strnlen) diff --git a/REORG.TODO/sysdeps/tile/tilegx/strrchr.c b/REORG.TODO/sysdeps/tile/tilegx/strrchr.c new file mode 100644 index 0000000000..5a9049e1b9 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/strrchr.c @@ -0,0 +1,68 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> +#include "string-endian.h" + +char * +strrchr (const char *s, int c) +{ + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint64_t *p = (const uint64_t *) (s_int & -8); + + /* Create eight copies of the byte for which we are looking. */ + const uint64_t goal = copy_byte(c); + + /* Read the first aligned word, but force bytes before the string to + match neither zero nor goal (we make sure the high bit of each byte + is 1, and the low 7 bits are all the opposite of the goal byte). */ + const uint64_t before_mask = MASK (s_int); + uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1)); + const char *found = NULL; + uint64_t zero_matches, goal_matches; + while (1) + { + /* Look for a terminating '\0'. */ + zero_matches = __insn_v1cmpeqi (v, 0); + + /* Look for the goal byte. */ + goal_matches = __insn_v1cmpeq (v, goal); + + /* If we found the goal, record the last offset. */ + if (__builtin_expect (goal_matches != 0, 0)) + { + if (__builtin_expect (zero_matches != 0, 0)) + { + /* Clear any goal after the first zero. */ + int first_nul = CFZ (zero_matches); + goal_matches &= NULMASK (first_nul); + } + if (__builtin_expect (goal_matches != 0, 1)) + found = ((char *) p) + 7 - (REVCZ (goal_matches) >> 3); + } + + if (__builtin_expect (zero_matches != 0, 0)) + return (char *) found; + + v = *++p; + } +} +weak_alias (strrchr, rindex) +libc_hidden_builtin_def (strrchr) diff --git a/REORG.TODO/sysdeps/tile/tilegx/strstr.c b/REORG.TODO/sysdeps/tile/tilegx/strstr.c new file mode 100644 index 0000000000..548a92045d --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/strstr.c @@ -0,0 +1,270 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* Specification of strstr. */ +#include <string.h> + +#include <stdbool.h> +#include "string-endian.h" + +#define RETURN_TYPE char * +#define AVAILABLE(h, h_l, j, n_l) \ + (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ + && ((h_l) = (j) + (n_l))) +#include "str-two-way.h" +typeof(two_way_short_needle) two_way_short_needle __attribute__((unused)); + +#undef strstr + +#ifndef STRSTR +#define STRSTR strstr +#endif + +#ifndef STRSTR2 +#define STRSTR2 strstr2 +#endif + +#ifndef STRCHR +#define STRCHR strchr +#endif + +#ifndef STRSTR_SCAN +#define STRSTR_SCAN strstr_scan +#endif + +#ifndef TOLOWER +# define TOLOWER(Ch) (Ch) +#endif + +#ifdef USE_AS_STRCASESTR + +static uint64_t +vec_tolower (uint64_t cc) +{ + /* For Uppercases letters, add 32 to convert to lower case. */ + uint64_t less_than_eq_Z = __insn_v1cmpltui (cc, 'Z' + 1); + uint64_t less_than_A = __insn_v1cmpltui (cc, 'A'); + uint64_t is_upper = __insn_v1cmpne (less_than_eq_Z, less_than_A); + return __insn_v1add (cc,__insn_v1shli (is_upper, 5)); +} + +/* There is no strcasechr() defined, but needed for 1 byte case + of strcasestr(), so create it here. */ + +static char * +strcasechr (const char *s, int c) +{ + int z, g; + + c = tolower (c); + + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint64_t *p = (const uint64_t *) (s_int & -8); + + /* Create eight copies of the byte for which we are looking. */ + const uint64_t goal = copy_byte(c); + + /* Read the first aligned word, but force bytes before the string to + match neither zero nor goal (we make sure the high bit of each byte + is 1, and the low 7 bits are all the opposite of the goal byte). */ + const uint64_t before_mask = MASK (s_int); + uint64_t v = + (vec_tolower (*p) | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1)); + + uint64_t zero_matches, goal_matches; + while (1) + { + /* Look for a terminating '\0'. */ + zero_matches = __insn_v1cmpeqi (v, 0); + + /* Look for the goal byte. */ + goal_matches = __insn_v1cmpeq (v, goal); + + if (__builtin_expect ((zero_matches | goal_matches) != 0, 0)) + break; + + v = vec_tolower (*++p); + } + + z = CFZ (zero_matches); + g = CFZ (goal_matches); + + /* If we found c before '\0' we got a match. Note that if c == '\0' + then g == z, and we correctly return the address of the '\0' + rather than NULL. */ + return (g <= z) ? ((char *) p) + (g >> 3) : NULL; +} + +# define vec_load(p) vec_tolower (*(p)) +# define STRCHR strcasechr +# define CMP_FUNC __strncasecmp + +#else + +# define vec_load(p) (*(p)) +# define STRCHR strchr +# define CMP_FUNC memcmp + +#endif + + +/* Compare 2-character needle using SIMD. */ +static char * +STRSTR2 (const char *haystack_start, const char *needle) +{ + int z, g; + + __insn_prefetch (haystack_start + 64); + + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) haystack_start; + const uint64_t *p = (const uint64_t *) (s_int & -8); + + /* Create eight copies of the first byte for which we are looking. */ + const uint64_t byte1 = copy_byte (TOLOWER (*needle)); + /* Create eight copies of the second byte for which we are looking. */ + const uint64_t byte2 = copy_byte (TOLOWER (*(needle + 1))); + + /* Read the first aligned word, but force bytes before the string to + match neither zero nor goal (we make sure the high bit of each byte + is 1, and the low 7 bits are all the opposite of the goal byte). */ + const uint64_t before_mask = MASK (s_int); + uint64_t v = + (vec_load (p) | before_mask) ^ (byte1 & __insn_v1shrui (before_mask, 1)); + + uint64_t zero_matches, goal_matches; + while (1) + { + /* Look for a terminating '\0'. */ + zero_matches = __insn_v1cmpeqi (v, 0); + uint64_t byte1_matches = __insn_v1cmpeq (v, byte1); + if (__builtin_expect (zero_matches != 0, 0)) + { + /* This is the last vector. Don't worry about matches + crossing into the next vector. Shift the second byte + back 1 byte to align it with the first byte, then and to + check for both matching. Each vector has a 1 in the LSB + of the byte if there was match. */ + uint64_t byte2_matches = __insn_v1cmpeq (v, byte2); + goal_matches = byte1_matches & STRSHIFT (byte2_matches, 8); + break; + } + else + { + /* This is not the last vector, so load the next vector now. + And compare byte2 to the 8-bytes starting 1 byte shifted from v, + which goes 1-byte into the next vector. */ + uint64_t v2 = vec_load (p + 1); + if (byte1_matches) + { + /* 8-bytes starting 1 byte into v. */ + v = __insn_dblalign (v, v2, (void*)1); + uint64_t byte2_matches_shifted = __insn_v1cmpeq (v, byte2); + goal_matches = byte1_matches & byte2_matches_shifted; + if (__builtin_expect (goal_matches != 0, 0)) + break; + } + __insn_prefetch (p + 4); + /* Move to next vector. */ + v = v2; + p++; + } + } + + z = CFZ (zero_matches); + g = CFZ (goal_matches); + + /* If we found the match before '\0' we got a true match. Note that + if c == '\0' then g == z, and we correctly return the address of + the '\0' rather than NULL. */ + return (g <= z) ? ((char *) p) + (g >> 3) : NULL; +} + +/* Scan for NEEDLE, using the first two characters as a filter. */ +static char * +STRSTR_SCAN (const char *haystack, const char *needle, + unsigned int needle_len) +{ + char *match; + while (1) + { + match = STRSTR2 (haystack, needle); + if (match == NULL) + return NULL; + /* Found first two characters of needle, check for remainder. */ + if (CMP_FUNC (match + 2, needle + 2, needle_len - 2) == 0) + return match; + /* Move past the previous match. Could be +2 instead of +1 if + first two characters are different, but that tested slower. */ + haystack = match + 1; + } +} + +/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK + if NEEDLE is empty, otherwise NULL if NEEDLE is not found in + HAYSTACK. */ +char * +STRSTR (const char *haystack_start, const char *needle_start) +{ + const char *haystack = haystack_start; + const char *needle = needle_start; + __insn_prefetch (haystack); + size_t needle_len = strlen (needle_start); /* Length of NEEDLE. */ + size_t haystack_len; /* Known minimum length of HAYSTACK. */ + + if (needle_len <= 2) + { + if (needle_len == 1) + return STRCHR (haystack_start, *needle_start); + if (needle_len == 0) + return (char *) haystack_start; + else + return STRSTR2 (haystack_start, needle_start); + } + + /* Fail if NEEDLE is longer than HAYSTACK. */ + if (__strnlen (haystack, needle_len) < needle_len) + return NULL; + + /* Perform the search. Abstract memory is considered to be an array + of 'unsigned char' values, not an array of 'char' values. See + ISO C 99 section 6.2.6.1. */ + if (needle_len < 40) + return STRSTR_SCAN (haystack_start, needle_start, needle_len); + else + { + /* Reduce the size of haystack using STRSTR2, since it has a smaller + linear coefficient than the Two-Way algorithm. */ + haystack = STRSTR2 (haystack_start, needle_start); + if (haystack == NULL) + return NULL; + needle = needle_start; + haystack_len = (haystack > haystack_start + needle_len ? 1 + : needle_len + haystack_start - haystack); + + return two_way_long_needle ((const unsigned char *) haystack, + haystack_len, + (const unsigned char *) needle, needle_len); + } +} +#ifndef USE_AS_STRCASESTR +libc_hidden_builtin_def (STRSTR) +#endif + +#undef LONG_NEEDLE_THRESHOLD diff --git a/REORG.TODO/sysdeps/tile/tilegx/tilegx32/Implies b/REORG.TODO/sysdeps/tile/tilegx/tilegx32/Implies new file mode 100644 index 0000000000..993b7f4cd1 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/tilegx32/Implies @@ -0,0 +1,3 @@ +tile/tilegx +tile +wordsize-32 diff --git a/REORG.TODO/sysdeps/tile/tilegx/tilegx64/Implies b/REORG.TODO/sysdeps/tile/tilegx/tilegx64/Implies new file mode 100644 index 0000000000..eb0686e0e6 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilegx/tilegx64/Implies @@ -0,0 +1,3 @@ +tile/tilegx +tile +wordsize-64 diff --git a/REORG.TODO/sysdeps/tile/tilepro/Implies b/REORG.TODO/sysdeps/tile/tilepro/Implies new file mode 100644 index 0000000000..709e1dc122 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilepro/Implies @@ -0,0 +1,2 @@ +tile +wordsize-32 diff --git a/REORG.TODO/sysdeps/tile/tilepro/atomic-machine.h b/REORG.TODO/sysdeps/tile/tilepro/atomic-machine.h new file mode 100644 index 0000000000..45e36de1e4 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilepro/atomic-machine.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _ATOMIC_MACHINE_H +#define _ATOMIC_MACHINE_H 1 + +#include <asm/unistd.h> + +#define __HAVE_64B_ATOMICS 0 +#define USE_ATOMIC_COMPILER_BUILTINS 0 +#define ATOMIC_EXCHANGE_USES_CAS 0 + +/* 32-bit integer compare-and-exchange. */ +static __inline __attribute__ ((always_inline)) +int __atomic_cmpxchg_32 (volatile int *mem, int newval, int oldval) +{ + int result; + __asm__ __volatile__ ("swint1" + : "=R00" (result), "=m" (*mem) + : "R10" (__NR_FAST_cmpxchg), "R00" (mem), + "R01" (oldval), "R02" (newval), "m" (*mem) + : "r20", "r21", "r22", "r23", "r24", + "r25", "r26", "r27", "r28", "r29", "memory"); + return result; +} + +#define atomic_compare_and_exchange_val_acq(mem, n, o) \ + ({ \ + if (sizeof (*(mem)) != 4) \ + __atomic_error_bad_argument_size (); \ + (__typeof (*(mem))) \ + __atomic_cmpxchg_32 ((int *) (mem), (int) (n), (int) (o)); \ + }) + +/* Atomically compute: + int old = *ptr; + *ptr = (old & mask) + addend; + return old; */ + +static __inline __attribute__ ((always_inline)) +int __atomic_update_32 (volatile int *mem, int mask, int addend) +{ + int result; + __asm__ __volatile__ ("swint1" + : "=R00" (result), "=m" (*mem) + : "R10" (__NR_FAST_atomic_update), "R00" (mem), + "R01" (mask), "R02" (addend), "m" (*mem) + : "r20", "r21", "r22", "r23", "r24", + "r25", "r26", "r27", "r28", "r29", "memory"); + return result; +} + +/* Size-checked verson of __atomic_update_32. */ +#define __atomic_update(mem, mask, addend) \ + ({ \ + if (sizeof (*(mem)) != 4) \ + __atomic_error_bad_argument_size (); \ + (__typeof (*(mem))) \ + __atomic_update_32 ((int *) (mem), (int) (mask), (int) (addend)); \ + }) + +#define atomic_exchange_acq(mem, newvalue) \ + __atomic_update ((mem), 0, (newvalue)) +#define atomic_exchange_and_add(mem, value) \ + __atomic_update ((mem), -1, (value)) +#define atomic_and_val(mem, mask) \ + __atomic_update ((mem), (mask), 0) +#define atomic_or_val(mem, mask) \ + ({ __typeof (mask) __att1_v = (mask); \ + __atomic_update ((mem), ~__att1_v, __att1_v); }) + +/* + * We must use the kernel atomics for atomic_store, since otherwise an + * unsynchronized store could become visible after another core's + * kernel-atomic implementation had read the memory word in question, + * but before it had written the updated value to it, which would + * cause the unsynchronized store to be lost. + */ +#define atomic_store_relaxed(mem, val) atomic_exchange_acq (mem, val) +#define atomic_store_release(mem, val) atomic_exchange_rel (mem, val) + +#include <sysdeps/tile/atomic-machine.h> + +#endif /* atomic-machine.h */ diff --git a/REORG.TODO/sysdeps/tile/tilepro/bits/wordsize.h b/REORG.TODO/sysdeps/tile/tilepro/bits/wordsize.h new file mode 100644 index 0000000000..d2c4a354a7 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilepro/bits/wordsize.h @@ -0,0 +1,6 @@ +/* Determine the wordsize from the preprocessor defines. */ + +#define __WORDSIZE 32 +#define __WORDSIZE_TIME64_COMPAT32 0 +#define __WORDSIZE32_SIZE_ULONG 0 +#define __WORDSIZE32_PTRDIFF_LONG 0 diff --git a/REORG.TODO/sysdeps/tile/tilepro/memchr.c b/REORG.TODO/sysdeps/tile/tilepro/memchr.c new file mode 100644 index 0000000000..fba1f70c2c --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilepro/memchr.c @@ -0,0 +1,76 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> + +void * +__memchr (const void *s, int c, size_t n) +{ + const uint32_t *last_word_ptr; + const uint32_t *p; + const char *last_byte_ptr; + uintptr_t s_int; + uint32_t goal, before_mask, v, bits; + char *ret; + + if (__builtin_expect (n == 0, 0)) + { + /* Don't dereference any memory if the array is empty. */ + return NULL; + } + + /* Get an aligned pointer. */ + s_int = (uintptr_t) s; + p = (const uint32_t *) (s_int & -4); + + /* Create four copies of the byte for which we are looking. */ + goal = 0x01010101 * (uint8_t) c; + + /* Read the first word, but munge it so that bytes before the array + will not match goal. Note that this shift count expression works + because we know shift counts are taken mod 32. */ + before_mask = (1 << (s_int << 3)) - 1; + v = (*p | before_mask) ^ (goal & before_mask); + + /* Compute the address of the last byte. */ + last_byte_ptr = (const char *) s + n - 1; + + /* Handle possible addition overflow. */ + if (__glibc_unlikely ((uintptr_t) last_byte_ptr < (uintptr_t) s)) + last_byte_ptr = (const char *) UINTPTR_MAX; + + /* Compute the address of the word containing the last byte. */ + last_word_ptr = (const uint32_t *) ((uintptr_t) last_byte_ptr & -4); + + while ((bits = __insn_seqb (v, goal)) == 0) + { + if (__builtin_expect (p == last_word_ptr, 0)) + { + /* We already read the last word in the array, so give up. */ + return NULL; + } + v = *++p; + } + + /* We found a match, but it might be in a byte past the end of the array. */ + ret = ((char *) p) + (__insn_ctz (bits) >> 3); + return (ret <= last_byte_ptr) ? ret : NULL; +} +weak_alias (__memchr, memchr) +libc_hidden_builtin_def (memchr) diff --git a/REORG.TODO/sysdeps/tile/tilepro/memcpy.S b/REORG.TODO/sysdeps/tile/tilepro/memcpy.S new file mode 100644 index 0000000000..0f73ccbc2f --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilepro/memcpy.S @@ -0,0 +1,397 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <arch/chip.h> +#include <sysdep.h> + + .text +ENTRY (__memcpy) + FEEDBACK_ENTER(__memcpy) + + /* r0 is the dest, r1 is the source, r2 is the size. */ + + /* Save aside original dest so we can return it at the end. */ + { sw sp, lr; move r23, r0; or r4, r0, r1 } + cfi_offset (lr, 0) + + /* Check for an empty size. */ + { bz r2, .Ldone; andi r4, r4, 3 } + + /* Check for an unaligned source or dest. */ + { bnz r4, .Lcopy_unaligned_maybe_many; addli r4, r2, -256 } + +.Lcheck_aligned_copy_size: + /* If we are copying < 256 bytes, branch to simple case. */ + { blzt r4, .Lcopy_8_check; slti_u r8, r2, 8 } + + /* Copying >= 256 bytes, so jump to complex prefetching loop. */ + { andi r6, r1, 63; j .Lcopy_many } + +/* Aligned 4 byte at a time copy loop. */ + +.Lcopy_8_loop: + /* Copy two words at a time to hide load latency. */ + { lw r3, r1; addi r1, r1, 4; slti_u r8, r2, 16 } + { lw r4, r1; addi r1, r1, 4 } + { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 } + { sw r0, r4; addi r0, r0, 4; addi r2, r2, -4 } +.Lcopy_8_check: + { bzt r8, .Lcopy_8_loop; slti_u r4, r2, 4 } + + /* Copy odd leftover word, if any. */ + { bnzt r4, .Lcheck_odd_stragglers } + { lw r3, r1; addi r1, r1, 4 } + { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 } + +.Lcheck_odd_stragglers: + { bnz r2, .Lcopy_unaligned_few } + +.Ldone: + { move r0, r23; jrp lr } + +/* Prefetching multiple cache line copy handler (for large transfers). */ + + /* Copy words until r1 is cache-line-aligned. */ +.Lalign_loop: + { lw r3, r1; addi r1, r1, 4 } + { andi r6, r1, 63 } + { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 } +.Lcopy_many: + { bnzt r6, .Lalign_loop; addi r9, r0, 63 } + + { addi r3, r1, 60; andi r9, r9, -64 } + + /* No need to prefetch dst, we'll just do the wh64 + right before we copy a line. */ + { lw r5, r3; addi r3, r3, 64; movei r4, 1 } + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bnzt zero, .; move r27, lr } + { lw r6, r3; addi r3, r3, 64 } + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bnzt zero, . } + { lw r7, r3; addi r3, r3, 64 } + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bz zero, .Lbig_loop2 } + + /* On entry to this loop: + - r0 points to the start of dst line 0 + - r1 points to start of src line 0 + - r2 >= (256 - 60), only the first time the loop trips. + - r3 contains r1 + 128 + 60 [pointer to end of source line 2] + This is our prefetch address. When we get near the end + rather than prefetching off the end this is changed to point + to some "safe" recently loaded address. + - r5 contains *(r1 + 60) [i.e. last word of source line 0] + - r6 contains *(r1 + 64 + 60) [i.e. last word of source line 1] + - r9 contains ((r0 + 63) & -64) + [start of next dst cache line.] */ + +.Lbig_loop: + { jal .Lcopy_line2; add r15, r1, r2 } + +.Lbig_loop2: + /* Copy line 0, first stalling until r5 is ready. */ + { move r12, r5; lw r16, r1 } + { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } + /* Prefetch several lines ahead. */ + { lw r5, r3; addi r3, r3, 64 } + { jal .Lcopy_line } + + /* Copy line 1, first stalling until r6 is ready. */ + { move r12, r6; lw r16, r1 } + { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } + /* Prefetch several lines ahead. */ + { lw r6, r3; addi r3, r3, 64 } + { jal .Lcopy_line } + + /* Copy line 2, first stalling until r7 is ready. */ + { move r12, r7; lw r16, r1 } + { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } + /* Prefetch several lines ahead. */ + { lw r7, r3; addi r3, r3, 64 } + /* Use up a caches-busy cycle by jumping back to the top of the + loop. Might as well get it out of the way now. */ + { j .Lbig_loop } + + + /* On entry: + - r0 points to the destination line. + - r1 points to the source line. + - r3 is the next prefetch address. + - r9 holds the last address used for wh64. + - r12 = WORD_15 + - r16 = WORD_0. + - r17 == r1 + 16. + - r27 holds saved lr to restore. + + On exit: + - r0 is incremented by 64. + - r1 is incremented by 64, unless that would point to a word + beyond the end of the source array, in which case it is redirected + to point to an arbitrary word already in the cache. + - r2 is decremented by 64. + - r3 is unchanged, unless it points to a word beyond the + end of the source array, in which case it is redirected + to point to an arbitrary word already in the cache. + Redirecting is OK since if we are that close to the end + of the array we will not come back to this subroutine + and use the contents of the prefetched address. + - r4 is nonzero iff r2 >= 64. + - r9 is incremented by 64, unless it points beyond the + end of the last full destination cache line, in which + case it is redirected to a "safe address" that can be + clobbered (sp - 64) + - lr contains the value in r27. */ + +/* r26 unused */ + +.Lcopy_line: + /* TODO: when r3 goes past the end, we would like to redirect it + to prefetch the last partial cache line (if any) just once, for the + benefit of the final cleanup loop. But we don't want to + prefetch that line more than once, or subsequent prefetches + will go into the RTF. But then .Lbig_loop should unconditionally + branch to top of loop to execute final prefetch, and its + nop should become a conditional branch. */ + + /* We need two non-memory cycles here to cover the resources + used by the loads initiated by the caller. */ + { add r15, r1, r2 } +.Lcopy_line2: + { slt_u r13, r3, r15; addi r17, r1, 16 } + + /* NOTE: this will stall for one cycle as L1 is busy. */ + + /* Fill second L1D line. */ + { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */ + + /* Prepare destination line for writing. */ + { wh64 r9; addi r9, r9, 64 } + + /* Load seven words that are L1D hits to cover wh64 L2 usage. */ + + /* Load the three remaining words from the last L1D line, which + we know has already filled the L1D. */ + { lw r4, r1; addi r1, r1, 4; addi r20, r1, 16 } /* r4 = WORD_12 */ + { lw r8, r1; addi r1, r1, 4; slt_u r13, r20, r15 }/* r8 = WORD_13 */ + { lw r11, r1; addi r1, r1, -52; mvz r20, r13, r1 } /* r11 = WORD_14 */ + + /* Load the three remaining words from the first L1D line, first + stalling until it has filled by "looking at" r16. */ + { lw r13, r1; addi r1, r1, 4; move zero, r16 } /* r13 = WORD_1 */ + { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_2 */ + { lw r15, r1; addi r1, r1, 8; addi r10, r0, 60 } /* r15 = WORD_3 */ + + /* Load second word from the second L1D line, first + stalling until it has filled by "looking at" r17. */ + { lw r19, r1; addi r1, r1, 4; move zero, r17 } /* r19 = WORD_5 */ + + /* Store last word to the destination line, potentially dirtying it + for the first time, which keeps the L2 busy for two cycles. */ + { sw r10, r12 } /* store(WORD_15) */ + + /* Use two L1D hits to cover the sw L2 access above. */ + { lw r10, r1; addi r1, r1, 4 } /* r10 = WORD_6 */ + { lw r12, r1; addi r1, r1, 4 } /* r12 = WORD_7 */ + + /* Fill third L1D line. */ + { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */ + + /* Store first L1D line. */ + { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */ + { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */ + { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */ + { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */ + + /* Store second L1D line. */ + { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */ + { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */ + { sw r0, r10; addi r0, r0, 4 } /* store(WORD_6) */ + { sw r0, r12; addi r0, r0, 4 } /* store(WORD_7) */ + + { lw r13, r1; addi r1, r1, 4; move zero, r18 } /* r13 = WORD_9 */ + { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_10 */ + { lw r15, r1; move r1, r20 } /* r15 = WORD_11 */ + + /* Store third L1D line. */ + { sw r0, r18; addi r0, r0, 4 } /* store(WORD_8) */ + { sw r0, r13; addi r0, r0, 4 } /* store(WORD_9) */ + { sw r0, r14; addi r0, r0, 4 } /* store(WORD_10) */ + { sw r0, r15; addi r0, r0, 4 } /* store(WORD_11) */ + + /* Store rest of fourth L1D line. */ + { sw r0, r4; addi r0, r0, 4 } /* store(WORD_12) */ + { + sw r0, r8 /* store(WORD_13) */ + addi r0, r0, 4 + /* Will r2 be > 64 after we subtract 64 below? */ + shri r4, r2, 7 + } + { + sw r0, r11 /* store(WORD_14) */ + addi r0, r0, 8 + /* Record 64 bytes successfully copied. */ + addi r2, r2, -64 + } + + { jrp lr; move lr, r27 } + + /* Convey to the backtrace library that the stack frame is + size zero, and the real return address is on the stack + rather than in 'lr'. */ + { info 8 } + + .align 64 +.Lcopy_unaligned_maybe_many: + /* Skip the setup overhead if we aren't copying many bytes. */ + { slti_u r8, r2, 20; sub r4, zero, r0 } + { bnzt r8, .Lcopy_unaligned_few; andi r4, r4, 3 } + { bz r4, .Ldest_is_word_aligned; add r18, r1, r2 } + +/* Unaligned 4 byte at a time copy handler. */ + + /* Copy single bytes until r0 == 0 mod 4, so we can store words. */ +.Lalign_dest_loop: + { lb_u r3, r1; addi r1, r1, 1; addi r4, r4, -1 } + { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 } + { bnzt r4, .Lalign_dest_loop; andi r3, r1, 3 } + + /* If source and dest are now *both* aligned, do an aligned copy. */ + { bz r3, .Lcheck_aligned_copy_size; addli r4, r2, -256 } + +.Ldest_is_word_aligned: + + { andi r8, r0, 63; lwadd_na r6, r1, 4} + { slti_u r9, r2, 64; bz r8, .Ldest_is_L2_line_aligned } + + /* This copies unaligned words until either there are fewer + than 4 bytes left to copy, or until the destination pointer + is cache-aligned, whichever comes first. + + On entry: + - r0 is the next store address. + - r1 points 4 bytes past the load address corresponding to r0. + - r2 >= 4 + - r6 is the next aligned word loaded. */ +.Lcopy_unaligned_src_words: + { lwadd_na r7, r1, 4; slti_u r8, r2, 4 + 4 } + /* stall */ + { dword_align r6, r7, r1; slti_u r9, r2, 64 + 4 } + { swadd r0, r6, 4; addi r2, r2, -4 } + { bnz r8, .Lcleanup_unaligned_words; andi r8, r0, 63 } + { bnzt r8, .Lcopy_unaligned_src_words; move r6, r7 } + + /* On entry: + - r0 is the next store address. + - r1 points 4 bytes past the load address corresponding to r0. + - r2 >= 4 (# of bytes left to store). + - r6 is the next aligned src word value. + - r9 = (r2 < 64U). + - r18 points one byte past the end of source memory. */ +.Ldest_is_L2_line_aligned: + + { + /* Not a full cache line remains. */ + bnz r9, .Lcleanup_unaligned_words + move r7, r6 + } + + /* r2 >= 64 */ + + /* Kick off two prefetches, but don't go past the end. */ + { addi r3, r1, 63 - 4; addi r8, r1, 64 + 63 - 4 } + { prefetch r3; move r3, r8; slt_u r8, r8, r18 } + { mvz r3, r8, r1; addi r8, r3, 64 } + { prefetch r3; move r3, r8; slt_u r8, r8, r18 } + { mvz r3, r8, r1; movei r17, 0 } + +.Lcopy_unaligned_line: + /* Prefetch another line. */ + { prefetch r3; addi r15, r1, 60; addi r3, r3, 64 } + /* Fire off a load of the last word we are about to copy. */ + { lw_na r15, r15; slt_u r8, r3, r18 } + + { mvz r3, r8, r1; wh64 r0 } + + /* This loop runs twice. + + On entry: + - r17 is even before the first iteration, and odd before + the second. It is incremented inside the loop. Encountering + an even value at the end of the loop makes it stop. */ +.Lcopy_half_an_unaligned_line: + { + /* Stall until the last byte is ready. In the steady state this + guarantees all words to load below will be in the L2 cache, which + avoids shunting the loads to the RTF. */ + move zero, r15 + lwadd_na r7, r1, 16 + } + { lwadd_na r11, r1, 12 } + { lwadd_na r14, r1, -24 } + { lwadd_na r8, r1, 4 } + { lwadd_na r9, r1, 4 } + { + lwadd_na r10, r1, 8 + /* r16 = (r2 < 64), after we subtract 32 from r2 below. */ + slti_u r16, r2, 64 + 32 + } + { lwadd_na r12, r1, 4; addi r17, r17, 1 } + { lwadd_na r13, r1, 8; dword_align r6, r7, r1 } + { swadd r0, r6, 4; dword_align r7, r8, r1 } + { swadd r0, r7, 4; dword_align r8, r9, r1 } + { swadd r0, r8, 4; dword_align r9, r10, r1 } + { swadd r0, r9, 4; dword_align r10, r11, r1 } + { swadd r0, r10, 4; dword_align r11, r12, r1 } + { swadd r0, r11, 4; dword_align r12, r13, r1 } + { swadd r0, r12, 4; dword_align r13, r14, r1 } + { swadd r0, r13, 4; addi r2, r2, -32 } + { move r6, r14; bbst r17, .Lcopy_half_an_unaligned_line } + + { bzt r16, .Lcopy_unaligned_line; move r7, r6 } + + /* On entry: + - r0 is the next store address. + - r1 points 4 bytes past the load address corresponding to r0. + - r2 >= 0 (# of bytes left to store). + - r7 is the next aligned src word value. */ +.Lcleanup_unaligned_words: + /* Handle any trailing bytes. */ + { bz r2, .Lcopy_unaligned_done; slti_u r8, r2, 4 } + { bzt r8, .Lcopy_unaligned_src_words; move r6, r7 } + + /* Move r1 back to the point where it corresponds to r0. */ + { addi r1, r1, -4 } + + /* Fall through */ + +/* 1 byte at a time copy handler. */ + +.Lcopy_unaligned_few: + { lb_u r3, r1; addi r1, r1, 1 } + { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 } + { bnzt r2, .Lcopy_unaligned_few } + +.Lcopy_unaligned_done: + + { move r0, r23; jrp lr } + +END (__memcpy) + +weak_alias (__memcpy, memcpy) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/tile/tilepro/memset.c b/REORG.TODO/sysdeps/tile/tilepro/memset.c new file mode 100644 index 0000000000..aaeaacaacc --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilepro/memset.c @@ -0,0 +1,151 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> +#include <arch/chip.h> + +void * inhibit_loop_to_libcall +__memset (void *s, int c, size_t n) +{ + uint32_t *out32; + int n32; + uint32_t v16, v32; + uint8_t *out8 = s; + int to_align32; + + /* Experimentation shows that a trivial tight loop is a win up until + around a size of 20, where writing a word at a time starts to win. */ +#define BYTE_CUTOFF 20 + +#if BYTE_CUTOFF < 3 + /* This must be at least at least this big, or some code later + on doesn't work. */ +# error "BYTE_CUTOFF is too small." +#endif + + if (n < BYTE_CUTOFF) + { + /* Strangely, this turns out to be the tightest way to write + this loop. */ + if (n != 0) + { + do + { + /* Strangely, combining these into one line performs worse. */ + *out8 = c; + out8++; + } + while (--n != 0); + } + + return s; + } + + /* Align 'out8'. We know n >= 3 so this won't write past the end. */ + while (((uintptr_t) out8 & 3) != 0) + { + *out8++ = c; + --n; + } + + /* Align 'n'. */ + while (n & 3) + out8[--n] = c; + + out32 = (uint32_t *) out8; + n32 = n >> 2; + + /* Tile input byte out to 32 bits. */ + v16 = __insn_intlb (c, c); + v32 = __insn_intlh (v16, v16); + + /* This must be at least 8 or the following loop doesn't work. */ +#define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4) + + /* Determine how many words we need to emit before the 'out32' + pointer becomes aligned modulo the cache line size. */ + to_align32 = (-((uintptr_t) out32 >> 2)) & (CACHE_LINE_SIZE_IN_WORDS - 1); + + /* Only bother aligning and using wh64 if there is at least one full + cache line to process. This check also prevents overrunning the + end of the buffer with alignment words. */ + if (to_align32 <= n32 - CACHE_LINE_SIZE_IN_WORDS) + { + int lines_left; + + /* Align out32 mod the cache line size so we can use wh64. */ + n32 -= to_align32; + for (; to_align32 != 0; to_align32--) + { + *out32 = v32; + out32++; + } + + /* Use unsigned divide to turn this into a right shift. */ + lines_left = (unsigned) n32 / CACHE_LINE_SIZE_IN_WORDS; + + do + { + /* Only wh64 a few lines at a time, so we don't exceed the + maximum number of victim lines. */ + int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS ())? lines_left + : CHIP_MAX_OUTSTANDING_VICTIMS ()); + uint32_t *wh = out32; + int i = x; + int j; + + lines_left -= x; + + do + { + __insn_wh64 (wh); + wh += CACHE_LINE_SIZE_IN_WORDS; + } + while (--i); + + for (j = x * (CACHE_LINE_SIZE_IN_WORDS / 4); j != 0; j--) + { + *out32++ = v32; + *out32++ = v32; + *out32++ = v32; + *out32++ = v32; + } + } + while (lines_left != 0); + + /* We processed all full lines above, so only this many words + remain to be processed. */ + n32 &= CACHE_LINE_SIZE_IN_WORDS - 1; + } + + /* Now handle any leftover values. */ + if (n32 != 0) + { + do + { + *out32 = v32; + out32++; + } + while (--n32 != 0); + } + + return s; +} +weak_alias (__memset, memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/tile/tilepro/memusage.h b/REORG.TODO/sysdeps/tile/tilepro/memusage.h new file mode 100644 index 0000000000..9ed4600f13 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilepro/memusage.h @@ -0,0 +1,29 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <arch/spr_def.h> + +#define GETSP() ({ register uintptr_t stack_ptr asm ("sp"); stack_ptr; }) + +#define GETTIME(low,high) \ + { \ + low = __insn_mfspr (SPR_CYCLE_LOW); \ + high = __insn_mfspr (SPR_CYCLE_HIGH); \ + } + +#include <sysdeps/generic/memusage.h> diff --git a/REORG.TODO/sysdeps/tile/tilepro/rawmemchr.c b/REORG.TODO/sysdeps/tile/tilepro/rawmemchr.c new file mode 100644 index 0000000000..a5e714407d --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilepro/rawmemchr.c @@ -0,0 +1,45 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> + +void * +__rawmemchr (const void *s, int c) +{ + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint32_t *p = (const uint32_t *) (s_int & -4); + + /* Create four copies of the byte for which we are looking. */ + const uint32_t goal = 0x01010101 * (uint8_t) c; + + /* Read the first word, but munge it so that bytes before the array + will not match goal. Note that this shift count expression works + because we know shift counts are taken mod 32. */ + const uint32_t before_mask = (1 << (s_int << 3)) - 1; + uint32_t v = (*p | before_mask) ^ (goal & before_mask); + + uint32_t bits; + while ((bits = __insn_seqb (v, goal)) == 0) + v = *++p; + + return ((char *) p) + (__insn_ctz (bits) >> 3); +} +libc_hidden_def (__rawmemchr) +weak_alias (__rawmemchr, rawmemchr) diff --git a/REORG.TODO/sysdeps/tile/tilepro/strchr.c b/REORG.TODO/sysdeps/tile/tilepro/strchr.c new file mode 100644 index 0000000000..11f8cda418 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilepro/strchr.c @@ -0,0 +1,68 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> + +#undef strchr + +char * +strchr (const char *s, int c) +{ + int z, g; + + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint32_t *p = (const uint32_t *) (s_int & -4); + + /* Create four copies of the byte for which we are looking. */ + const uint32_t goal = 0x01010101 * (uint8_t) c; + + /* Read the first aligned word, but force bytes before the string to + match neither zero nor goal (we make sure the high bit of each + byte is 1, and the low 7 bits are all the opposite of the goal + byte). Note that this shift count expression works because we + know shift counts are taken mod 32. */ + const uint32_t before_mask = (1 << (s_int << 3)) - 1; + uint32_t v = (*p | before_mask) ^ (goal & __insn_shrib (before_mask, 1)); + + uint32_t zero_matches, goal_matches; + while (1) + { + /* Look for a terminating '\0'. */ + zero_matches = __insn_seqb (v, 0); + + /* Look for the goal byte. */ + goal_matches = __insn_seqb (v, goal); + + if (__builtin_expect ((zero_matches | goal_matches) != 0, 0)) + break; + + v = *++p; + } + + z = __insn_ctz (zero_matches); + g = __insn_ctz (goal_matches); + + /* If we found c before '\0' we got a match. Note that if c == '\0' + then g == z, and we correctly return the address of the '\0' + rather than NULL. */ + return (g <= z) ? ((char *) p) + (g >> 3) : NULL; +} +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/tile/tilepro/strchrnul.c b/REORG.TODO/sysdeps/tile/tilepro/strchrnul.c new file mode 100644 index 0000000000..3220ad7970 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilepro/strchrnul.c @@ -0,0 +1,65 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> + +char * +__strchrnul (const char *s, int c) +{ + int z, g; + + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint32_t *p = (const uint32_t *) (s_int & -4); + + /* Create four copies of the byte for which we are looking. */ + const uint32_t goal = 0x01010101 * (uint8_t) c; + + /* Read the first aligned word, but force bytes before the string to + match neither zero nor goal (we make sure the high bit of each + byte is 1, and the low 7 bits are all the opposite of the goal + byte). Note that this shift count expression works because we + know shift counts are taken mod 32. */ + const uint32_t before_mask = (1 << (s_int << 3)) - 1; + uint32_t v = (*p | before_mask) ^ (goal & __insn_shrib (before_mask, 1)); + + uint32_t zero_matches, goal_matches; + while (1) + { + /* Look for a terminating '\0'. */ + zero_matches = __insn_seqb (v, 0); + + /* Look for the goal byte. */ + goal_matches = __insn_seqb (v, goal); + + if (__builtin_expect ((zero_matches | goal_matches) != 0, 0)) + break; + + v = *++p; + } + + z = __insn_ctz (zero_matches); + g = __insn_ctz (goal_matches); + + /* Return a pointer to the NUL or goal, whichever is first. */ + if (z < g) + g = z; + return ((char *) p) + (g >> 3); +} +weak_alias (__strchrnul, strchrnul) diff --git a/REORG.TODO/sysdeps/tile/tilepro/strlen.c b/REORG.TODO/sysdeps/tile/tilepro/strlen.c new file mode 100644 index 0000000000..adad3d80e4 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilepro/strlen.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> + +size_t +strlen (const char *s) +{ + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint32_t *p = (const uint32_t *) (s_int & -4); + + /* Read the first word, but force bytes before the string to be nonzero. + This expression works because we know shift counts are taken mod 32. */ + uint32_t v = *p | ((1 << (s_int << 3)) - 1); + + uint32_t bits; + while ((bits = __insn_seqb (v, 0)) == 0) + v = *++p; + + return ((const char *) p) + (__insn_ctz (bits) >> 3) - s; +} +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/tile/tilepro/strrchr.c b/REORG.TODO/sysdeps/tile/tilepro/strrchr.c new file mode 100644 index 0000000000..b30d259773 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tilepro/strrchr.c @@ -0,0 +1,73 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdint.h> + +char * +strrchr (const char *s, int c) +{ + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint32_t *p = (const uint32_t *) (s_int & -4); + + /* Create four copies of the byte for which we are looking. */ + const uint32_t goal = 0x01010101 * (uint8_t) c; + + /* Read the first aligned word, but force bytes before the string to + match neither zero nor goal (we make sure the high bit of each + byte is 1, and the low 7 bits are all the opposite of the goal + byte). Note that this shift count expression works because we + know shift counts are taken mod 32. */ + const uint32_t before_mask = (1 << (s_int << 3)) - 1; + uint32_t v = (*p | before_mask) ^ (goal & __insn_shrib (before_mask, 1)); + const char *found = NULL; + uint32_t zero_matches, goal_matches; + while (1) + { + /* Look for a terminating '\0'. */ + zero_matches = __insn_seqb (v, 0); + + /* Look for the goal byte. */ + goal_matches = __insn_seqb (v, goal); + + /* If we found the goal, record the last offset. */ + if (__builtin_expect (goal_matches != 0, 0)) + { + if (__builtin_expect (zero_matches != 0, 0)) + { + /* Clear any goal after the first zero. */ + int first_nul = __insn_ctz (zero_matches); + /* The number of top bits we need to clear is + 32 - (first_nul + 8). */ + int shift_amt = (24 - first_nul); + goal_matches <<= shift_amt; + goal_matches >>= shift_amt; + } + if (__builtin_expect (goal_matches != 0, 1)) + found = ((char *) p) + 3 - (__insn_clz (goal_matches) >> 3); + } + + if (__builtin_expect (zero_matches != 0, 0)) + return (char *) found; + + v = *++p; + } +} +weak_alias (strrchr, rindex) +libc_hidden_builtin_def (strrchr) diff --git a/REORG.TODO/sysdeps/tile/tls-macros.h b/REORG.TODO/sysdeps/tile/tls-macros.h new file mode 100644 index 0000000000..878358a731 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tls-macros.h @@ -0,0 +1,84 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef __tilegx__ +#define TLS_GD_OFFSET(x) \ + "moveli r0, hw1_last_tls_gd(" #x ")\n\t" \ + "shl16insli r0, r0, hw0_tls_gd(" #x ")\n\t" \ + "addi r0, %1, tls_add(" #x ")\n\t" +#else +#define TLS_GD_OFFSET(x) \ + "auli r0, %1, tls_gd_ha16(" #x ")\n\t" \ + "addli r0, r0, tls_gd_lo16(" #x ")\n\t" +#endif + +#define TLS_GD(x) \ + ({ \ + int *__retval; \ + extern char _GLOBAL_OFFSET_TABLE_[]; \ + \ + asm (TLS_GD_OFFSET(x) \ + "jal tls_gd_call(" #x ")\n\t" \ + "addi %0, r0, tls_gd_add(" #x ")" : \ + "=&r" (__retval) : "r" (_GLOBAL_OFFSET_TABLE_) : \ + "r0", "r25", "r26", "r27", "r28", "r29"); \ + __retval; }) + +/* No special support for LD mode. */ +#define TLS_LD TLS_GD + +#ifdef __tilegx__ +#define TLS_IE_OFFSET(x) \ + "moveli %0, hw1_last_tls_ie(" #x ")\n\t" \ + "shl16insli %0, %0, hw0_tls_ie(" #x ")\n\t" \ + "addi %0, %1, tls_add(" #x ")\n\t" +#define LD_TLS "ld_tls" +#else +#define TLS_IE_OFFSET(x) \ + "auli %0, %1, tls_ie_ha16(" #x ")\n\t" \ + "addli %0, %0, tls_ie_lo16(" #x ")\n\t" +#define LD_TLS "lw_tls" +#endif + +#define TLS_IE(x) \ + ({ \ + int *__retval; \ + extern char _GLOBAL_OFFSET_TABLE_[]; \ + \ + asm (TLS_IE_OFFSET(x) \ + LD_TLS " %0, %0, tls_ie_load(" #x ")\n\t" \ + "add %0, %0, tp" : \ + "=&r" (__retval) : "r" (_GLOBAL_OFFSET_TABLE_)); \ + __retval; }) + +#ifdef __tilegx__ +#define _TLS_LE(x) \ + "moveli %0, hw1_last_tls_le(" #x ")\n\t" \ + "shl16insli %0, %0, hw0_tls_le(" #x ")\n\t" \ + "add %0, %0, tp" +#else +#define _TLS_LE(x) \ + "auli %0, tp, tls_le_ha16(" #x ")\n\t" \ + "addli %0, %0, tls_le_lo16(" #x ")\n\t" +#endif + +#define TLS_LE(x) \ + ({ \ + int *__retval; \ + asm (_TLS_LE(x) : "=r" (__retval)); \ + __retval; }) diff --git a/REORG.TODO/sysdeps/tile/tst-audit.h b/REORG.TODO/sysdeps/tile/tst-audit.h new file mode 100644 index 0000000000..4adb73c8a2 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/tst-audit.h @@ -0,0 +1,23 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define pltenter la_tile_gnu_pltenter +#define pltexit la_tile_gnu_pltexit +#define La_regs La_tile_regs +#define La_retval La_tile_retval +#define int_retval lrv_reg[0] diff --git a/REORG.TODO/sysdeps/tile/wordcopy.c b/REORG.TODO/sysdeps/tile/wordcopy.c new file mode 100644 index 0000000000..2f93e5a5d1 --- /dev/null +++ b/REORG.TODO/sysdeps/tile/wordcopy.c @@ -0,0 +1,437 @@ +/* wordcopy.c -- subroutines for memory copy functions. Tile version. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* To optimize for tile, we make the following changes from the + default glibc version: + - Use the double align instruction instead of the MERGE macro. + - Since we don't have offset addressing mode, make sure the loads / + stores in the inner loop always have indices of 0. + - Use post-increment addresses in the inner loops, which yields + better scheduling. */ + +/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */ + +#include <stddef.h> +#include <memcopy.h> + +/* Provide the appropriate dblalign builtin to shift two registers + based on the alignment of a pointer held in a third register. */ +#ifdef __tilegx__ +#define DBLALIGN __insn_dblalign +#else +#define DBLALIGN __insn_dword_align +#endif + +/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to + block beginning at DSTP with LEN `op_t' words (not LEN bytes!). + Both SRCP and DSTP should be aligned for memory operations on `op_t's. */ + +void +_wordcopy_fwd_aligned (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + + switch (len % 8) + { + case 2: + a0 = ((op_t *) srcp)[0]; + srcp += OPSIZ; + len += 6; + goto do1; + case 3: + a1 = ((op_t *) srcp)[0]; + srcp += OPSIZ; + len += 5; + goto do2; + case 4: + a0 = ((op_t *) srcp)[0]; + srcp += OPSIZ; + len += 4; + goto do3; + case 5: + a1 = ((op_t *) srcp)[0]; + srcp += OPSIZ; + len += 3; + goto do4; + case 6: + a0 = ((op_t *) srcp)[0]; + srcp += OPSIZ; + len += 2; + goto do5; + case 7: + a1 = ((op_t *) srcp)[0]; + srcp += OPSIZ; + len += 1; + goto do6; + + case 0: + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + return; + a0 = ((op_t *) srcp)[0]; + srcp += OPSIZ; + goto do7; + case 1: + a1 = ((op_t *) srcp)[0]; + srcp += OPSIZ; + len -= 1; + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + goto do0; + goto do8; /* No-op. */ + } + + do + { + do8: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a1; + srcp += OPSIZ; + dstp += OPSIZ; + do7: + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a0; + srcp += OPSIZ; + dstp += OPSIZ; + do6: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a1; + srcp += OPSIZ; + dstp += OPSIZ; + do5: + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a0; + srcp += OPSIZ; + dstp += OPSIZ; + do4: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a1; + srcp += OPSIZ; + dstp += OPSIZ; + do3: + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a0; + srcp += OPSIZ; + dstp += OPSIZ; + do2: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a1; + srcp += OPSIZ; + dstp += OPSIZ; + do1: + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a0; + srcp += OPSIZ; + dstp += OPSIZ; + + len -= 8; + } + while (len != 0); + + /* This is the right position for do0. Please don't move + it into the loop. */ + do0: + ((op_t *) dstp)[0] = a1; +} + +/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to + block beginning at DSTP with LEN `op_t' words (not LEN bytes!). + DSTP should be aligned for memory operations on `op_t's, but SRCP must + *not* be aligned. */ + +void +_wordcopy_fwd_dest_aligned (long int dstp, long int srcp, size_t len) +{ + void * srci; + op_t a0, a1, a2, a3; + + /* Save the initial source pointer so we know the number of bytes to + shift for merging two unaligned results. */ + srci = (void *) srcp; + + /* Make SRCP aligned by rounding it down to the beginning of the `op_t' + it points in the middle of. */ + srcp &= -OPSIZ; + + switch (len % 4) + { + case 2: + a1 = ((op_t *) srcp)[0]; + a2 = ((op_t *) srcp)[1]; + len += 2; + srcp += 2 * OPSIZ; + goto do1; + case 3: + a0 = ((op_t *) srcp)[0]; + a1 = ((op_t *) srcp)[1]; + len += 1; + srcp += 2 * OPSIZ; + goto do2; + case 0: + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + return; + a3 = ((op_t *) srcp)[0]; + a0 = ((op_t *) srcp)[1]; + len += 0; + srcp += 2 * OPSIZ; + goto do3; + case 1: + a2 = ((op_t *) srcp)[0]; + a3 = ((op_t *) srcp)[1]; + srcp += 2 * OPSIZ; + len -= 1; + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + goto do0; + goto do4; /* No-op. */ + } + + do + { + do4: + a0 = ((op_t *) srcp)[0]; + a2 = DBLALIGN (a2, a3, srci); + ((op_t *) dstp)[0] = a2; + srcp += OPSIZ; + dstp += OPSIZ; + do3: + a1 = ((op_t *) srcp)[0]; + a3 = DBLALIGN (a3, a0, srci); + ((op_t *) dstp)[0] = a3; + srcp += OPSIZ; + dstp += OPSIZ; + do2: + a2 = ((op_t *) srcp)[0]; + a0 = DBLALIGN (a0, a1, srci); + ((op_t *) dstp)[0] = a0; + srcp += OPSIZ; + dstp += OPSIZ; + do1: + a3 = ((op_t *) srcp)[0]; + a1 = DBLALIGN (a1, a2, srci); + ((op_t *) dstp)[0] = a1; + srcp += OPSIZ; + dstp += OPSIZ; + len -= 4; + } + while (len != 0); + + /* This is the right position for do0. Please don't move + it into the loop. */ + do0: + ((op_t *) dstp)[0] = DBLALIGN (a2, a3, srci); +} + +/* _wordcopy_bwd_aligned -- Copy block finishing right before + SRCP to block finishing right before DSTP with LEN `op_t' words + (not LEN bytes!). Both SRCP and DSTP should be aligned for memory + operations on `op_t's. */ + +void +_wordcopy_bwd_aligned (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + long int srcp1; + + srcp1 = srcp - 1 * OPSIZ; + srcp -= 2 * OPSIZ; + dstp -= 1 * OPSIZ; + + switch (len % 8) + { + case 2: + a0 = ((op_t *) srcp1)[0]; + len += 6; + goto do1; + case 3: + a1 = ((op_t *) srcp1)[0]; + len += 5; + goto do2; + case 4: + a0 = ((op_t *) srcp1)[0]; + len += 4; + goto do3; + case 5: + a1 = ((op_t *) srcp1)[0]; + len += 3; + goto do4; + case 6: + a0 = ((op_t *) srcp1)[0]; + len += 2; + goto do5; + case 7: + a1 = ((op_t *) srcp1)[0]; + len += 1; + goto do6; + + case 0: + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + return; + a0 = ((op_t *) srcp1)[0]; + goto do7; + case 1: + a1 = ((op_t *) srcp1)[0]; + len -= 1; + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + goto do0; + goto do8; /* No-op. */ + } + + do + { + do8: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a1; + srcp -= OPSIZ; + dstp -= OPSIZ; + do7: + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a0; + srcp -= OPSIZ; + dstp -= OPSIZ; + do6: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a1; + srcp -= OPSIZ; + dstp -= OPSIZ; + do5: + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a0; + srcp -= OPSIZ; + dstp -= OPSIZ; + do4: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a1; + srcp -= OPSIZ; + dstp -= OPSIZ; + do3: + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a0; + srcp -= OPSIZ; + dstp -= OPSIZ; + do2: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a1; + srcp -= OPSIZ; + dstp -= OPSIZ; + do1: + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a0; + srcp -= OPSIZ; + dstp -= OPSIZ; + + len -= 8; + } + while (len != 0); + + /* This is the right position for do0. Please don't move + it into the loop. */ + do0: + ((op_t *) dstp)[0] = a1; +} + +/* _wordcopy_bwd_dest_aligned -- Copy block finishing right + before SRCP to block finishing right before DSTP with LEN `op_t' + words (not LEN bytes!). DSTP should be aligned for memory + operations on `op_t', but SRCP must *not* be aligned. */ + +void +_wordcopy_bwd_dest_aligned (long int dstp, long int srcp, size_t len) +{ + void * srci; + op_t a0, a1, a2, a3; + op_t b0, b1, b2, b3; + + /* Save the initial source pointer so we know the number of bytes to + shift for merging two unaligned results. */ + srci = (void *) srcp; + + /* Make SRCP aligned by rounding it down to the beginning of the op_t + it points in the middle of. */ + srcp &= -OPSIZ; + srcp += OPSIZ; + + switch (len % 4) + { + case 2: + srcp -= 3 * OPSIZ; + dstp -= 1 * OPSIZ; + b2 = ((op_t *) srcp)[2]; + b1 = a1 = ((op_t *) srcp)[1]; + len += 2; + goto do1; + case 3: + srcp -= 3 * OPSIZ; + dstp -= 1 * OPSIZ; + b3 = ((op_t *) srcp)[2]; + b2 = a2 = ((op_t *) srcp)[1]; + len += 1; + goto do2; + case 0: + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + return; + srcp -= 3 * OPSIZ; + dstp -= 1 * OPSIZ; + b0 = ((op_t *) srcp)[2]; + b3 = a3 = ((op_t *) srcp)[1]; + goto do3; + case 1: + srcp -= 3 * OPSIZ; + dstp -= 1 * OPSIZ; + b1 = ((op_t *) srcp)[2]; + b0 = a0 = ((op_t *) srcp)[1]; + len -= 1; + if (OP_T_THRES <= 3 * OPSIZ && len == 0) + goto do0; + goto do4; /* No-op. */ + } + + do + { + do4: + b3 = a3 = ((op_t *) srcp)[0]; + a0 = DBLALIGN (a0, b1, srci); + ((op_t *) dstp)[0] = a0; + srcp -= OPSIZ; + dstp -= OPSIZ; + do3: + b2 = a2 = ((op_t *) srcp)[0]; + a3 = DBLALIGN (a3, b0, srci); + ((op_t *) dstp)[0] = a3; + srcp -= OPSIZ; + dstp -= OPSIZ; + do2: + b1 = a1 = ((op_t *) srcp)[0]; + a2 = DBLALIGN (a2, b3, srci); + ((op_t *) dstp)[0] = a2; + srcp -= OPSIZ; + dstp -= OPSIZ; + do1: + b0 = a0 = ((op_t *) srcp)[0]; + a1 = DBLALIGN (a1, b2, srci); + ((op_t *) dstp)[0] = a1; + srcp -= OPSIZ; + dstp -= OPSIZ; + + len -= 4; + } + while (len != 0); + + /* This is the right position for do0. Please don't move + it into the loop. */ + do0: + a0 = DBLALIGN (a0, b1, srci); + ((op_t *) dstp)[0] = a0; +} |