diff options
Diffstat (limited to 'sysdeps/sparc/sparc64/multiarch')
28 files changed, 0 insertions, 3292 deletions
diff --git a/sysdeps/sparc/sparc64/multiarch/Makefile b/sysdeps/sparc/sparc64/multiarch/Makefile deleted file mode 100644 index 55b757f9ad..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -ifeq ($(subdir),crypt) -libcrypt-sysdep_routines += md5-crop sha256-crop sha512-crop -endif - -ifeq ($(subdir),locale) -localedef-aux += md5-crop -endif - -ifeq ($(subdir),string) -sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ - memset-niagara1 memcpy-niagara4 memset-niagara4 -endif - -ifeq ($(subdir),stdlib) -sysdep_routines += mul_1-vis3 addmul_1-vis3 submul_1-vis3 add_n-vis3 sub_n-vis3 -endif - -ifeq ($(subdir),math) -gmp-sysdep_routines = mul_1-vis3 addmul_1-vis3 submul_1-vis3 add_n-vis3 \ - sub_n-vis3 -endif diff --git a/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S b/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S deleted file mode 100644 index c038bcbd6e..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S +++ /dev/null @@ -1,67 +0,0 @@ -! SPARC v9 64-bit VIS3 __mpn_add_n -- Add two limb vectors of the same length > 0 and -! store sum in a third limb vector. -! -! Copyright (C) 2013-2017 Free Software Foundation, Inc. -! This file is part of the GNU C Library. -! Contributed by David S. Miller <davem@davemloft.net> -! -! The GNU C Library is free software; you can redistribute it and/or -! modify it under the terms of the GNU Lesser General Public -! License as published by the Free Software Foundation; either -! version 2.1 of the License, or (at your option) any later version. -! -! The GNU C Library is distributed in the hope that it will be useful, -! but WITHOUT ANY WARRANTY; without even the implied warranty of -! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -! Lesser General Public License for more details. -! -! You should have received a copy of the GNU Lesser General Public -! License along with the GNU C Library; if not, see -! <http://www.gnu.org/licenses/>. - -#include <sysdep.h> - -#define res_ptr %o0 -#define s1_ptr %o1 -#define s2_ptr %o2 -#define sz %o3 -#define tmp1 %g1 -#define tmp2 %g2 -#define tmp3 %g3 -#define tmp4 %o4 - - .register %g2,#scratch - .register %g3,#scratch -ENTRY(__mpn_add_n_vis3) - subcc sz, 1, sz - be .Lfinal_limb - cmp %g0, 0 - -.Lloop: - ldx [s2_ptr + 0x00], tmp1 - add s2_ptr, 0x10, s2_ptr - ldx [s1_ptr + 0x00], tmp2 - add s1_ptr, 0x10, s1_ptr - ldx [s2_ptr - 0x08], tmp3 - add res_ptr, 0x10, res_ptr - ldx [s1_ptr - 0x08], tmp4 - sub sz, 2, sz - addxccc tmp1, tmp2, tmp1 - stx tmp1, [res_ptr - 0x10] - addxccc tmp3, tmp4, tmp3 - brgz sz, .Lloop - stx tmp3, [res_ptr - 0x08] - - brlz,pt sz, .Lfinish - nop - -.Lfinal_limb: - ldx [s2_ptr + 0x00], tmp1 - ldx [s1_ptr + 0x00], tmp2 - addxccc tmp1, tmp2, tmp1 - stx tmp1, [res_ptr + 0x00] - -.Lfinish: - retl - addxc %g0, %g0, %o0 -END(__mpn_add_n_vis3) diff --git a/sysdeps/sparc/sparc64/multiarch/add_n.S b/sysdeps/sparc/sparc64/multiarch/add_n.S deleted file mode 100644 index 9ffaf7865b..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/add_n.S +++ /dev/null @@ -1,56 +0,0 @@ -/* Multiple versions of add_n - - Copyright (C) 2013-2017 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -ENTRY(__mpn_add_n) - .type __mpn_add_n, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_VIS3, %o1 - andcc %o0, %o1, %g0 - be 1f - nop -# ifdef SHARED - sethi %gdop_hix22(__mpn_add_n_vis3), %o1 - xor %o1, %gdop_lox10(__mpn_add_n_vis3), %o1 -# else - set __mpn_add_n_vis3, %o1 -# endif - ba 10f - nop -1: -# ifdef SHARED - sethi %gdop_hix22(__mpn_add_n_generic), %o1 - xor %o1, %gdop_lox10(__mpn_add_n_generic), %o1 -# else - set __mpn_add_n_generic, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__mpn_add_n) - -#define __mpn_add_n __mpn_add_n_generic -#include "../add_n.S" diff --git a/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S b/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S deleted file mode 100644 index 64671f5079..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S +++ /dev/null @@ -1,87 +0,0 @@ -! SPARC v9 64-bit VIS3 __mpn_addmul_1 -- Multiply a limb vector with a -! limb and add the result to a second limb vector. -! -! Copyright (C) 2013-2017 Free Software Foundation, Inc. -! This file is part of the GNU C Library. -! Contributed by David S. Miller <davem@davemloft.net> -! -! The GNU C Library is free software; you can redistribute it and/or -! modify it under the terms of the GNU Lesser General Public -! License as published by the Free Software Foundation; either -! version 2.1 of the License, or (at your option) any later version. -! -! The GNU C Library is distributed in the hope that it will be useful, -! but WITHOUT ANY WARRANTY; without even the implied warranty of -! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -! Lesser General Public License for more details. -! -! You should have received a copy of the GNU Lesser General Public -! License along with the GNU C Library; if not, see -! <http://www.gnu.org/licenses/>. - -#include <sysdep.h> - -#define res_ptr %i0 -#define s1_ptr %i1 -#define sz %i2 -#define s2_limb %i3 -#define carry %o5 -#define tmp1 %g1 -#define tmp2 %g2 -#define tmp3 %g3 -#define tmp4 %o4 -#define tmp5 %l0 -#define tmp6 %l1 -#define tmp7 %l2 -#define tmp8 %l3 - - .register %g2,#scratch - .register %g3,#scratch -ENTRY(__mpn_addmul_1_vis3) - save %sp, -176, %sp - subcc sz, 1, sz - be .Lfinal_limb - clr carry - -.Lloop: - ldx [s1_ptr + 0x00], tmp1 - ldx [res_ptr + 0x00], tmp3 - ldx [s1_ptr + 0x08], tmp2 - ldx [res_ptr + 0x08], tmp4 - mulx tmp1, s2_limb, tmp5 - add s1_ptr, 0x10, s1_ptr - umulxhi tmp1, s2_limb, tmp6 - add res_ptr, 0x10, res_ptr - mulx tmp2, s2_limb, tmp7 - sub sz, 2, sz - umulxhi tmp2, s2_limb, tmp8 - addcc carry, tmp5, tmp5 - addxc %g0, tmp6, carry - addcc tmp3, tmp5, tmp5 - addxc %g0, carry, carry - stx tmp5, [res_ptr - 0x10] - addcc carry, tmp7, tmp7 - addxc %g0, tmp8, carry - addcc tmp4, tmp7, tmp7 - addxc %g0, carry, carry - brgz sz, .Lloop - stx tmp7, [res_ptr - 0x08] - - brlz,pt sz, .Lfinish - nop - -.Lfinal_limb: - ldx [s1_ptr + 0x00], tmp1 - ldx [res_ptr + 0x00], tmp3 - mulx tmp1, s2_limb, tmp5 - umulxhi tmp1, s2_limb, tmp6 - addcc carry, tmp5, tmp5 - addxc %g0, tmp6, carry - addcc tmp3, tmp5, tmp5 - addxc %g0, carry, carry - stx tmp5, [res_ptr + 0x00] - -.Lfinish: - jmpl %i7 + 8, %g0 - restore carry, 0, %o0 -END(__mpn_addmul_1_vis3) diff --git a/sysdeps/sparc/sparc64/multiarch/addmul_1.S b/sysdeps/sparc/sparc64/multiarch/addmul_1.S deleted file mode 100644 index dcb1da184c..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/addmul_1.S +++ /dev/null @@ -1,56 +0,0 @@ -/* Multiple versions of addmul_1 - - Copyright (C) 2013-2017 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -ENTRY(__mpn_addmul_1) - .type __mpn_addmul_1, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_VIS3, %o1 - andcc %o0, %o1, %g0 - be 1f - nop -# ifdef SHARED - sethi %gdop_hix22(__mpn_addmul_1_vis3), %o1 - xor %o1, %gdop_lox10(__mpn_addmul_1_vis3), %o1 -# else - set __mpn_addmul_1_vis3, %o1 -# endif - ba 10f - nop -1: -# ifdef SHARED - sethi %gdop_hix22(__mpn_addmul_1_generic), %o1 - xor %o1, %gdop_lox10(__mpn_addmul_1_generic), %o1 -# else - set __mpn_addmul_1_generic, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__mpn_addmul_1) - -#define __mpn_addmul_1 __mpn_addmul_1_generic -#include "../addmul_1.S" diff --git a/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c b/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c deleted file mode 100644 index a97bc455a8..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c +++ /dev/null @@ -1,75 +0,0 @@ -/* Enumerate available IFUNC implementations of a function. sparc version. - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <assert.h> -#include <string.h> -#include <wchar.h> -#include <ldsodefs.h> -#include <sysdep.h> -#include <ifunc-impl-list.h> - -/* Fill ARRAY of MAX elements with IFUNC implementations for function - NAME and return the number of valid entries. */ - -size_t -__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - size_t max) -{ - size_t i = 0; - int hwcap; - - hwcap = GLRO(dl_hwcap); - - IFUNC_IMPL (i, name, memcpy, - IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_CRYPTO, - __memcpy_niagara4) - IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_N2, - __memcpy_niagara2) - IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_BLKINIT, - __memcpy_niagara1) - IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_ULTRA3, - __memcpy_ultra3) - IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ultra1)); - - IFUNC_IMPL (i, name, mempcpy, - IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_CRYPTO, - __mempcpy_niagara4) - IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_N2, - __mempcpy_niagara2) - IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_BLKINIT, - __mempcpy_niagara1) - IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_ULTRA3, - __mempcpy_ultra3) - IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ultra1)); - - IFUNC_IMPL (i, name, bzero, - IFUNC_IMPL_ADD (array, i, bzero, hwcap & HWCAP_SPARC_CRYPTO, - __bzero_niagara4) - IFUNC_IMPL_ADD (array, i, bzero, hwcap & HWCAP_SPARC_BLKINIT, - __bzero_niagara1) - IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ultra1)); - - IFUNC_IMPL (i, name, memset, - IFUNC_IMPL_ADD (array, i, memset, hwcap & HWCAP_SPARC_CRYPTO, - __memset_niagara4) - IFUNC_IMPL_ADD (array, i, memset, hwcap & HWCAP_SPARC_BLKINIT, - __memset_niagara1) - IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ultra1)); - - return i; -} diff --git a/sysdeps/sparc/sparc64/multiarch/md5-block.c b/sysdeps/sparc/sparc64/multiarch/md5-block.c deleted file mode 100644 index 7c1a3a368f..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/md5-block.c +++ /dev/null @@ -1,29 +0,0 @@ -#include <sparc-ifunc.h> - -#define __md5_process_block __md5_process_block_generic -extern void __md5_process_block_generic (const void *buffer, size_t len, - struct md5_ctx *ctx); - -#include <crypt/md5-block.c> - -#undef __md5_process_block - -extern void __md5_process_block_crop (const void *buffer, size_t len, - struct md5_ctx *ctx); -static bool cpu_supports_md5(int hwcap) -{ - unsigned long cfr; - - if (!(hwcap & HWCAP_SPARC_CRYPTO)) - return false; - - __asm__ ("rd %%asr26, %0" : "=r" (cfr)); - if (cfr & (1 << 4)) - return true; - - return false; -} - -extern void __md5_process_block (const void *buffer, size_t len, - struct md5_ctx *ctx); -sparc_libc_ifunc(__md5_process_block, cpu_supports_md5(hwcap) ? __md5_process_block_crop : __md5_process_block_generic); diff --git a/sysdeps/sparc/sparc64/multiarch/md5-crop.S b/sysdeps/sparc/sparc64/multiarch/md5-crop.S deleted file mode 100644 index e8810da83e..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/md5-crop.S +++ /dev/null @@ -1,110 +0,0 @@ -/* MD5 using sparc crypto opcodes. - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@davemloft.net) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#define ASI_PL 0x88 - -#define MD5 \ - .word 0x81b02800; - - .text - .align 32 -ENTRY(__md5_process_block_crop) - /* %o0=buffer, %o1=len, %o2=CTX */ - ld [%o2 + 0x10], %g1 - add %g1, %o1, %o4 - st %o4, [%o2 + 0x10] - clr %o5 - cmp %o4, %g1 - movlu %icc, 1, %o5 -#ifdef __arch64__ - srlx %o1, 32, %o4 - add %o5, %o4, %o5 -#endif - ld [%o2 + 0x14], %o4 - add %o4, %o5, %o4 - st %o4, [%o2 + 0x14] - lda [%o2] ASI_PL, %f0 - add %o2, 0x4, %g1 - lda [%g1] ASI_PL, %f1 - add %o2, 0x8, %g1 - andcc %o0, 0x7, %g0 - lda [%g1] ASI_PL, %f2 - add %o2, 0xc, %g1 - bne,pn %xcc, 10f - lda [%g1] ASI_PL, %f3 - -1: - ldd [%o0 + 0x00], %f8 - ldd [%o0 + 0x08], %f10 - ldd [%o0 + 0x10], %f12 - ldd [%o0 + 0x18], %f14 - ldd [%o0 + 0x20], %f16 - ldd [%o0 + 0x28], %f18 - ldd [%o0 + 0x30], %f20 - ldd [%o0 + 0x38], %f22 - - MD5 - - subcc %o1, 64, %o1 - bne,pt %xcc, 1b - add %o0, 0x40, %o0 - -5: - sta %f0, [%o2] ASI_PL - add %o2, 0x4, %g1 - sta %f1, [%g1] ASI_PL - add %o2, 0x8, %g1 - sta %f2, [%g1] ASI_PL - add %o2, 0xc, %g1 - retl - sta %f3, [%g1] ASI_PL -10: - alignaddr %o0, %g0, %o0 - - ldd [%o0 + 0x00], %f10 -1: - ldd [%o0 + 0x08], %f12 - ldd [%o0 + 0x10], %f14 - ldd [%o0 + 0x18], %f16 - ldd [%o0 + 0x20], %f18 - ldd [%o0 + 0x28], %f20 - ldd [%o0 + 0x30], %f22 - ldd [%o0 + 0x38], %f24 - ldd [%o0 + 0x40], %f26 - - faligndata %f10, %f12, %f8 - faligndata %f12, %f14, %f10 - faligndata %f14, %f16, %f12 - faligndata %f16, %f18, %f14 - faligndata %f18, %f20, %f16 - faligndata %f20, %f22, %f18 - faligndata %f22, %f24, %f20 - faligndata %f24, %f26, %f22 - - MD5 - - subcc %o1, 64, %o1 - fsrc2 %f26, %f10 - bne,pt %xcc, 1b - add %o0, 0x40, %o0 - - ba,a,pt %xcc, 5b -END(__md5_process_block_crop) diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S deleted file mode 100644 index ccf42446e8..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S +++ /dev/null @@ -1,347 +0,0 @@ -/* Copy SIZE bytes from SRC to DEST. For SUN4V Niagara. - Copyright (C) 2006-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@davemloft.net) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 -#define ASI_P 0x80 -#define ASI_PNF 0x82 - -#define LOAD(type,addr,dest) type##a [addr] ASI_P, dest -#define LOAD_TWIN(addr_reg,dest0,dest1) \ - ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0 - -#define STORE(type,src,addr) type src, [addr] -#define STORE_INIT(src,addr) stxa src, [addr] %asi - -#ifndef XCC -#define USE_BPR -#define XCC xcc -#endif - -#if IS_IN (libc) - - .register %g2,#scratch - .register %g3,#scratch - .register %g6,#scratch - - .text - -ENTRY(__mempcpy_niagara1) - ba,pt %XCC, 101f - add %o0, %o2, %g5 -END(__mempcpy_niagara1) - - .align 32 -ENTRY(__memcpy_niagara1) -100: /* %o0=dst, %o1=src, %o2=len */ - mov %o0, %g5 -101: -# ifndef USE_BPR - srl %o2, 0, %o2 -# endif - cmp %o2, 0 - be,pn %XCC, 85f -218: or %o0, %o1, %o3 - cmp %o2, 16 - blu,a,pn %XCC, 80f - or %o3, %o2, %o3 - - /* 2 blocks (128 bytes) is the minimum we can do the block - * copy with. We need to ensure that we'll iterate at least - * once in the block copy loop. At worst we'll need to align - * the destination to a 64-byte boundary which can chew up - * to (64 - 1) bytes from the length before we perform the - * block copy loop. - */ - cmp %o2, (2 * 64) - blu,pt %XCC, 70f - andcc %o3, 0x7, %g0 - - /* %o0: dst - * %o1: src - * %o2: len (known to be >= 128) - * - * The block copy loops will use %o4/%o5,%g2/%g3 as - * temporaries while copying the data. - */ - - LOAD(prefetch, %o1, #one_read) - wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi - - /* Align destination on 64-byte boundary. */ - andcc %o0, (64 - 1), %o4 - be,pt %XCC, 2f - sub %o4, 64, %o4 - sub %g0, %o4, %o4 ! bytes to align dst - sub %o2, %o4, %o2 -1: subcc %o4, 1, %o4 - LOAD(ldub, %o1, %g1) - STORE(stb, %g1, %o0) - add %o1, 1, %o1 - bne,pt %XCC, 1b - add %o0, 1, %o0 - - /* If the source is on a 16-byte boundary we can do - * the direct block copy loop. If it is 8-byte aligned - * we can do the 16-byte loads offset by -8 bytes and the - * init stores offset by one register. - * - * If the source is not even 8-byte aligned, we need to do - * shifting and masking (basically integer faligndata). - * - * The careful bit with init stores is that if we store - * to any part of the cache line we have to store the whole - * cacheline else we can end up with corrupt L2 cache line - * contents. Since the loop works on 64-bytes of 64-byte - * aligned store data at a time, this is easy to ensure. - */ -2: - andcc %o1, (16 - 1), %o4 - andn %o2, (64 - 1), %g1 ! block copy loop iterator - sub %o2, %g1, %o2 ! final sub-block copy bytes - be,pt %XCC, 50f - cmp %o4, 8 - be,a,pt %XCC, 10f - sub %o1, 0x8, %o1 - - /* Neither 8-byte nor 16-byte aligned, shift and mask. */ - mov %g1, %o4 - and %o1, 0x7, %g1 - sll %g1, 3, %g1 - mov 64, %o3 - andn %o1, 0x7, %o1 - LOAD(ldx, %o1, %g2) - sub %o3, %g1, %o3 - sllx %g2, %g1, %g2 - -#define SWIVEL_ONE_DWORD(SRC, TMP1, TMP2, PRE_VAL, PRE_SHIFT, POST_SHIFT, DST)\ - LOAD(ldx, SRC, TMP1); \ - srlx TMP1, PRE_SHIFT, TMP2; \ - or TMP2, PRE_VAL, TMP2; \ - STORE_INIT(TMP2, DST); \ - sllx TMP1, POST_SHIFT, PRE_VAL; - -1: add %o1, 0x8, %o1 - SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x00) - add %o1, 0x8, %o1 - SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x08) - add %o1, 0x8, %o1 - SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x10) - add %o1, 0x8, %o1 - SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x18) - add %o1, 32, %o1 - LOAD(prefetch, %o1, #one_read) - sub %o1, 32 - 8, %o1 - SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x20) - add %o1, 8, %o1 - SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x28) - add %o1, 8, %o1 - SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x30) - add %o1, 8, %o1 - SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x38) - subcc %o4, 64, %o4 - bne,pt %XCC, 1b - add %o0, 64, %o0 - -#undef SWIVEL_ONE_DWORD - - srl %g1, 3, %g1 - ba,pt %XCC, 60f - add %o1, %g1, %o1 - -10: /* Destination is 64-byte aligned, source was only 8-byte - * aligned but it has been subtracted by 8 and we perform - * one twin load ahead, then add 8 back into source when - * we finish the loop. - */ - LOAD_TWIN(%o1, %o4, %o5) -1: add %o1, 16, %o1 - LOAD_TWIN(%o1, %g2, %g3) - add %o1, 16 + 32, %o1 - LOAD(prefetch, %o1, #one_read) - sub %o1, 32, %o1 - STORE_INIT(%o5, %o0 + 0x00) ! initializes cache line - STORE_INIT(%g2, %o0 + 0x08) - LOAD_TWIN(%o1, %o4, %o5) - add %o1, 16, %o1 - STORE_INIT(%g3, %o0 + 0x10) - STORE_INIT(%o4, %o0 + 0x18) - LOAD_TWIN(%o1, %g2, %g3) - add %o1, 16, %o1 - STORE_INIT(%o5, %o0 + 0x20) - STORE_INIT(%g2, %o0 + 0x28) - LOAD_TWIN(%o1, %o4, %o5) - STORE_INIT(%g3, %o0 + 0x30) - STORE_INIT(%o4, %o0 + 0x38) - subcc %g1, 64, %g1 - bne,pt %XCC, 1b - add %o0, 64, %o0 - - ba,pt %XCC, 60f - add %o1, 0x8, %o1 - -50: /* Destination is 64-byte aligned, and source is 16-byte - * aligned. - */ -1: LOAD_TWIN(%o1, %o4, %o5) - add %o1, 16, %o1 - LOAD_TWIN(%o1, %g2, %g3) - add %o1, 16 + 32, %o1 - LOAD(prefetch, %o1, #one_read) - sub %o1, 32, %o1 - STORE_INIT(%o4, %o0 + 0x00) ! initializes cache line - STORE_INIT(%o5, %o0 + 0x08) - LOAD_TWIN(%o1, %o4, %o5) - add %o1, 16, %o1 - STORE_INIT(%g2, %o0 + 0x10) - STORE_INIT(%g3, %o0 + 0x18) - LOAD_TWIN(%o1, %g2, %g3) - add %o1, 16, %o1 - STORE_INIT(%o4, %o0 + 0x20) - STORE_INIT(%o5, %o0 + 0x28) - STORE_INIT(%g2, %o0 + 0x30) - STORE_INIT(%g3, %o0 + 0x38) - subcc %g1, 64, %g1 - bne,pt %XCC, 1b - add %o0, 64, %o0 - /* fall through */ - -60: - /* %o2 contains any final bytes still needed to be copied - * over. If anything is left, we copy it one byte at a time. - */ - wr %g0, ASI_PNF, %asi - brz,pt %o2, 85f - sub %o0, %o1, %o3 - ba,a,pt %XCC, 90f - - .align 64 -70: /* 16 < len <= 64 */ - bne,pn %XCC, 75f - sub %o0, %o1, %o3 - -72: - andn %o2, 0xf, %o4 - and %o2, 0xf, %o2 -1: subcc %o4, 0x10, %o4 - LOAD(ldx, %o1, %o5) - add %o1, 0x08, %o1 - LOAD(ldx, %o1, %g1) - sub %o1, 0x08, %o1 - STORE(stx, %o5, %o1 + %o3) - add %o1, 0x8, %o1 - STORE(stx, %g1, %o1 + %o3) - bgu,pt %XCC, 1b - add %o1, 0x8, %o1 -73: andcc %o2, 0x8, %g0 - be,pt %XCC, 1f - nop - sub %o2, 0x8, %o2 - LOAD(ldx, %o1, %o5) - STORE(stx, %o5, %o1 + %o3) - add %o1, 0x8, %o1 -1: andcc %o2, 0x4, %g0 - be,pt %XCC, 1f - nop - sub %o2, 0x4, %o2 - LOAD(lduw, %o1, %o5) - STORE(stw, %o5, %o1 + %o3) - add %o1, 0x4, %o1 -1: cmp %o2, 0 - be,pt %XCC, 85f - nop - ba,pt %XCC, 90f - nop - -75: - andcc %o0, 0x7, %g1 - sub %g1, 0x8, %g1 - be,pn %icc, 2f - sub %g0, %g1, %g1 - sub %o2, %g1, %o2 - -1: subcc %g1, 1, %g1 - LOAD(ldub, %o1, %o5) - STORE(stb, %o5, %o1 + %o3) - bgu,pt %icc, 1b - add %o1, 1, %o1 - -2: add %o1, %o3, %o0 - andcc %o1, 0x7, %g1 - bne,pt %icc, 8f - sll %g1, 3, %g1 - - cmp %o2, 16 - bgeu,pt %icc, 72b - nop - ba,a,pt %XCC, 73b - -8: mov 64, %o3 - andn %o1, 0x7, %o1 - LOAD(ldx, %o1, %g2) - sub %o3, %g1, %o3 - andn %o2, 0x7, %o4 - sllx %g2, %g1, %g2 -1: add %o1, 0x8, %o1 - LOAD(ldx, %o1, %g3) - subcc %o4, 0x8, %o4 - srlx %g3, %o3, %o5 - or %o5, %g2, %o5 - STORE(stx, %o5, %o0) - add %o0, 0x8, %o0 - bgu,pt %icc, 1b - sllx %g3, %g1, %g2 - - srl %g1, 3, %g1 - andcc %o2, 0x7, %o2 - be,pn %icc, 85f - add %o1, %g1, %o1 - ba,pt %XCC, 90f - sub %o0, %o1, %o3 - - .align 64 -80: /* 0 < len <= 16 */ - andcc %o3, 0x3, %g0 - bne,pn %XCC, 90f - sub %o0, %o1, %o3 - -1: - subcc %o2, 4, %o2 - LOAD(lduw, %o1, %g1) - STORE(stw, %g1, %o1 + %o3) - bgu,pt %XCC, 1b - add %o1, 4, %o1 - -85: retl - mov %g5, %o0 - - .align 32 -90: - subcc %o2, 1, %o2 - LOAD(ldub, %o1, %g1) - STORE(stb, %g1, %o1 + %o3) - bgu,pt %XCC, 90b - add %o1, 1, %o1 - retl - mov %g5, %o0 - -END(__memcpy_niagara1) - -#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S deleted file mode 100644 index 798b3c80fe..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S +++ /dev/null @@ -1,498 +0,0 @@ -/* Copy SIZE bytes from SRC to DEST. For SUN4V Niagara-2. - Copyright (C) 2007-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@davemloft.net) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 -#define ASI_BLK_P 0xf0 -#define ASI_P 0x80 -#define ASI_PNF 0x82 - -#define FPRS_FEF 0x04 - -#define VISEntryHalf \ - rd %fprs, %o5; \ - wr %g0, FPRS_FEF, %fprs - -#define VISExitHalf \ - and %o5, FPRS_FEF, %o5; \ - wr %o5, 0x0, %fprs - -#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P - -#define LOAD(type,addr,dest) type [addr], dest -#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_P, dest -#define STORE(type,src,addr) type src, [addr] -#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_P -#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI - -#ifndef XCC -#define USE_BPR -#define XCC xcc -#endif - -#define FREG_FROB(x0, x1, x2, x3, x4, x5, x6, x7, x8) \ - faligndata %x0, %x1, %f0; \ - faligndata %x1, %x2, %f2; \ - faligndata %x2, %x3, %f4; \ - faligndata %x3, %x4, %f6; \ - faligndata %x4, %x5, %f8; \ - faligndata %x5, %x6, %f10; \ - faligndata %x6, %x7, %f12; \ - faligndata %x7, %x8, %f14; - -#define FREG_MOVE_1(x0) \ - fsrc2 %x0, %f0; -#define FREG_MOVE_2(x0, x1) \ - fsrc2 %x0, %f0; \ - fsrc2 %x1, %f2; -#define FREG_MOVE_3(x0, x1, x2) \ - fsrc2 %x0, %f0; \ - fsrc2 %x1, %f2; \ - fsrc2 %x2, %f4; -#define FREG_MOVE_4(x0, x1, x2, x3) \ - fsrc2 %x0, %f0; \ - fsrc2 %x1, %f2; \ - fsrc2 %x2, %f4; \ - fsrc2 %x3, %f6; -#define FREG_MOVE_5(x0, x1, x2, x3, x4) \ - fsrc2 %x0, %f0; \ - fsrc2 %x1, %f2; \ - fsrc2 %x2, %f4; \ - fsrc2 %x3, %f6; \ - fsrc2 %x4, %f8; -#define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \ - fsrc2 %x0, %f0; \ - fsrc2 %x1, %f2; \ - fsrc2 %x2, %f4; \ - fsrc2 %x3, %f6; \ - fsrc2 %x4, %f8; \ - fsrc2 %x5, %f10; -#define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \ - fsrc2 %x0, %f0; \ - fsrc2 %x1, %f2; \ - fsrc2 %x2, %f4; \ - fsrc2 %x3, %f6; \ - fsrc2 %x4, %f8; \ - fsrc2 %x5, %f10; \ - fsrc2 %x6, %f12; -#define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \ - fsrc2 %x0, %f0; \ - fsrc2 %x1, %f2; \ - fsrc2 %x2, %f4; \ - fsrc2 %x3, %f6; \ - fsrc2 %x4, %f8; \ - fsrc2 %x5, %f10; \ - fsrc2 %x6, %f12; \ - fsrc2 %x7, %f14; -#define FREG_LOAD_1(base, x0) \ - LOAD(ldd, base + 0x00, %x0) -#define FREG_LOAD_2(base, x0, x1) \ - LOAD(ldd, base + 0x00, %x0); \ - LOAD(ldd, base + 0x08, %x1); -#define FREG_LOAD_3(base, x0, x1, x2) \ - LOAD(ldd, base + 0x00, %x0); \ - LOAD(ldd, base + 0x08, %x1); \ - LOAD(ldd, base + 0x10, %x2); -#define FREG_LOAD_4(base, x0, x1, x2, x3) \ - LOAD(ldd, base + 0x00, %x0); \ - LOAD(ldd, base + 0x08, %x1); \ - LOAD(ldd, base + 0x10, %x2); \ - LOAD(ldd, base + 0x18, %x3); -#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ - LOAD(ldd, base + 0x00, %x0); \ - LOAD(ldd, base + 0x08, %x1); \ - LOAD(ldd, base + 0x10, %x2); \ - LOAD(ldd, base + 0x18, %x3); \ - LOAD(ldd, base + 0x20, %x4); -#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ - LOAD(ldd, base + 0x00, %x0); \ - LOAD(ldd, base + 0x08, %x1); \ - LOAD(ldd, base + 0x10, %x2); \ - LOAD(ldd, base + 0x18, %x3); \ - LOAD(ldd, base + 0x20, %x4); \ - LOAD(ldd, base + 0x28, %x5); -#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ - LOAD(ldd, base + 0x00, %x0); \ - LOAD(ldd, base + 0x08, %x1); \ - LOAD(ldd, base + 0x10, %x2); \ - LOAD(ldd, base + 0x18, %x3); \ - LOAD(ldd, base + 0x20, %x4); \ - LOAD(ldd, base + 0x28, %x5); \ - LOAD(ldd, base + 0x30, %x6); - -#if IS_IN (libc) - - .register %g2,#scratch - .register %g3,#scratch - .register %g6,#scratch - - .text - -ENTRY(__mempcpy_niagara2) - ba,pt %XCC, 101f - add %o0, %o2, %g5 -END(__mempcpy_niagara2) - - .align 32 -ENTRY(__memcpy_niagara2) -100: /* %o0=dst, %o1=src, %o2=len */ - mov %o0, %g5 -101: -# ifndef USE_BPR - srl %o2, 0, %o2 -# endif - cmp %o2, 0 - be,pn %XCC, 85f -218: or %o0, %o1, %o3 - cmp %o2, 16 - blu,a,pn %XCC, 80f - or %o3, %o2, %o3 - - /* 2 blocks (128 bytes) is the minimum we can do the block - * copy with. We need to ensure that we'll iterate at least - * once in the block copy loop. At worst we'll need to align - * the destination to a 64-byte boundary which can chew up - * to (64 - 1) bytes from the length before we perform the - * block copy loop. - * - * However, the cut-off point, performance wise, is around - * 4 64-byte blocks. - */ - cmp %o2, (4 * 64) - blu,pt %XCC, 75f - andcc %o3, 0x7, %g0 - - /* %o0: dst - * %o1: src - * %o2: len (known to be >= 128) - * - * The block copy loops can use %o4, %g2, %g3 as - * temporaries while copying the data. %o5 must - * be preserved between VISEntryHalf and VISExitHalf - */ - - LOAD(prefetch, %o1 + 0x000, #one_read) - LOAD(prefetch, %o1 + 0x040, #one_read) - LOAD(prefetch, %o1 + 0x080, #one_read) - - /* Align destination on 64-byte boundary. */ - andcc %o0, (64 - 1), %o4 - be,pt %XCC, 2f - sub %o4, 64, %o4 - sub %g0, %o4, %o4 ! bytes to align dst - sub %o2, %o4, %o2 -1: subcc %o4, 1, %o4 - LOAD(ldub, %o1, %g1) - STORE(stb, %g1, %o0) - add %o1, 1, %o1 - bne,pt %XCC, 1b - add %o0, 1, %o0 - -2: - /* Clobbers o5/g1/g2/g3/g7/icc/xcc. We must preserve - * o5 from here until we hit VISExitHalf. - */ - VISEntryHalf - - membar #Sync - alignaddr %o1, %g0, %g0 - - add %o1, (64 - 1), %o4 - andn %o4, (64 - 1), %o4 - andn %o2, (64 - 1), %g1 - sub %o2, %g1, %o2 - - and %o1, (64 - 1), %g2 - add %o1, %g1, %o1 - sub %o0, %o4, %g3 - brz,pt %g2, 190f - cmp %g2, 32 - blu,a 5f - cmp %g2, 16 - cmp %g2, 48 - blu,a 4f - cmp %g2, 40 - cmp %g2, 56 - blu 170f - nop - ba,a,pt %xcc, 180f - -4: /* 32 <= low bits < 48 */ - blu 150f - nop - ba,a,pt %xcc, 160f -5: /* 0 < low bits < 32 */ - blu,a 6f - cmp %g2, 8 - cmp %g2, 24 - blu 130f - nop - ba,a,pt %xcc, 140f -6: /* 0 < low bits < 16 */ - bgeu 120f - nop - /* fall through for 0 < low bits < 8 */ -110: sub %o4, 64, %g2 - LOAD_BLK(%g2, %f0) -1: STORE_INIT(%g0, %o4 + %g3) - LOAD_BLK(%o4, %f16) - FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) - STORE_BLK(%f0, %o4 + %g3) - FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 - bne,pt %XCC, 1b - LOAD(prefetch, %o4 + 64, #one_read) - ba,pt %xcc, 195f - nop - -120: sub %o4, 56, %g2 - FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) -1: STORE_INIT(%g0, %o4 + %g3) - LOAD_BLK(%o4, %f16) - FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) - STORE_BLK(%f0, %o4 + %g3) - FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 - bne,pt %XCC, 1b - LOAD(prefetch, %o4 + 64, #one_read) - ba,pt %xcc, 195f - nop - -130: sub %o4, 48, %g2 - FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) -1: STORE_INIT(%g0, %o4 + %g3) - LOAD_BLK(%o4, %f16) - FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) - STORE_BLK(%f0, %o4 + %g3) - FREG_MOVE_6(f20, f22, f24, f26, f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 - bne,pt %XCC, 1b - LOAD(prefetch, %o4 + 64, #one_read) - ba,pt %xcc, 195f - nop - -140: sub %o4, 40, %g2 - FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) -1: STORE_INIT(%g0, %o4 + %g3) - LOAD_BLK(%o4, %f16) - FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) - STORE_BLK(%f0, %o4 + %g3) - FREG_MOVE_5(f22, f24, f26, f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 - bne,pt %XCC, 1b - LOAD(prefetch, %o4 + 64, #one_read) - ba,pt %xcc, 195f - nop - -150: sub %o4, 32, %g2 - FREG_LOAD_4(%g2, f0, f2, f4, f6) -1: STORE_INIT(%g0, %o4 + %g3) - LOAD_BLK(%o4, %f16) - FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) - STORE_BLK(%f0, %o4 + %g3) - FREG_MOVE_4(f24, f26, f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 - bne,pt %XCC, 1b - LOAD(prefetch, %o4 + 64, #one_read) - ba,pt %xcc, 195f - nop - -160: sub %o4, 24, %g2 - FREG_LOAD_3(%g2, f0, f2, f4) -1: STORE_INIT(%g0, %o4 + %g3) - LOAD_BLK(%o4, %f16) - FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) - STORE_BLK(%f0, %o4 + %g3) - FREG_MOVE_3(f26, f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 - bne,pt %XCC, 1b - LOAD(prefetch, %o4 + 64, #one_read) - ba,pt %xcc, 195f - nop - -170: sub %o4, 16, %g2 - FREG_LOAD_2(%g2, f0, f2) -1: STORE_INIT(%g0, %o4 + %g3) - LOAD_BLK(%o4, %f16) - FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) - STORE_BLK(%f0, %o4 + %g3) - FREG_MOVE_2(f28, f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 - bne,pt %XCC, 1b - LOAD(prefetch, %o4 + 64, #one_read) - ba,pt %xcc, 195f - nop - -180: sub %o4, 8, %g2 - FREG_LOAD_1(%g2, f0) -1: STORE_INIT(%g0, %o4 + %g3) - LOAD_BLK(%o4, %f16) - FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) - STORE_BLK(%f0, %o4 + %g3) - FREG_MOVE_1(f30) - subcc %g1, 64, %g1 - add %o4, 64, %o4 - bne,pt %XCC, 1b - LOAD(prefetch, %o4 + 64, #one_read) - ba,pt %xcc, 195f - nop - -190: -1: STORE_INIT(%g0, %o4 + %g3) - subcc %g1, 64, %g1 - LOAD_BLK(%o4, %f0) - STORE_BLK(%f0, %o4 + %g3) - add %o4, 64, %o4 - bne,pt %XCC, 1b - LOAD(prefetch, %o4 + 64, #one_read) - -195: - add %o4, %g3, %o0 - membar #Sync - - VISExitHalf - - /* %o2 contains any final bytes still needed to be copied - * over. If anything is left, we copy it one byte at a time. - */ - brz,pt %o2, 85f - sub %o0, %o1, %o3 - ba,a,pt %XCC, 90f - - .align 64 -75: /* 16 < len <= 64 */ - bne,pn %XCC, 75f - sub %o0, %o1, %o3 - -72: - andn %o2, 0xf, %o4 - and %o2, 0xf, %o2 -1: subcc %o4, 0x10, %o4 - LOAD(ldx, %o1, %o5) - add %o1, 0x08, %o1 - LOAD(ldx, %o1, %g1) - sub %o1, 0x08, %o1 - STORE(stx, %o5, %o1 + %o3) - add %o1, 0x8, %o1 - STORE(stx, %g1, %o1 + %o3) - bgu,pt %XCC, 1b - add %o1, 0x8, %o1 -73: andcc %o2, 0x8, %g0 - be,pt %XCC, 1f - nop - sub %o2, 0x8, %o2 - LOAD(ldx, %o1, %o5) - STORE(stx, %o5, %o1 + %o3) - add %o1, 0x8, %o1 -1: andcc %o2, 0x4, %g0 - be,pt %XCC, 1f - nop - sub %o2, 0x4, %o2 - LOAD(lduw, %o1, %o5) - STORE(stw, %o5, %o1 + %o3) - add %o1, 0x4, %o1 -1: cmp %o2, 0 - be,pt %XCC, 85f - nop - ba,pt %xcc, 90f - nop - -75: - andcc %o0, 0x7, %g1 - sub %g1, 0x8, %g1 - be,pn %icc, 2f - sub %g0, %g1, %g1 - sub %o2, %g1, %o2 - -1: subcc %g1, 1, %g1 - LOAD(ldub, %o1, %o5) - STORE(stb, %o5, %o1 + %o3) - bgu,pt %icc, 1b - add %o1, 1, %o1 - -2: add %o1, %o3, %o0 - andcc %o1, 0x7, %g1 - bne,pt %icc, 8f - sll %g1, 3, %g1 - - cmp %o2, 16 - bgeu,pt %icc, 72b - nop - ba,a,pt %xcc, 73b - -8: mov 64, %o3 - andn %o1, 0x7, %o1 - LOAD(ldx, %o1, %g2) - sub %o3, %g1, %o3 - andn %o2, 0x7, %o4 - sllx %g2, %g1, %g2 -1: add %o1, 0x8, %o1 - LOAD(ldx, %o1, %g3) - subcc %o4, 0x8, %o4 - srlx %g3, %o3, %o5 - or %o5, %g2, %o5 - STORE(stx, %o5, %o0) - add %o0, 0x8, %o0 - bgu,pt %icc, 1b - sllx %g3, %g1, %g2 - - srl %g1, 3, %g1 - andcc %o2, 0x7, %o2 - be,pn %icc, 85f - add %o1, %g1, %o1 - ba,pt %xcc, 90f - sub %o0, %o1, %o3 - - .align 64 -80: /* 0 < len <= 16 */ - andcc %o3, 0x3, %g0 - bne,pn %XCC, 90f - sub %o0, %o1, %o3 - -1: - subcc %o2, 4, %o2 - LOAD(lduw, %o1, %g1) - STORE(stw, %g1, %o1 + %o3) - bgu,pt %XCC, 1b - add %o1, 4, %o1 - -85: retl - mov %g5, %o0 - - .align 32 -90: - subcc %o2, 1, %o2 - LOAD(ldub, %o1, %g1) - STORE(stb, %g1, %o1 + %o3) - bgu,pt %XCC, 90b - add %o1, 1, %o1 - retl - mov %g5, %o0 - -END(__memcpy_niagara2) - -#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S deleted file mode 100644 index 709b398364..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S +++ /dev/null @@ -1,332 +0,0 @@ -/* Copy SIZE bytes from SRC to DEST. For SUN4V Niagara-4. - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@davemloft.net) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 - -#define FPRS_FEF 0x04 - -/* On T4 it is very expensive to access ASRs like %fprs and - * %asi, avoiding a read or a write can save ~50 cycles. - */ -#define FPU_ENTER \ - rd %fprs, %o5; \ - andcc %o5, FPRS_FEF, %g0; \ - be,a,pn %icc, 999f; \ - wr %g0, FPRS_FEF, %fprs; \ - 999: - -#define VISEntryHalf FPU_ENTER -#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs - -#define GLOBAL_SPARE %g5 - -#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P -#define EX_LD(x) x -#define EX_ST(x) x -#define EX_RETVAL(x) x -#define LOAD(type,addr,dest) type [addr], dest -#define STORE(type,src,addr) type src, [addr] -#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI - -#if IS_IN (libc) - - .register %g2,#scratch - .register %g3,#scratch - .register %g6,#scratch - - .text - -ENTRY(__mempcpy_niagara4) - ba,pt %icc, 101f - add %o0, %o2, %o3 -END(__mempcpy_niagara4) - - .align 32 -ENTRY(__memcpy_niagara4) -100: /* %o0=dst, %o1=src, %o2=len */ - mov %o0, %o3 -101: -#ifndef __arch64__ - srl %o2, 0, %o2 -#endif - brz,pn %o2, .Lexit - cmp %o2, 3 - ble,pn %icc, .Ltiny - cmp %o2, 19 - ble,pn %icc, .Lsmall - or %o0, %o1, %g2 - cmp %o2, 128 - bl,pn %icc, .Lmedium - nop - -.Llarge:/* len >= 0x80 */ - /* First get dest 8 byte aligned. */ - sub %g0, %o0, %g1 - and %g1, 0x7, %g1 - brz,pt %g1, 51f - sub %o2, %g1, %o2 - -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) - add %o1, 1, %o1 - subcc %g1, 1, %g1 - add %o0, 1, %o0 - bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) - -51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) - LOAD(prefetch, %o1 + 0x080, #n_reads_strong) - LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong) - LOAD(prefetch, %o1 + 0x100, #n_reads_strong) - LOAD(prefetch, %o1 + 0x140, #n_reads_strong) - LOAD(prefetch, %o1 + 0x180, #n_reads_strong) - LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong) - LOAD(prefetch, %o1 + 0x200, #n_reads_strong) - - /* Check if we can use the straight fully aligned - * loop, or we require the alignaddr/faligndata variant. - */ - andcc %o1, 0x7, %o5 - bne,pn %icc, .Llarge_src_unaligned - sub %g0, %o0, %g1 - - /* Legitimize the use of initializing stores by getting dest - * to be 64-byte aligned. - */ - and %g1, 0x3f, %g1 - brz,pt %g1, .Llarge_aligned - sub %o2, %g1, %o2 - -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) - add %o1, 8, %o1 - subcc %g1, 8, %g1 - add %o0, 8, %o0 - bne,pt %icc, 1b - EX_ST(STORE(stx, %g2, %o0 - 0x08)) - -.Llarge_aligned: - /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ - andn %o2, 0x3f, %o4 - sub %o2, %o4, %o2 - -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) - add %o1, 0x40, %o1 - EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) - subcc %o4, 0x40, %o4 - EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) - EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) - EX_ST(STORE_INIT(%g1, %o0)) - add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) - add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) - EX_ST(STORE_INIT(%g3, %o0)) - add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) - add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) - EX_ST(STORE_INIT(%o5, %o0)) - add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) - add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g3, %o0)) - add %o0, 0x08, %o0 - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) - add %o0, 0x08, %o0 - bne,pt %icc, 1b - LOAD(prefetch, %o1 + 0x200, #n_reads_strong) - - membar #StoreLoad | #StoreStore - - brz,pn %o2, .Lexit - cmp %o2, 19 - ble,pn %icc, .Lsmall_unaligned - nop - ba,a,pt %icc, .Lmedium_noprefetch - -.Lexit: retl - mov EX_RETVAL(%o3), %o0 - -.Llarge_src_unaligned: - andn %o2, 0x3f, %o4 - sub %o2, %o4, %o2 - VISEntryHalf - alignaddr %o1, %g0, %g1 - add %o1, %o4, %o1 - EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) -1: EX_LD(LOAD(ldd, %g1 + 0x08, %f2)) - subcc %o4, 0x40, %o4 - EX_LD(LOAD(ldd, %g1 + 0x10, %f4)) - EX_LD(LOAD(ldd, %g1 + 0x18, %f6)) - EX_LD(LOAD(ldd, %g1 + 0x20, %f8)) - EX_LD(LOAD(ldd, %g1 + 0x28, %f10)) - EX_LD(LOAD(ldd, %g1 + 0x30, %f12)) - EX_LD(LOAD(ldd, %g1 + 0x38, %f14)) - faligndata %f0, %f2, %f16 - EX_LD(LOAD(ldd, %g1 + 0x40, %f0)) - faligndata %f2, %f4, %f18 - add %g1, 0x40, %g1 - faligndata %f4, %f6, %f20 - faligndata %f6, %f8, %f22 - faligndata %f8, %f10, %f24 - faligndata %f10, %f12, %f26 - faligndata %f12, %f14, %f28 - faligndata %f14, %f0, %f30 - EX_ST(STORE(std, %f16, %o0 + 0x00)) - EX_ST(STORE(std, %f18, %o0 + 0x08)) - EX_ST(STORE(std, %f20, %o0 + 0x10)) - EX_ST(STORE(std, %f22, %o0 + 0x18)) - EX_ST(STORE(std, %f24, %o0 + 0x20)) - EX_ST(STORE(std, %f26, %o0 + 0x28)) - EX_ST(STORE(std, %f28, %o0 + 0x30)) - EX_ST(STORE(std, %f30, %o0 + 0x38)) - add %o0, 0x40, %o0 - bne,pt %icc, 1b - LOAD(prefetch, %g1 + 0x200, #n_reads_strong) - VISExitHalf - - brz,pn %o2, .Lexit - cmp %o2, 19 - ble,pn %icc, .Lsmall_unaligned - nop - ba,a,pt %icc, .Lmedium_unaligned - -.Lmedium: - LOAD(prefetch, %o1 + 0x40, #n_reads_strong) - andcc %g2, 0x7, %g0 - bne,pn %icc, .Lmedium_unaligned - nop -.Lmedium_noprefetch: - andncc %o2, 0x20 - 1, %o5 - be,pn %icc, 2f - sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) - EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) - add %o1, 0x20, %o1 - subcc %o5, 0x20, %o5 - EX_ST(STORE(stx, %g1, %o0 + 0x00)) - EX_ST(STORE(stx, %g2, %o0 + 0x08)) - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) - EX_ST(STORE(stx, %o4, %o0 + 0x18)) - bne,pt %icc, 1b - add %o0, 0x20, %o0 -2: andcc %o2, 0x18, %o5 - be,pt %icc, 3f - sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) - add %o1, 0x08, %o1 - add %o0, 0x08, %o0 - subcc %o5, 0x08, %o5 - bne,pt %icc, 1b - EX_ST(STORE(stx, %g1, %o0 - 0x08)) -3: brz,pt %o2, .Lexit - cmp %o2, 0x04 - bl,pn %icc, .Ltiny - nop - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) - add %o1, 0x04, %o1 - add %o0, 0x04, %o0 - subcc %o2, 0x04, %o2 - bne,pn %icc, .Ltiny - EX_ST(STORE(stw, %g1, %o0 - 0x04)) - ba,a,pt %icc, .Lexit -.Lmedium_unaligned: - /* First get dest 8 byte aligned. */ - sub %g0, %o0, %g1 - and %g1, 0x7, %g1 - brz,pt %g1, 2f - sub %o2, %g1, %o2 - -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) - add %o1, 1, %o1 - subcc %g1, 1, %g1 - add %o0, 1, %o0 - bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) -2: - and %o1, 0x7, %g1 - brz,pn %g1, .Lmedium_noprefetch - sll %g1, 3, %g1 - mov 64, %g2 - sub %g2, %g1, %g2 - andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) - sllx %o4, %g1, %o4 - andn %o2, 0x08 - 1, %o5 - sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) - add %o1, 0x08, %o1 - subcc %o5, 0x08, %o5 - srlx %g3, %g2, GLOBAL_SPARE - or GLOBAL_SPARE, %o4, GLOBAL_SPARE - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) - add %o0, 0x08, %o0 - bne,pt %icc, 1b - sllx %g3, %g1, %o4 - srl %g1, 3, %g1 - add %o1, %g1, %o1 - brz,pn %o2, .Lexit - nop - ba,pt %icc, .Lsmall_unaligned - -.Ltiny: - EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) - subcc %o2, 1, %o2 - be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x00)) - EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) - subcc %o2, 1, %o2 - be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x01)) - EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) - ba,pt %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x02)) - -.Lsmall: - andcc %g2, 0x3, %g0 - bne,pn %icc, .Lsmall_unaligned - andn %o2, 0x4 - 1, %o5 - sub %o2, %o5, %o2 -1: - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) - add %o1, 0x04, %o1 - subcc %o5, 0x04, %o5 - add %o0, 0x04, %o0 - bne,pt %icc, 1b - EX_ST(STORE(stw, %g1, %o0 - 0x04)) - brz,pt %o2, .Lexit - nop - ba,a,pt %icc, .Ltiny - -.Lsmall_unaligned: -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) - add %o1, 1, %o1 - add %o0, 1, %o0 - subcc %o2, 1, %o2 - bne,pt %icc, 1b - EX_ST(STORE(stb, %g1, %o0 - 0x01)) - ba,a,pt %icc, .Lexit -END(__memcpy_niagara4) - -#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S deleted file mode 100644 index b8f5c3cb8f..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S +++ /dev/null @@ -1,325 +0,0 @@ -/* Copy SIZE bytes from SRC to DEST. - For UltraSPARC-III. - Copyright (C) 2001-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@redhat.com) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#define ASI_BLK_P 0xf0 -#define FPRS_FEF 0x04 -#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs -#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs - -#ifndef XCC -#define USE_BPR -#define XCC xcc -#endif - -#if IS_IN (libc) - - .register %g2,#scratch - .register %g3,#scratch - .register %g6,#scratch - - .text - -ENTRY(__mempcpy_ultra3) - ba,pt %XCC, 101f - add %o0, %o2, %g5 -END(__mempcpy_ultra3) - - /* Special/non-trivial issues of this code: - * - * 1) %o5 is preserved from VISEntryHalf to VISExitHalf - * 2) Only low 32 FPU registers are used so that only the - * lower half of the FPU register set is dirtied by this - * code. This is especially important in the kernel. - * 3) This code never prefetches cachelines past the end - * of the source buffer. - * - * The cheetah's flexible spine, oversized liver, enlarged heart, - * slender muscular body, and claws make it the swiftest hunter - * in Africa and the fastest animal on land. Can reach speeds - * of up to 2.4GB per second. - */ - .align 32 -ENTRY(__memcpy_ultra3) - -100: /* %o0=dst, %o1=src, %o2=len */ - mov %o0, %g5 -101: - cmp %o2, 0 - be,pn %XCC, out -218: or %o0, %o1, %o3 - cmp %o2, 16 - bleu,a,pn %XCC, small_copy - or %o3, %o2, %o3 - - cmp %o2, 256 - blu,pt %XCC, medium_copy - andcc %o3, 0x7, %g0 - - ba,pt %xcc, enter - andcc %o0, 0x3f, %g2 - - /* Here len >= 256 and condition codes reflect execution - * of "andcc %o0, 0x7, %g2", done by caller. - */ - .align 64 -enter: - /* Is 'dst' already aligned on an 64-byte boundary? */ - be,pt %XCC, 2f - - /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number - * of bytes to copy to make 'dst' 64-byte aligned. We pre- - * subtract this from 'len'. - */ - sub %g2, 0x40, %g2 - sub %g0, %g2, %g2 - sub %o2, %g2, %o2 - - /* Copy %g2 bytes from src to dst, one byte at a time. */ -1: ldub [%o1 + 0x00], %o3 - add %o1, 0x1, %o1 - add %o0, 0x1, %o0 - subcc %g2, 0x1, %g2 - - bg,pt %XCC, 1b - stb %o3, [%o0 + -1] - -2: VISEntryHalf - and %o1, 0x7, %g1 - ba,pt %xcc, begin - alignaddr %o1, %g0, %o1 - - .align 64 -begin: - prefetch [%o1 + 0x000], #one_read - prefetch [%o1 + 0x040], #one_read - andn %o2, (0x40 - 1), %o4 - prefetch [%o1 + 0x080], #one_read - prefetch [%o1 + 0x0c0], #one_read - ldd [%o1 + 0x000], %f0 - prefetch [%o1 + 0x100], #one_read - ldd [%o1 + 0x008], %f2 - prefetch [%o1 + 0x140], #one_read - ldd [%o1 + 0x010], %f4 - prefetch [%o1 + 0x180], #one_read - faligndata %f0, %f2, %f16 - ldd [%o1 + 0x018], %f6 - faligndata %f2, %f4, %f18 - ldd [%o1 + 0x020], %f8 - faligndata %f4, %f6, %f20 - ldd [%o1 + 0x028], %f10 - faligndata %f6, %f8, %f22 - - ldd [%o1 + 0x030], %f12 - faligndata %f8, %f10, %f24 - ldd [%o1 + 0x038], %f14 - faligndata %f10, %f12, %f26 - ldd [%o1 + 0x040], %f0 - - sub %o4, 0x80, %o4 - add %o1, 0x40, %o1 - ba,pt %xcc, loop - srl %o4, 6, %o3 - - .align 64 -loop: - ldd [%o1 + 0x008], %f2 - faligndata %f12, %f14, %f28 - ldd [%o1 + 0x010], %f4 - faligndata %f14, %f0, %f30 - stda %f16, [%o0] ASI_BLK_P - ldd [%o1 + 0x018], %f6 - faligndata %f0, %f2, %f16 - - ldd [%o1 + 0x020], %f8 - faligndata %f2, %f4, %f18 - ldd [%o1 + 0x028], %f10 - faligndata %f4, %f6, %f20 - ldd [%o1 + 0x030], %f12 - faligndata %f6, %f8, %f22 - ldd [%o1 + 0x038], %f14 - faligndata %f8, %f10, %f24 - - ldd [%o1 + 0x040], %f0 - prefetch [%o1 + 0x180], #one_read - faligndata %f10, %f12, %f26 - subcc %o3, 0x01, %o3 - add %o1, 0x40, %o1 - bg,pt %XCC, loop - add %o0, 0x40, %o0 - - /* Finally we copy the last full 64-byte block. */ -loopfini: - ldd [%o1 + 0x008], %f2 - faligndata %f12, %f14, %f28 - ldd [%o1 + 0x010], %f4 - faligndata %f14, %f0, %f30 - stda %f16, [%o0] ASI_BLK_P - ldd [%o1 + 0x018], %f6 - faligndata %f0, %f2, %f16 - ldd [%o1 + 0x020], %f8 - faligndata %f2, %f4, %f18 - ldd [%o1 + 0x028], %f10 - faligndata %f4, %f6, %f20 - ldd [%o1 + 0x030], %f12 - faligndata %f6, %f8, %f22 - ldd [%o1 + 0x038], %f14 - faligndata %f8, %f10, %f24 - cmp %g1, 0 - be,pt %XCC, 1f - add %o0, 0x40, %o0 - ldd [%o1 + 0x040], %f0 -1: faligndata %f10, %f12, %f26 - faligndata %f12, %f14, %f28 - faligndata %f14, %f0, %f30 - stda %f16, [%o0] ASI_BLK_P - add %o0, 0x40, %o0 - add %o1, 0x40, %o1 - membar #Sync - - /* Now we copy the (len modulo 64) bytes at the end. - * Note how we borrow the %f0 loaded above. - * - * Also notice how this code is careful not to perform a - * load past the end of the src buffer. - */ -loopend: - and %o2, 0x3f, %o2 - andcc %o2, 0x38, %g2 - be,pn %XCC, endcruft - subcc %g2, 0x8, %g2 - be,pn %XCC, endcruft - cmp %g1, 0 - - be,a,pt %XCC, 1f - ldd [%o1 + 0x00], %f0 - -1: ldd [%o1 + 0x08], %f2 - add %o1, 0x8, %o1 - sub %o2, 0x8, %o2 - subcc %g2, 0x8, %g2 - faligndata %f0, %f2, %f8 - std %f8, [%o0 + 0x00] - be,pn %XCC, endcruft - add %o0, 0x8, %o0 - ldd [%o1 + 0x08], %f0 - add %o1, 0x8, %o1 - sub %o2, 0x8, %o2 - subcc %g2, 0x8, %g2 - faligndata %f2, %f0, %f8 - std %f8, [%o0 + 0x00] - bne,pn %XCC, 1b - add %o0, 0x8, %o0 - - /* If anything is left, we copy it one byte at a time. - * Note that %g1 is (src & 0x3) saved above before the - * alignaddr was performed. - */ -endcruft: - cmp %o2, 0 - add %o1, %g1, %o1 - VISExitHalf - be,pn %XCC, out - sub %o0, %o1, %o3 - - andcc %g1, 0x7, %g0 - bne,pn %icc, small_copy_unaligned - andcc %o2, 0x8, %g0 - be,pt %icc, 1f - nop - ldx [%o1], %o5 - stx %o5, [%o1 + %o3] - add %o1, 0x8, %o1 - -1: andcc %o2, 0x4, %g0 - be,pt %icc, 1f - nop - lduw [%o1], %o5 - stw %o5, [%o1 + %o3] - add %o1, 0x4, %o1 - -1: andcc %o2, 0x2, %g0 - be,pt %icc, 1f - nop - lduh [%o1], %o5 - sth %o5, [%o1 + %o3] - add %o1, 0x2, %o1 - -1: andcc %o2, 0x1, %g0 - be,pt %icc, out - nop - ldub [%o1], %o5 - ba,pt %xcc, out - stb %o5, [%o1 + %o3] - -medium_copy: /* 16 < len <= 64 */ - bne,pn %XCC, small_copy_unaligned - sub %o0, %o1, %o3 - -medium_copy_aligned: - andn %o2, 0x7, %o4 - and %o2, 0x7, %o2 -1: subcc %o4, 0x8, %o4 - ldx [%o1], %o5 - stx %o5, [%o1 + %o3] - bgu,pt %XCC, 1b - add %o1, 0x8, %o1 - andcc %o2, 0x4, %g0 - be,pt %XCC, 1f - nop - sub %o2, 0x4, %o2 - lduw [%o1], %o5 - stw %o5, [%o1 + %o3] - add %o1, 0x4, %o1 -1: cmp %o2, 0 - be,pt %XCC, out - nop - ba,pt %xcc, small_copy_unaligned - nop - -small_copy: /* 0 < len <= 16 */ - andcc %o3, 0x3, %g0 - bne,pn %XCC, small_copy_unaligned - sub %o0, %o1, %o3 - -small_copy_aligned: - subcc %o2, 4, %o2 - lduw [%o1], %g1 - stw %g1, [%o1 + %o3] - bgu,pt %XCC, small_copy_aligned - add %o1, 4, %o1 - -out: retl - mov %g5, %o0 - - .align 32 -small_copy_unaligned: - subcc %o2, 1, %o2 - ldub [%o1], %g1 - stb %g1, [%o1 + %o3] - bgu,pt %XCC, small_copy_unaligned - add %o1, 1, %o1 - retl - mov %g5, %o0 - -END(__memcpy_ultra3) - -#endif \ No newline at end of file diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy.S deleted file mode 100644 index b6396eeae5..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/memcpy.S +++ /dev/null @@ -1,167 +0,0 @@ -/* Multiple versions of memcpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#if IS_IN (libc) - .text -ENTRY(memcpy) - .type memcpy, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_CRYPTO, %o1 - andcc %o0, %o1, %g0 - be 1f - andcc %o0, HWCAP_SPARC_N2, %g0 -# ifdef SHARED - sethi %gdop_hix22(__memcpy_niagara4), %o1 - xor %o1, %gdop_lox10(__memcpy_niagara4), %o1 -# else - set __memcpy_niagara4, %o1 -# endif - ba 10f - nop -1: be 1f - andcc %o0, HWCAP_SPARC_BLKINIT, %g0 -# ifdef SHARED - sethi %gdop_hix22(__memcpy_niagara2), %o1 - xor %o1, %gdop_lox10(__memcpy_niagara2), %o1 -# else - set __memcpy_niagara2, %o1 -# endif - ba 10f - nop -1: be 1f - andcc %o0, HWCAP_SPARC_ULTRA3, %g0 -# ifdef SHARED - sethi %gdop_hix22(__memcpy_niagara1), %o1 - xor %o1, %gdop_lox10(__memcpy_niagara1), %o1 -# else - set __memcpy_niagara1, %o1 -# endif - ba 10f - nop -1: be 9f - nop -# ifdef SHARED - sethi %gdop_hix22(__memcpy_ultra3), %o1 - xor %o1, %gdop_lox10(__memcpy_ultra3), %o1 -# else - set __memcpy_ultra3, %o1 -# endif - ba 10f - nop -9: -# ifdef SHARED - sethi %gdop_hix22(__memcpy_ultra1), %o1 - xor %o1, %gdop_lox10(__memcpy_ultra1), %o1 -# else - set __memcpy_ultra1, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(memcpy) - -ENTRY(__mempcpy) - .type __mempcpy, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_CRYPTO, %o1 - andcc %o0, %o1, %g0 - be 1f - andcc %o0, HWCAP_SPARC_N2, %g0 -# ifdef SHARED - sethi %gdop_hix22(__mempcpy_niagara4), %o1 - xor %o1, %gdop_lox10(__mempcpy_niagara4), %o1 -# else - set __mempcpy_niagara4, %o1 -# endif - ba 10f - nop -1: be 1f - andcc %o0, HWCAP_SPARC_BLKINIT, %g0 -# ifdef SHARED - sethi %gdop_hix22(__mempcpy_niagara2), %o1 - xor %o1, %gdop_lox10(__mempcpy_niagara2), %o1 -# else - set __mempcpy_niagara2, %o1 -# endif - ba 10f - nop -1: be 1f - andcc %o0, HWCAP_SPARC_ULTRA3, %g0 -# ifdef SHARED - sethi %gdop_hix22(__mempcpy_niagara1), %o1 - xor %o1, %gdop_lox10(__mempcpy_niagara1), %o1 -# else - set __mempcpy_niagara1, %o1 -# endif - ba 10f - nop -1: be 9f - nop -# ifdef SHARED - sethi %gdop_hix22(__mempcpy_ultra3), %o1 - xor %o1, %gdop_lox10(__mempcpy_ultra3), %o1 -# else - set __mempcpy_ultra3, %o1 -# endif - ba 10f - nop -9: -# ifdef SHARED - sethi %gdop_hix22(__mempcpy_ultra1), %o1 - xor %o1, %gdop_lox10(__mempcpy_ultra1), %o1 -# else - set __mempcpy_ultra1, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__mempcpy) - -libc_hidden_builtin_def (memcpy) - -libc_hidden_def (__mempcpy) -weak_alias (__mempcpy, mempcpy) -libc_hidden_builtin_def (mempcpy) - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) -#undef weak_alias -#define weak_alias(x, y) -#undef libc_hidden_def -#define libc_hidden_def(name) - -#define memcpy __memcpy_ultra1 -#define __mempcpy __mempcpy_ultra1 - -#endif - -#include "../memcpy.S" diff --git a/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S deleted file mode 100644 index 45b2251691..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S +++ /dev/null @@ -1,177 +0,0 @@ -/* Set a block of memory to some byte value. For SUN4V Niagara. - Copyright (C) 2006-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@davemloft.net) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 -#define ASI_P 0x80 -#define ASI_PNF 0x82 - -#ifndef XCC -#define USE_BPR -#define XCC xcc -#endif - -#if IS_IN (libc) - - .register %g2,#scratch - - .text - .align 32 - -ENTRY(__memset_niagara1) - /* %o0=buf, %o1=pat, %o2=len */ - and %o1, 0xff, %o3 - mov %o2, %o1 - sllx %o3, 8, %g1 - or %g1, %o3, %o2 - sllx %o2, 16, %g1 - or %g1, %o2, %o2 - sllx %o2, 32, %g1 - ba,pt %XCC, 1f - or %g1, %o2, %o2 -END(__memset_niagara1) - -ENTRY(__bzero_niagara1) - clr %o2 -1: -# ifndef USE_BRP - srl %o1, 0, %o1 -# endif - brz,pn %o1, 90f - mov %o0, %o3 - - wr %g0, ASI_P, %asi - - cmp %o1, 15 - blu,pn %XCC, 70f - andcc %o0, 0x7, %g1 - be,pt %XCC, 2f - mov 8, %g2 - sub %g2, %g1, %g1 - sub %o1, %g1, %o1 -1: stba %o2, [%o0 + 0x00] %asi - subcc %g1, 1, %g1 - bne,pt %XCC, 1b - add %o0, 1, %o0 -2: cmp %o1, 128 - blu,pn %XCC, 60f - andcc %o0, (64 - 1), %g1 - be,pt %XCC, 40f - mov 64, %g2 - sub %g2, %g1, %g1 - sub %o1, %g1, %o1 -1: stxa %o2, [%o0 + 0x00] %asi - subcc %g1, 8, %g1 - bne,pt %XCC, 1b - add %o0, 8, %o0 - -40: - wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi - andn %o1, (64 - 1), %g1 - sub %o1, %g1, %o1 - - andn %g1, (256 - 1), %g2 - brz,pt %g2, 50f - and %g1, (256 - 1), %g1 - -45: - stxa %o2, [%o0 + 0x00] %asi - stxa %o2, [%o0 + 0x08] %asi - stxa %o2, [%o0 + 0x10] %asi - stxa %o2, [%o0 + 0x18] %asi - stxa %o2, [%o0 + 0x20] %asi - stxa %o2, [%o0 + 0x28] %asi - stxa %o2, [%o0 + 0x30] %asi - stxa %o2, [%o0 + 0x38] %asi - stxa %o2, [%o0 + 0x40] %asi - stxa %o2, [%o0 + 0x48] %asi - stxa %o2, [%o0 + 0x50] %asi - stxa %o2, [%o0 + 0x58] %asi - stxa %o2, [%o0 + 0x60] %asi - stxa %o2, [%o0 + 0x68] %asi - stxa %o2, [%o0 + 0x70] %asi - stxa %o2, [%o0 + 0x78] %asi - stxa %o2, [%o0 + 0x80] %asi - stxa %o2, [%o0 + 0x88] %asi - stxa %o2, [%o0 + 0x90] %asi - stxa %o2, [%o0 + 0x98] %asi - stxa %o2, [%o0 + 0xa0] %asi - stxa %o2, [%o0 + 0xa8] %asi - stxa %o2, [%o0 + 0xb0] %asi - stxa %o2, [%o0 + 0xb8] %asi - stxa %o2, [%o0 + 0xc0] %asi - stxa %o2, [%o0 + 0xc8] %asi - stxa %o2, [%o0 + 0xd0] %asi - stxa %o2, [%o0 + 0xd8] %asi - stxa %o2, [%o0 + 0xe0] %asi - stxa %o2, [%o0 + 0xe8] %asi - stxa %o2, [%o0 + 0xf0] %asi - stxa %o2, [%o0 + 0xf8] %asi - subcc %g2, 256, %g2 - bne,pt %XCC, 45b - add %o0, 256, %o0 - - brz,pn %g1, 55f - nop - -50: - stxa %o2, [%o0 + 0x00] %asi - stxa %o2, [%o0 + 0x08] %asi - stxa %o2, [%o0 + 0x10] %asi - stxa %o2, [%o0 + 0x18] %asi - stxa %o2, [%o0 + 0x20] %asi - stxa %o2, [%o0 + 0x28] %asi - stxa %o2, [%o0 + 0x30] %asi - stxa %o2, [%o0 + 0x38] %asi - subcc %g1, 64, %g1 - bne,pt %XCC, 50b - add %o0, 64, %o0 - -55: - wr %g0, ASI_P, %asi - brz,pn %o1, 80f -60: - andncc %o1, 0x7, %g1 - be,pn %XCC, 2f - sub %o1, %g1, %o1 -1: stxa %o2, [%o0 + 0x00] %asi - subcc %g1, 8, %g1 - bne,pt %XCC, 1b - add %o0, 8, %o0 -2: brz,pt %o1, 80f - nop - -70: -1: stba %o2, [%o0 + 0x00] %asi - subcc %o1, 1, %o1 - bne,pt %XCC, 1b - add %o0, 1, %o0 - - /* fallthrough */ - -80: - wr %g0, ASI_PNF, %asi - -90: - retl - mov %o3, %o0 -END(__bzero_niagara1) - -#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S deleted file mode 100644 index c04a07a7f9..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S +++ /dev/null @@ -1,124 +0,0 @@ -/* Set a block of memory to some byte value. For SUN4V Niagara-4. - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@davemloft.net) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 - -#if IS_IN (libc) - - .register %g2, #scratch - .register %g3, #scratch - - .text - .align 32 - -ENTRY(__memset_niagara4) - andcc %o1, 0xff, %o4 - be,pt %icc, 1f - mov %o2, %o1 - sllx %o4, 8, %g1 - or %g1, %o4, %o2 - sllx %o2, 16, %g1 - or %g1, %o2, %o2 - sllx %o2, 32, %g1 - ba,pt %icc, 1f - or %g1, %o2, %o4 -END(__memset_niagara4) - - .align 32 -ENTRY(__bzero_niagara4) - clr %o4 -1: cmp %o1, 16 - ble %icc, .Ltiny - mov %o0, %o3 - sub %g0, %o0, %g1 - and %g1, 0x7, %g1 - brz,pt %g1, .Laligned8 - sub %o1, %g1, %o1 -1: stb %o4, [%o0 + 0x00] - subcc %g1, 1, %g1 - bne,pt %icc, 1b - add %o0, 1, %o0 -.Laligned8: - cmp %o1, 64 + (64 - 8) - ble .Lmedium - sub %g0, %o0, %g1 - andcc %g1, (64 - 1), %g1 - brz,pn %g1, .Laligned64 - sub %o1, %g1, %o1 -1: stx %o4, [%o0 + 0x00] - subcc %g1, 8, %g1 - bne,pt %icc, 1b - add %o0, 0x8, %o0 -.Laligned64: - andn %o1, 64 - 1, %g1 - sub %o1, %g1, %o1 - brnz,pn %o4, .Lnon_bzero_loop - mov 0x20, %g2 -1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P - subcc %g1, 0x40, %g1 - stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P - bne,pt %icc, 1b - add %o0, 0x40, %o0 -.Lpostloop: - cmp %o1, 8 - bl,pn %icc, .Ltiny - membar #StoreStore|#StoreLoad -.Lmedium: - andn %o1, 0x7, %g1 - sub %o1, %g1, %o1 -1: stx %o4, [%o0 + 0x00] - subcc %g1, 0x8, %g1 - bne,pt %icc, 1b - add %o0, 0x08, %o0 - andcc %o1, 0x4, %g1 - be,pt %icc, .Ltiny - sub %o1, %g1, %o1 - stw %o4, [%o0 + 0x00] - add %o0, 0x4, %o0 -.Ltiny: - cmp %o1, 0 - be,pn %icc, .Lexit -1: subcc %o1, 1, %o1 - stb %o4, [%o0 + 0x00] - bne,pt %icc, 1b - add %o0, 1, %o0 -.Lexit: - retl - mov %o3, %o0 -.Lnon_bzero_loop: - mov 0x08, %g3 - mov 0x28, %o5 -1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P - subcc %g1, 0x40, %g1 - stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P - stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P - stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P - add %o0, 0x10, %o0 - stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P - stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P - stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P - stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P - bne,pt %icc, 1b - add %o0, 0x30, %o0 - ba,a,pt %icc, .Lpostloop -END(__bzero_niagara4) - -#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memset.S b/sysdeps/sparc/sparc64/multiarch/memset.S deleted file mode 100644 index 9469d5e7ce..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/memset.S +++ /dev/null @@ -1,124 +0,0 @@ -/* Multiple versions of memset and bzero - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#if IS_IN (libc) - .text -ENTRY(memset) - .type memset, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_CRYPTO, %o1 - andcc %o0, %o1, %g0 - be 1f - andcc %o0, HWCAP_SPARC_BLKINIT, %g0 -# ifdef SHARED - sethi %gdop_hix22(__memset_niagara4), %o1 - xor %o1, %gdop_lox10(__memset_niagara4), %o1 -# else - set __memset_niagara4, %o1 -# endif - ba 10f - nop -1: be 9f - nop -# ifdef SHARED - sethi %gdop_hix22(__memset_niagara1), %o1 - xor %o1, %gdop_lox10(__memset_niagara1), %o1 -# else - set __memset_niagara1, %o1 -# endif - ba 10f - nop -9: -# ifdef SHARED - sethi %gdop_hix22(__memset_ultra1), %o1 - xor %o1, %gdop_lox10(__memset_ultra1), %o1 -# else - set __memset_ultra1, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(memset) - -ENTRY(__bzero) - .type bzero, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_CRYPTO, %o1 - andcc %o0, %o1, %g0 - be 1f - andcc %o0, HWCAP_SPARC_BLKINIT, %g0 -# ifdef SHARED - sethi %gdop_hix22(__bzero_niagara4), %o1 - xor %o1, %gdop_lox10(__bzero_niagara4), %o1 -# else - set __bzero_niagara4, %o1 -# endif - ba 10f - nop -1: be 9f - nop -# ifdef SHARED - sethi %gdop_hix22(__bzero_niagara1), %o1 - xor %o1, %gdop_lox10(__bzero_niagara1), %o1 -# else - set __bzero_niagara1, %o1 -# endif - ba 10f - nop -9: -# ifdef SHARED - sethi %gdop_hix22(__bzero_ultra1), %o1 - xor %o1, %gdop_lox10(__bzero_ultra1), %o1 -# else - set __bzero_ultra1, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__bzero) - -weak_alias (__bzero, bzero) - -# undef weak_alias -# define weak_alias(a, b) - -libc_hidden_builtin_def (memset) - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) - -#define memset __memset_ultra1 -#define __bzero __bzero_ultra1 - -#endif - -#include "../memset.S" diff --git a/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S b/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S deleted file mode 100644 index 7c4fa49ce4..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S +++ /dev/null @@ -1,73 +0,0 @@ -! SPARC v9 64-bit VIS3 __mpn_mul_1 -- Multiply a limb vector with a single -! limb and store the product in a second limb vector. -! -! Copyright (C) 2013-2017 Free Software Foundation, Inc. -! This file is part of the GNU C Library. -! Contributed by David S. Miller <davem@davemloft.net> -! -! The GNU C Library is free software; you can redistribute it and/or -! modify it under the terms of the GNU Lesser General Public -! License as published by the Free Software Foundation; either -! version 2.1 of the License, or (at your option) any later version. -! -! The GNU C Library is distributed in the hope that it will be useful, -! but WITHOUT ANY WARRANTY; without even the implied warranty of -! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -! Lesser General Public License for more details. -! -! You should have received a copy of the GNU Lesser General Public -! License along with the GNU C Library; if not, see -! <http://www.gnu.org/licenses/>. - -#include <sysdep.h> - -#define res_ptr %o0 -#define s1_ptr %o1 -#define sz %o2 -#define s2_limb %o3 -#define carry %o5 -#define tmp1 %g1 -#define tmp2 %g2 -#define tmp3 %g3 -#define tmp4 %o4 - - .register %g2,#scratch - .register %g3,#scratch -ENTRY(__mpn_mul_1_vis3) - subcc sz, 1, sz - be .Lfinal_limb - clr carry - -.Lloop: - ldx [s1_ptr + 0x00], tmp1 - ldx [s1_ptr + 0x08], tmp4 - mulx tmp1, s2_limb, tmp3 - add s1_ptr, 0x10, s1_ptr - umulxhi tmp1, s2_limb, tmp2 - sub sz, 2, sz - mulx tmp4, s2_limb, tmp1 - add res_ptr, 0x10, res_ptr - umulxhi tmp4, s2_limb, tmp4 - addcc carry, tmp3, tmp3 - stx tmp3, [res_ptr - 0x10] - addxc %g0, tmp2, carry - addcc carry, tmp1, tmp1 - addxc %g0, tmp4, carry - brgz sz, .Lloop - stx tmp1, [res_ptr - 0x08] - - brlz,pt sz, .Lfinish - nop - -.Lfinal_limb: - ldx [s1_ptr + 0x00], tmp1 - mulx tmp1, s2_limb, tmp3 - umulxhi tmp1, s2_limb, tmp2 - addcc carry, tmp3, tmp3 - addxc %g0, tmp2, carry - stx tmp3, [res_ptr + 0x00] - -.Lfinish: - retl - mov carry, %o0 -END(__mpn_mul_1_vis3) diff --git a/sysdeps/sparc/sparc64/multiarch/mul_1.S b/sysdeps/sparc/sparc64/multiarch/mul_1.S deleted file mode 100644 index 75fca932b7..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/mul_1.S +++ /dev/null @@ -1,56 +0,0 @@ -/* Multiple versions of mul_1 - - Copyright (C) 2013-2017 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -ENTRY(__mpn_mul_1) - .type __mpn_mul_1, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_VIS3, %o1 - andcc %o0, %o1, %g0 - be 1f - nop -# ifdef SHARED - sethi %gdop_hix22(__mpn_mul_1_vis3), %o1 - xor %o1, %gdop_lox10(__mpn_mul_1_vis3), %o1 -# else - set __mpn_mul_1_vis3, %o1 -# endif - ba 10f - nop -1: -# ifdef SHARED - sethi %gdop_hix22(__mpn_mul_1_generic), %o1 - xor %o1, %gdop_lox10(__mpn_mul_1_generic), %o1 -# else - set __mpn_mul_1_generic, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__mpn_mul_1) - -#define __mpn_mul_1 __mpn_mul_1_generic -#include "../mul_1.S" diff --git a/sysdeps/sparc/sparc64/multiarch/rtld-memcpy.c b/sysdeps/sparc/sparc64/multiarch/rtld-memcpy.c deleted file mode 100644 index 2452575343..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/rtld-memcpy.c +++ /dev/null @@ -1 +0,0 @@ -#include "../rtld-memcpy.c" diff --git a/sysdeps/sparc/sparc64/multiarch/rtld-memset.c b/sysdeps/sparc/sparc64/multiarch/rtld-memset.c deleted file mode 100644 index c01eb0beb9..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/rtld-memset.c +++ /dev/null @@ -1 +0,0 @@ -#include "../rtld-memset.c" diff --git a/sysdeps/sparc/sparc64/multiarch/sha256-block.c b/sysdeps/sparc/sparc64/multiarch/sha256-block.c deleted file mode 100644 index 9d65315a5a..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/sha256-block.c +++ /dev/null @@ -1,32 +0,0 @@ -#include <sparc-ifunc.h> - -#define __sha256_process_block __sha256_process_block_generic -extern void __sha256_process_block_generic (const void *buffer, size_t len, - struct sha256_ctx *ctx); - -#include <crypt/sha256-block.c> - -#undef __sha256_process_block - -extern void __sha256_process_block_crop (const void *buffer, size_t len, - struct sha256_ctx *ctx); - -static bool cpu_supports_sha256(int hwcap) -{ - unsigned long cfr; - - if (!(hwcap & HWCAP_SPARC_CRYPTO)) - return false; - - __asm__ ("rd %%asr26, %0" : "=r" (cfr)); - if (cfr & (1 << 6)) - return true; - - return false; -} - -extern void __sha256_process_block (const void *buffer, size_t len, - struct sha256_ctx *ctx); -sparc_libc_ifunc (__sha256_process_block, - cpu_supports_sha256(hwcap) ? __sha256_process_block_crop - : __sha256_process_block_generic); diff --git a/sysdeps/sparc/sparc64/multiarch/sha256-crop.S b/sysdeps/sparc/sparc64/multiarch/sha256-crop.S deleted file mode 100644 index 8f07e4245a..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/sha256-crop.S +++ /dev/null @@ -1,101 +0,0 @@ -/* SHA256 using sparc crypto opcodes. - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@davemloft.net) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#define SHA256 \ - .word 0x81b02840; - - .text - .align 32 -ENTRY(__sha256_process_block_crop) - /* %o0=buffer, %o1=len, %o2=CTX */ - ldx [%o2 + 0x20], %g1 - add %g1, %o1, %g1 - stx %g1, [%o2 + 0x20] - - ld [%o2 + 0x00], %f0 - ld [%o2 + 0x04], %f1 - ld [%o2 + 0x08], %f2 - ld [%o2 + 0x0c], %f3 - ld [%o2 + 0x10], %f4 - ld [%o2 + 0x14], %f5 - andcc %o1, 0x7, %g0 - ld [%o2 + 0x18], %f6 - bne,pn %xcc, 10f - ld [%o2 + 0x1c], %f7 - -1: - ldd [%o0 + 0x00], %f8 - ldd [%o0 + 0x08], %f10 - ldd [%o0 + 0x10], %f12 - ldd [%o0 + 0x18], %f14 - ldd [%o0 + 0x20], %f16 - ldd [%o0 + 0x28], %f18 - ldd [%o0 + 0x30], %f20 - ldd [%o0 + 0x38], %f22 - - SHA256 - - subcc %o1, 0x40, %o1 - bne,pt %xcc, 1b - add %o0, 0x40, %o0 - -5: - st %f0, [%o2 + 0x00] - st %f1, [%o2 + 0x04] - st %f2, [%o2 + 0x08] - st %f3, [%o2 + 0x0c] - st %f4, [%o2 + 0x10] - st %f5, [%o2 + 0x14] - st %f6, [%o2 + 0x18] - retl - st %f7, [%o2 + 0x1c] -10: - alignaddr %o0, %g0, %o0 - - ldd [%o0 + 0x00], %f10 -1: - ldd [%o0 + 0x08], %f12 - ldd [%o0 + 0x10], %f14 - ldd [%o0 + 0x18], %f16 - ldd [%o0 + 0x20], %f18 - ldd [%o0 + 0x28], %f20 - ldd [%o0 + 0x30], %f22 - ldd [%o0 + 0x38], %f24 - ldd [%o0 + 0x40], %f26 - - faligndata %f10, %f12, %f8 - faligndata %f12, %f14, %f10 - faligndata %f14, %f16, %f12 - faligndata %f16, %f18, %f14 - faligndata %f18, %f20, %f16 - faligndata %f20, %f22, %f18 - faligndata %f22, %f24, %f20 - faligndata %f24, %f26, %f22 - - SHA256 - - subcc %o1, 0x40, %o1 - fsrc2 %f26, %f10 - bne,pt %xcc, 1b - add %o0, 0x40, %o0 - - ba,a,pt %xcc, 5b -END(__sha256_process_block_crop) diff --git a/sysdeps/sparc/sparc64/multiarch/sha512-block.c b/sysdeps/sparc/sparc64/multiarch/sha512-block.c deleted file mode 100644 index 2863e05d09..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/sha512-block.c +++ /dev/null @@ -1,32 +0,0 @@ -#include <sparc-ifunc.h> - -#define __sha512_process_block __sha512_process_block_generic -extern void __sha512_process_block_generic (const void *buffer, size_t len, - struct sha512_ctx *ctx); - -#include <crypt/sha512-block.c> - -#undef __sha512_process_block - -extern void __sha512_process_block_crop (const void *buffer, size_t len, - struct sha512_ctx *ctx); - -static bool cpu_supports_sha512(int hwcap) -{ - unsigned long cfr; - - if (!(hwcap & HWCAP_SPARC_CRYPTO)) - return false; - - __asm__ ("rd %%asr26, %0" : "=r" (cfr)); - if (cfr & (1 << 6)) - return true; - - return false; -} - -extern void __sha512_process_block (const void *buffer, size_t len, - struct sha512_ctx *ctx); -sparc_libc_ifunc (__sha512_process_block, - cpu_supports_sha512(hwcap) ? __sha512_process_block_crop - : __sha512_process_block_generic); diff --git a/sysdeps/sparc/sparc64/multiarch/sha512-crop.S b/sysdeps/sparc/sparc64/multiarch/sha512-crop.S deleted file mode 100644 index f78354c485..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/sha512-crop.S +++ /dev/null @@ -1,131 +0,0 @@ -/* SHA512 using sparc crypto opcodes. - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@davemloft.net) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#define SHA512 \ - .word 0x81b02860; - - .text - .align 32 -ENTRY(__sha512_process_block_crop) - /* %o0=buffer, %o1=len, %o2=CTX */ - ldx [%o2 + 0x48], %g1 - add %g1, %o1, %o4 - stx %o4, [%o2 + 0x48] - cmp %o4, %g1 - bgeu,pt %xcc, 1f - nop - ldx [%o2 + 0x40], %g1 - add %g1, 1, %g1 - stx %g1, [%o2 + 0x40] - -1: ldd [%o2 + 0x00], %f0 - ldd [%o2 + 0x08], %f2 - ldd [%o2 + 0x10], %f4 - ldd [%o2 + 0x18], %f6 - ldd [%o2 + 0x20], %f8 - ldd [%o2 + 0x28], %f10 - andcc %o1, 0x7, %g0 - ldd [%o2 + 0x30], %f12 - bne,pn %xcc, 10f - ldd [%o2 + 0x38], %f14 - -1: - ldd [%o0 + 0x00], %f16 - ldd [%o0 + 0x08], %f18 - ldd [%o0 + 0x10], %f20 - ldd [%o0 + 0x18], %f22 - ldd [%o0 + 0x20], %f24 - ldd [%o0 + 0x28], %f26 - ldd [%o0 + 0x30], %f28 - ldd [%o0 + 0x38], %f30 - ldd [%o0 + 0x40], %f32 - ldd [%o0 + 0x48], %f34 - ldd [%o0 + 0x50], %f36 - ldd [%o0 + 0x58], %f38 - ldd [%o0 + 0x60], %f40 - ldd [%o0 + 0x68], %f42 - ldd [%o0 + 0x70], %f44 - ldd [%o0 + 0x78], %f46 - - SHA512 - - subcc %o1, 0x80, %o1 - bne,pt %xcc, 1b - add %o0, 0x80, %o0 - -5: - std %f0, [%o2 + 0x00] - std %f2, [%o2 + 0x08] - std %f4, [%o2 + 0x10] - std %f6, [%o2 + 0x18] - std %f8, [%o2 + 0x20] - std %f10, [%o2 + 0x28] - std %f12, [%o2 + 0x30] - retl - std %f14, [%o2 + 0x38] -10: - alignaddr %o0, %g0, %o0 - - ldd [%o0 + 0x00], %f18 -1: - ldd [%o0 + 0x08], %f20 - ldd [%o0 + 0x10], %f22 - ldd [%o0 + 0x18], %f24 - ldd [%o0 + 0x20], %f26 - ldd [%o0 + 0x28], %f28 - ldd [%o0 + 0x30], %f30 - ldd [%o0 + 0x38], %f32 - ldd [%o0 + 0x40], %f34 - ldd [%o0 + 0x48], %f36 - ldd [%o0 + 0x50], %f38 - ldd [%o0 + 0x58], %f40 - ldd [%o0 + 0x60], %f42 - ldd [%o0 + 0x68], %f44 - ldd [%o0 + 0x70], %f46 - ldd [%o0 + 0x78], %f48 - ldd [%o0 + 0x80], %f50 - - faligndata %f18, %f20, %f16 - faligndata %f20, %f22, %f18 - faligndata %f22, %f24, %f20 - faligndata %f24, %f26, %f22 - faligndata %f26, %f28, %f24 - faligndata %f28, %f30, %f26 - faligndata %f30, %f32, %f28 - faligndata %f32, %f34, %f30 - faligndata %f34, %f36, %f32 - faligndata %f36, %f38, %f34 - faligndata %f38, %f40, %f36 - faligndata %f40, %f42, %f38 - faligndata %f42, %f44, %f40 - faligndata %f44, %f46, %f42 - faligndata %f46, %f48, %f44 - faligndata %f48, %f50, %f46 - - SHA512 - - subcc %o1, 0x80, %o1 - fsrc2 %f50, %f18 - bne,pt %xcc, 1b - add %o0, 0x80, %o0 - - ba,a,pt %xcc, 5b -END(__sha512_process_block_crop) diff --git a/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S b/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S deleted file mode 100644 index 2d2a75dff8..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S +++ /dev/null @@ -1,71 +0,0 @@ -! SPARC v9 64-bit VIS3 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 -! and store difference in a third limb vector. -! -! Copyright (C) 2013-2017 Free Software Foundation, Inc. -! This file is part of the GNU C Library. -! Contributed by David S. Miller <davem@davemloft.net> -! -! The GNU C Library is free software; you can redistribute it and/or -! modify it under the terms of the GNU Lesser General Public -! License as published by the Free Software Foundation; either -! version 2.1 of the License, or (at your option) any later version. -! -! The GNU C Library is distributed in the hope that it will be useful, -! but WITHOUT ANY WARRANTY; without even the implied warranty of -! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -! Lesser General Public License for more details. -! -! You should have received a copy of the GNU Lesser General Public -! License along with the GNU C Library; if not, see -! <http://www.gnu.org/licenses/>. - -#include <sysdep.h> - -#define res_ptr %o0 -#define s1_ptr %o1 -#define s2_ptr %o2 -#define sz %o3 -#define tmp1 %g1 -#define tmp2 %g2 -#define tmp3 %g3 -#define tmp4 %o4 - - .register %g2,#scratch - .register %g3,#scratch -ENTRY(__mpn_sub_n_vis3) - subcc sz, 1, sz - be .Lfinal_limb - cmp %g0, 1 - -.Lloop: - ldx [s2_ptr + 0x00], tmp1 - add s2_ptr, 0x10, s2_ptr - ldx [s1_ptr + 0x00], tmp2 - add s1_ptr, 0x10, s1_ptr - ldx [s2_ptr - 0x08], tmp3 - add res_ptr, 0x10, res_ptr - ldx [s1_ptr - 0x08], tmp4 - sub sz, 2, sz - xnor tmp1, %g0, tmp1 - addxccc tmp1, tmp2, tmp1 - stx tmp1, [res_ptr - 0x10] - xnor tmp3, %g0, tmp3 - addxccc tmp3, tmp4, tmp3 - brgz sz, .Lloop - stx tmp3, [res_ptr - 0x08] - - brlz,pt sz, .Lfinish - nop - -.Lfinal_limb: - ldx [s2_ptr + 0x00], tmp1 - ldx [s1_ptr + 0x00], tmp2 - xnor tmp1, %g0, tmp1 - addxccc tmp1, tmp2, tmp1 - stx tmp1, [res_ptr + 0x00] - -.Lfinish: - clr %o0 - retl - movcc %xcc, 1, %o0 -END(__mpn_sub_n_vis3) diff --git a/sysdeps/sparc/sparc64/multiarch/sub_n.S b/sysdeps/sparc/sparc64/multiarch/sub_n.S deleted file mode 100644 index d20a286df1..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/sub_n.S +++ /dev/null @@ -1,56 +0,0 @@ -/* Multiple versions of sub_n - - Copyright (C) 2013-2017 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -ENTRY(__mpn_sub_n) - .type __mpn_sub_n, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_VIS3, %o1 - andcc %o0, %o1, %g0 - be 1f - nop -# ifdef SHARED - sethi %gdop_hix22(__mpn_sub_n_vis3), %o1 - xor %o1, %gdop_lox10(__mpn_sub_n_vis3), %o1 -# else - set __mpn_sub_n_vis3, %o1 -# endif - ba 10f - nop -1: -# ifdef SHARED - sethi %gdop_hix22(__mpn_sub_n_generic), %o1 - xor %o1, %gdop_lox10(__mpn_sub_n_generic), %o1 -# else - set __mpn_sub_n_generic, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__mpn_sub_n) - -#define __mpn_sub_n __mpn_sub_n_generic -#include "../sub_n.S" diff --git a/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S b/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S deleted file mode 100644 index 99644491e7..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S +++ /dev/null @@ -1,87 +0,0 @@ -! SPARC v9 64-bit VIS3 __mpn_submul_1 -- Multiply a limb vector with a -! limb and subtract the result from a second limb vector. -! -! Copyright (C) 2013-2017 Free Software Foundation, Inc. -! This file is part of the GNU C Library. -! Contributed by David S. Miller <davem@davemloft.net> -! -! The GNU C Library is free software; you can redistribute it and/or -! modify it under the terms of the GNU Lesser General Public -! License as published by the Free Software Foundation; either -! version 2.1 of the License, or (at your option) any later version. -! -! The GNU C Library is distributed in the hope that it will be useful, -! but WITHOUT ANY WARRANTY; without even the implied warranty of -! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -! Lesser General Public License for more details. -! -! You should have received a copy of the GNU Lesser General Public -! License along with the GNU C Library; if not, see -! <http://www.gnu.org/licenses/>. - -#include <sysdep.h> - -#define res_ptr %i0 -#define s1_ptr %i1 -#define sz %i2 -#define s2_limb %i3 -#define carry %o5 -#define tmp1 %g1 -#define tmp2 %g2 -#define tmp3 %g3 -#define tmp4 %o4 -#define tmp5 %l0 -#define tmp6 %l1 -#define tmp7 %l2 -#define tmp8 %l3 - - .register %g2,#scratch - .register %g3,#scratch -ENTRY(__mpn_submul_1_vis3) - save %sp, -176, %sp - subcc sz, 1, sz - be .Lfinal_limb - clr carry - -.Lloop: - ldx [s1_ptr + 0x00], tmp1 - ldx [res_ptr + 0x00], tmp3 - ldx [s1_ptr + 0x08], tmp2 - ldx [res_ptr + 0x08], tmp4 - mulx tmp1, s2_limb, tmp5 - add s1_ptr, 0x10, s1_ptr - umulxhi tmp1, s2_limb, tmp6 - add res_ptr, 0x10, res_ptr - mulx tmp2, s2_limb, tmp7 - sub sz, 2, sz - umulxhi tmp2, s2_limb, tmp8 - addcc carry, tmp5, tmp5 - addxc %g0, tmp6, carry - subcc tmp3, tmp5, tmp5 - addxc %g0, carry, carry - stx tmp5, [res_ptr - 0x10] - addcc carry, tmp7, tmp7 - addxc %g0, tmp8, carry - subcc tmp4, tmp7, tmp7 - addxc %g0, carry, carry - brgz sz, .Lloop - stx tmp7, [res_ptr - 0x08] - - brlz,pt sz, .Lfinish - nop - -.Lfinal_limb: - ldx [s1_ptr + 0x00], tmp1 - ldx [res_ptr + 0x00], tmp3 - mulx tmp1, s2_limb, tmp5 - umulxhi tmp1, s2_limb, tmp6 - addcc carry, tmp5, tmp5 - addxc %g0, tmp6, carry - subcc tmp3, tmp5, tmp5 - addxc %g0, carry, carry - stx tmp5, [res_ptr + 0x00] - -.Lfinish: - jmpl %i7 + 8, %g0 - restore carry, 0, %o0 -END(__mpn_submul_1_vis3) diff --git a/sysdeps/sparc/sparc64/multiarch/submul_1.S b/sysdeps/sparc/sparc64/multiarch/submul_1.S deleted file mode 100644 index 3c297d989b..0000000000 --- a/sysdeps/sparc/sparc64/multiarch/submul_1.S +++ /dev/null @@ -1,56 +0,0 @@ -/* Multiple versions of submul_1 - - Copyright (C) 2013-2017 Free Software Foundation, Inc. - Contributed by David S. Miller (davem@davemloft.net) - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -ENTRY(__mpn_submul_1) - .type __mpn_submul_1, @gnu_indirect_function -# ifdef SHARED - SETUP_PIC_REG_LEAF(o3, o5) -# endif - set HWCAP_SPARC_VIS3, %o1 - andcc %o0, %o1, %g0 - be 1f - nop -# ifdef SHARED - sethi %gdop_hix22(__mpn_submul_1_vis3), %o1 - xor %o1, %gdop_lox10(__mpn_submul_1_vis3), %o1 -# else - set __mpn_submul_1_vis3, %o1 -# endif - ba 10f - nop -1: -# ifdef SHARED - sethi %gdop_hix22(__mpn_submul_1_generic), %o1 - xor %o1, %gdop_lox10(__mpn_submul_1_generic), %o1 -# else - set __mpn_submul_1_generic, %o1 -# endif -10: -# ifdef SHARED - add %o3, %o1, %o1 -# endif - retl - mov %o1, %o0 -END(__mpn_submul_1) - -#define __mpn_submul_1 __mpn_submul_1_generic -#include "../submul_1.S" |