diff options
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | Makeconfig | 25 | ||||
-rw-r--r-- | Makerules | 4 | ||||
-rw-r--r-- | elf/ifuncmain1.c | 13 | ||||
-rw-r--r-- | elf/ifuncmain5.c | 9 | ||||
-rw-r--r-- | elf/rtld-Rules | 2 | ||||
-rw-r--r-- | inet/Makefile | 4 | ||||
-rw-r--r-- | stdlib/isomac.c | 1 | ||||
-rw-r--r-- | sysdeps/aarch64/memcpy.S | 187 | ||||
-rw-r--r-- | sysdeps/aarch64/multiarch/Makefile | 2 | ||||
-rw-r--r-- | sysdeps/aarch64/multiarch/ifunc-impl-list.c | 2 | ||||
-rw-r--r-- | sysdeps/aarch64/multiarch/memcpy.c | 6 | ||||
-rw-r--r-- | sysdeps/aarch64/multiarch/memcpy_advsimd.S | 248 | ||||
-rw-r--r-- | sysdeps/aarch64/multiarch/memmove.c | 6 | ||||
-rw-r--r-- | sysdeps/gnu/Makefile | 3 |
15 files changed, 139 insertions, 377 deletions
diff --git a/ChangeLog b/ChangeLog index f9157d179b..95b7e2a59e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2019-02-03 Aurelien Jarno <aurelien@aurel32.net> + + * stdlib/isomac.c: Include <unistd.h>. + 2018-11-07 Florian Weimer <fweimer@redhat.com> Implement TEST_COMPARE_STRING. diff --git a/Makeconfig b/Makeconfig index f5e81bdf5d..3b8a199347 100644 --- a/Makeconfig +++ b/Makeconfig @@ -42,6 +42,22 @@ else objdir must be defined by the build-directory Makefile. endif +# Did we request 'make -s' run? "yes" or "no". +# Starting from make-4.4 MAKEFLAGS now contains long +# options like '--shuffle'. To detect presence of 's' +# we pick first word with short options. Long options +# are guaranteed to come after whitespace. We use '-' +# prefix to always have a word before long options +# even if no short options were passed. +# Typical MAKEFLAGS values to watch for: +# "rs --shuffle=42" (silent) +# " --shuffle" (not silent) +ifeq ($(findstring s, $(firstword -$(MAKEFLAGS))),) +silent-make := no +else +silent-make := yes +endif + # Root of the sysdeps tree. sysdep_dir := $(..)sysdeps export sysdep_dir := $(sysdep_dir) @@ -546,8 +562,13 @@ ifeq (yes,$(build-shared)) link-libc-rpath = -Wl,-rpath=$(rpath-link) link-libc-rpath-link = -Wl,-rpath-link=$(rpath-link) +# Tests use -Wl,-rpath instead of -Wl,-rpath-link for +# build-hardcoded-path-in-tests. Add -Wl,--disable-new-dtags to force +# DT_RPATH instead of DT_RUNPATH which only applies to DT_NEEDED entries +# in the executable and doesn't applies to DT_NEEDED entries in shared +# libraries which are loaded via DT_NEEDED entries in the executable. ifeq (yes,$(build-hardcoded-path-in-tests)) -link-libc-tests-rpath-link = $(link-libc-rpath) +link-libc-tests-rpath-link = $(link-libc-rpath) -Wl,--disable-new-dtags else link-libc-tests-rpath-link = $(link-libc-rpath-link) endif # build-hardcoded-path-in-tests @@ -880,7 +901,7 @@ endif # umpteen zillion filenames along with it (we use `...' instead) # but we don't want this echoing done when the user has said # he doesn't want to see commands echoed by using -s. -ifneq "$(findstring s,$(MAKEFLAGS))" "" # if -s +ifeq ($(silent-make),yes) # if -s +cmdecho := echo >/dev/null else # not -s +cmdecho := echo diff --git a/Makerules b/Makerules index a10a0b4d70..005b01ff40 100644 --- a/Makerules +++ b/Makerules @@ -875,7 +875,7 @@ endif # Maximize efficiency by minimizing the number of rules. .SUFFIXES: # Clear the suffix list. We don't use suffix rules. # Don't define any builtin rules. -MAKEFLAGS := $(MAKEFLAGS)r +MAKEFLAGS := $(MAKEFLAGS) -r # Generic rule for making directories. %/: @@ -892,7 +892,7 @@ MAKEFLAGS := $(MAKEFLAGS)r .PRECIOUS: $(foreach l,$(libtypes),$(patsubst %,$(common-objpfx)$l,c)) # Use the verbose option of ar and tar when not running silently. -ifeq "$(findstring s,$(MAKEFLAGS))" "" # if not -s +ifeq ($(silent-make),no) # if not -s verbose := v else # -s verbose := diff --git a/elf/ifuncmain1.c b/elf/ifuncmain1.c index 747fc02648..6effce3d77 100644 --- a/elf/ifuncmain1.c +++ b/elf/ifuncmain1.c @@ -19,7 +19,14 @@ typedef int (*foo_p) (void); #endif foo_p foo_ptr = foo; + +/* Address-significant access to protected symbols is not supported in + position-dependent mode on several architectures because GCC + generates relocations that assume that the address is local to the + main program. */ +#ifdef __PIE__ foo_p foo_procted_ptr = foo_protected; +#endif extern foo_p get_foo_p (void); extern foo_p get_foo_hidden_p (void); @@ -37,12 +44,16 @@ main (void) if ((*foo_ptr) () != -1) abort (); +#ifdef __PIE__ if (foo_procted_ptr != foo_protected) abort (); +#endif if (foo_protected () != 0) abort (); +#ifdef __PIE__ if ((*foo_procted_ptr) () != 0) abort (); +#endif p = get_foo_p (); if (p != foo) @@ -55,8 +66,10 @@ main (void) abort (); p = get_foo_protected_p (); +#ifdef __PIE__ if (p != foo_protected) abort (); +#endif if (ret_foo_protected != 0 || (*p) () != ret_foo_protected) abort (); diff --git a/elf/ifuncmain5.c b/elf/ifuncmain5.c index f398085cb4..6fda768fb6 100644 --- a/elf/ifuncmain5.c +++ b/elf/ifuncmain5.c @@ -14,12 +14,19 @@ get_foo (void) return foo; } + +/* Address-significant access to protected symbols is not supported in + position-dependent mode on several architectures because GCC + generates relocations that assume that the address is local to the + main program. */ +#ifdef __PIE__ foo_p __attribute__ ((noinline)) get_foo_protected (void) { return foo_protected; } +#endif int main (void) @@ -30,9 +37,11 @@ main (void) if ((*p) () != -1) abort (); +#ifdef __PIE__ p = get_foo_protected (); if ((*p) () != 0) abort (); +#endif return 0; } diff --git a/elf/rtld-Rules b/elf/rtld-Rules index 6fd9494ba3..c7a7d882e6 100644 --- a/elf/rtld-Rules +++ b/elf/rtld-Rules @@ -52,7 +52,7 @@ $(objpfx)rtld-libc.a: $(foreach dir,$(rtld-subdirs),\ mv -f $@T $@ # Use the verbose option of ar and tar when not running silently. -ifeq "$(findstring s,$(MAKEFLAGS))" "" # if not -s +ifeq ($(silent-make),no) # if not -s verbose := v else # -s verbose := diff --git a/inet/Makefile b/inet/Makefile index 7782913b4c..62d25f8535 100644 --- a/inet/Makefile +++ b/inet/Makefile @@ -112,4 +112,8 @@ ifeq ($(build-static-nss),yes) CFLAGS += -DSTATIC_NSS endif +# The test uses dlopen indirectly and would otherwise load system +# objects. +tst-idna_name_classify-ENV = \ + LD_LIBRARY_PATH=$(objpfx):$(common-objpfx):$(common-objpfx)elf $(objpfx)tst-idna_name_classify.out: $(gen-locales) diff --git a/stdlib/isomac.c b/stdlib/isomac.c index 0873eaa505..439b000734 100644 --- a/stdlib/isomac.c +++ b/stdlib/isomac.c @@ -74,6 +74,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <unistd.h> #define HEADER_MAX 256 diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S index 919b28d7e1..36353cab21 100644 --- a/sysdeps/aarch64/memcpy.S +++ b/sysdeps/aarch64/memcpy.S @@ -1,4 +1,5 @@ -/* Copyright (C) 2012-2018 Free Software Foundation, Inc. +/* Generic optimized memcpy using SIMD. + Copyright (C) 2012-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -20,7 +21,7 @@ /* Assumptions: * - * ARMv8-a, AArch64, unaligned accesses. + * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. * */ @@ -36,21 +37,18 @@ #define B_l x8 #define B_lw w8 #define B_h x9 -#define C_l x10 #define C_lw w10 -#define C_h x11 -#define D_l x12 -#define D_h x13 -#define E_l x14 -#define E_h x15 -#define F_l x16 -#define F_h x17 -#define G_l count -#define G_h dst -#define H_l src -#define H_h srcend #define tmp1 x14 +#define A_q q0 +#define B_q q1 +#define C_q q2 +#define D_q q3 +#define E_q q4 +#define F_q q5 +#define G_q q6 +#define H_q q7 + #ifndef MEMMOVE # define MEMMOVE memmove #endif @@ -69,8 +67,7 @@ Large copies use a software pipelined loop processing 64 bytes per iteration. The destination pointer is 16-byte aligned to minimize unaligned accesses. The loop tail is handled by always copying 64 bytes - from the end. -*/ + from the end. */ ENTRY_ALIGN (MEMCPY, 6) DELOUSE (0) @@ -87,10 +84,10 @@ ENTRY_ALIGN (MEMCPY, 6) /* Small copies: 0..32 bytes. */ cmp count, 16 b.lo L(copy16) - ldp A_l, A_h, [src] - ldp D_l, D_h, [srcend, -16] - stp A_l, A_h, [dstin] - stp D_l, D_h, [dstend, -16] + ldr A_q, [src] + ldr B_q, [srcend, -16] + str A_q, [dstin] + str B_q, [dstend, -16] ret /* Copy 8-15 bytes. */ @@ -102,7 +99,6 @@ L(copy16): str A_h, [dstend, -8] ret - .p2align 3 /* Copy 4-7 bytes. */ L(copy8): tbz count, 2, L(copy4) @@ -128,81 +124,62 @@ L(copy0): .p2align 4 /* Medium copies: 33..128 bytes. */ L(copy32_128): - ldp A_l, A_h, [src] - ldp B_l, B_h, [src, 16] - ldp C_l, C_h, [srcend, -32] - ldp D_l, D_h, [srcend, -16] + ldp A_q, B_q, [src] + ldp C_q, D_q, [srcend, -32] cmp count, 64 b.hi L(copy128) - stp A_l, A_h, [dstin] - stp B_l, B_h, [dstin, 16] - stp C_l, C_h, [dstend, -32] - stp D_l, D_h, [dstend, -16] + stp A_q, B_q, [dstin] + stp C_q, D_q, [dstend, -32] ret .p2align 4 /* Copy 65..128 bytes. */ L(copy128): - ldp E_l, E_h, [src, 32] - ldp F_l, F_h, [src, 48] + ldp E_q, F_q, [src, 32] cmp count, 96 b.ls L(copy96) - ldp G_l, G_h, [srcend, -64] - ldp H_l, H_h, [srcend, -48] - stp G_l, G_h, [dstend, -64] - stp H_l, H_h, [dstend, -48] + ldp G_q, H_q, [srcend, -64] + stp G_q, H_q, [dstend, -64] L(copy96): - stp A_l, A_h, [dstin] - stp B_l, B_h, [dstin, 16] - stp E_l, E_h, [dstin, 32] - stp F_l, F_h, [dstin, 48] - stp C_l, C_h, [dstend, -32] - stp D_l, D_h, [dstend, -16] + stp A_q, B_q, [dstin] + stp E_q, F_q, [dstin, 32] + stp C_q, D_q, [dstend, -32] ret - .p2align 4 + /* Align loop64 below to 16 bytes. */ + nop + /* Copy more than 128 bytes. */ L(copy_long): - /* Copy 16 bytes and then align dst to 16-byte alignment. */ - ldp D_l, D_h, [src] - and tmp1, dstin, 15 - bic dst, dstin, 15 - sub src, src, tmp1 + /* Copy 16 bytes and then align src to 16-byte alignment. */ + ldr D_q, [src] + and tmp1, src, 15 + bic src, src, 15 + sub dst, dstin, tmp1 add count, count, tmp1 /* Count is now 16 too large. */ - ldp A_l, A_h, [src, 16] - stp D_l, D_h, [dstin] - ldp B_l, B_h, [src, 32] - ldp C_l, C_h, [src, 48] - ldp D_l, D_h, [src, 64]! + ldp A_q, B_q, [src, 16] + str D_q, [dstin] + ldp C_q, D_q, [src, 48] subs count, count, 128 + 16 /* Test and readjust count. */ b.ls L(copy64_from_end) - L(loop64): - stp A_l, A_h, [dst, 16] - ldp A_l, A_h, [src, 16] - stp B_l, B_h, [dst, 32] - ldp B_l, B_h, [src, 32] - stp C_l, C_h, [dst, 48] - ldp C_l, C_h, [src, 48] - stp D_l, D_h, [dst, 64]! - ldp D_l, D_h, [src, 64]! + stp A_q, B_q, [dst, 16] + ldp A_q, B_q, [src, 80] + stp C_q, D_q, [dst, 48] + ldp C_q, D_q, [src, 112] + add src, src, 64 + add dst, dst, 64 subs count, count, 64 b.hi L(loop64) /* Write the last iteration and copy 64 bytes from the end. */ L(copy64_from_end): - ldp E_l, E_h, [srcend, -64] - stp A_l, A_h, [dst, 16] - ldp A_l, A_h, [srcend, -48] - stp B_l, B_h, [dst, 32] - ldp B_l, B_h, [srcend, -32] - stp C_l, C_h, [dst, 48] - ldp C_l, C_h, [srcend, -16] - stp D_l, D_h, [dst, 64] - stp E_l, E_h, [dstend, -64] - stp A_l, A_h, [dstend, -48] - stp B_l, B_h, [dstend, -32] - stp C_l, C_h, [dstend, -16] + ldp E_q, F_q, [srcend, -64] + stp A_q, B_q, [dst, 16] + ldp A_q, B_q, [srcend, -32] + stp C_q, D_q, [dst, 48] + stp E_q, F_q, [dstend, -64] + stp A_q, B_q, [dstend, -32] ret END (MEMCPY) @@ -220,64 +197,56 @@ ENTRY_ALIGN (MEMMOVE, 4) cmp count, 32 b.hi L(copy32_128) - /* Small copies: 0..32 bytes. */ + /* Small moves: 0..32 bytes. */ cmp count, 16 b.lo L(copy16) - ldp A_l, A_h, [src] - ldp D_l, D_h, [srcend, -16] - stp A_l, A_h, [dstin] - stp D_l, D_h, [dstend, -16] + ldr A_q, [src] + ldr B_q, [srcend, -16] + str A_q, [dstin] + str B_q, [dstend, -16] ret - .p2align 4 L(move_long): /* Only use backward copy if there is an overlap. */ sub tmp1, dstin, src - cbz tmp1, L(copy0) + cbz tmp1, L(move0) cmp tmp1, count b.hs L(copy_long) /* Large backwards copy for overlapping copies. - Copy 16 bytes and then align dst to 16-byte alignment. */ - ldp D_l, D_h, [srcend, -16] - and tmp1, dstend, 15 - sub srcend, srcend, tmp1 + Copy 16 bytes and then align srcend to 16-byte alignment. */ +L(copy_long_backwards): + ldr D_q, [srcend, -16] + and tmp1, srcend, 15 + bic srcend, srcend, 15 sub count, count, tmp1 - ldp A_l, A_h, [srcend, -16] - stp D_l, D_h, [dstend, -16] - ldp B_l, B_h, [srcend, -32] - ldp C_l, C_h, [srcend, -48] - ldp D_l, D_h, [srcend, -64]! + ldp A_q, B_q, [srcend, -32] + str D_q, [dstend, -16] + ldp C_q, D_q, [srcend, -64] sub dstend, dstend, tmp1 subs count, count, 128 b.ls L(copy64_from_start) L(loop64_backwards): - stp A_l, A_h, [dstend, -16] - ldp A_l, A_h, [srcend, -16] - stp B_l, B_h, [dstend, -32] - ldp B_l, B_h, [srcend, -32] - stp C_l, C_h, [dstend, -48] - ldp C_l, C_h, [srcend, -48] - stp D_l, D_h, [dstend, -64]! - ldp D_l, D_h, [srcend, -64]! + str B_q, [dstend, -16] + str A_q, [dstend, -32] + ldp A_q, B_q, [srcend, -96] + str D_q, [dstend, -48] + str C_q, [dstend, -64]! + ldp C_q, D_q, [srcend, -128] + sub srcend, srcend, 64 subs count, count, 64 b.hi L(loop64_backwards) /* Write the last iteration and copy 64 bytes from the start. */ L(copy64_from_start): - ldp G_l, G_h, [src, 48] - stp A_l, A_h, [dstend, -16] - ldp A_l, A_h, [src, 32] - stp B_l, B_h, [dstend, -32] - ldp B_l, B_h, [src, 16] - stp C_l, C_h, [dstend, -48] - ldp C_l, C_h, [src] - stp D_l, D_h, [dstend, -64] - stp G_l, G_h, [dstin, 48] - stp A_l, A_h, [dstin, 32] - stp B_l, B_h, [dstin, 16] - stp C_l, C_h, [dstin] + ldp E_q, F_q, [src, 32] + stp A_q, B_q, [dstend, -32] + ldp A_q, B_q, [src] + stp C_q, D_q, [dstend, -64] + stp E_q, F_q, [dstin, 32] + stp A_q, B_q, [dstin] +L(move0): ret END (MEMMOVE) diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile index 134fe463fd..57ffdf7238 100644 --- a/sysdeps/aarch64/multiarch/Makefile +++ b/sysdeps/aarch64/multiarch/Makefile @@ -1,4 +1,4 @@ ifeq ($(subdir),string) -sysdep_routines += memcpy_generic memcpy_advsimd memcpy_thunderx memcpy_thunderx2 \ +sysdep_routines += memcpy_generic memcpy_thunderx memcpy_thunderx2 \ memcpy_falkor memmove_falkor memset_generic memset_falkor endif diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c index 0ccd14118c..e55be80103 100644 --- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c +++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c @@ -42,12 +42,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx2) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_falkor) - IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_simd) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic)) IFUNC_IMPL (i, name, memmove, IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_falkor) - IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_simd) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic)) IFUNC_IMPL (i, name, memset, /* Enable this on non-falkor processors too so that other cores diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c index e69a1ae5af..4a04a63b0f 100644 --- a/sysdeps/aarch64/multiarch/memcpy.c +++ b/sysdeps/aarch64/multiarch/memcpy.c @@ -29,7 +29,6 @@ extern __typeof (__redirect_memcpy) __libc_memcpy; extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden; -extern __typeof (__redirect_memcpy) __memcpy_simd attribute_hidden; extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden; extern __typeof (__redirect_memcpy) __memcpy_thunderx2 attribute_hidden; extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden; @@ -41,10 +40,7 @@ libc_ifunc (__libc_memcpy, ? __memcpy_falkor : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr) ? __memcpy_thunderx2 - : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr) - || IS_NEOVERSE_V1 (midr) - ? __memcpy_simd - : __memcpy_generic))))); + : __memcpy_generic)))); # undef memcpy strong_alias (__libc_memcpy, memcpy); diff --git a/sysdeps/aarch64/multiarch/memcpy_advsimd.S b/sysdeps/aarch64/multiarch/memcpy_advsimd.S deleted file mode 100644 index 48bb6d7ca4..0000000000 --- a/sysdeps/aarch64/multiarch/memcpy_advsimd.S +++ /dev/null @@ -1,248 +0,0 @@ -/* Generic optimized memcpy using SIMD. - Copyright (C) 2020 Free Software Foundation, Inc. - - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - <https://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -/* Assumptions: - * - * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. - * - */ - -#define dstin x0 -#define src x1 -#define count x2 -#define dst x3 -#define srcend x4 -#define dstend x5 -#define A_l x6 -#define A_lw w6 -#define A_h x7 -#define B_l x8 -#define B_lw w8 -#define B_h x9 -#define C_lw w10 -#define tmp1 x14 - -#define A_q q0 -#define B_q q1 -#define C_q q2 -#define D_q q3 -#define E_q q4 -#define F_q q5 -#define G_q q6 -#define H_q q7 - - -/* This implementation supports both memcpy and memmove and shares most code. - It uses unaligned accesses and branchless sequences to keep the code small, - simple and improve performance. - - Copies are split into 3 main cases: small copies of up to 32 bytes, medium - copies of up to 128 bytes, and large copies. The overhead of the overlap - check in memmove is negligible since it is only required for large copies. - - Large copies use a software pipelined loop processing 64 bytes per - iteration. The destination pointer is 16-byte aligned to minimize - unaligned accesses. The loop tail is handled by always copying 64 bytes - from the end. */ - -ENTRY (__memcpy_simd) - DELOUSE (0) - DELOUSE (1) - DELOUSE (2) - - add srcend, src, count - add dstend, dstin, count - cmp count, 128 - b.hi L(copy_long) - cmp count, 32 - b.hi L(copy32_128) - - /* Small copies: 0..32 bytes. */ - cmp count, 16 - b.lo L(copy16) - ldr A_q, [src] - ldr B_q, [srcend, -16] - str A_q, [dstin] - str B_q, [dstend, -16] - ret - - /* Copy 8-15 bytes. */ -L(copy16): - tbz count, 3, L(copy8) - ldr A_l, [src] - ldr A_h, [srcend, -8] - str A_l, [dstin] - str A_h, [dstend, -8] - ret - - /* Copy 4-7 bytes. */ -L(copy8): - tbz count, 2, L(copy4) - ldr A_lw, [src] - ldr B_lw, [srcend, -4] - str A_lw, [dstin] - str B_lw, [dstend, -4] - ret - - /* Copy 0..3 bytes using a branchless sequence. */ -L(copy4): - cbz count, L(copy0) - lsr tmp1, count, 1 - ldrb A_lw, [src] - ldrb C_lw, [srcend, -1] - ldrb B_lw, [src, tmp1] - strb A_lw, [dstin] - strb B_lw, [dstin, tmp1] - strb C_lw, [dstend, -1] -L(copy0): - ret - - .p2align 4 - /* Medium copies: 33..128 bytes. */ -L(copy32_128): - ldp A_q, B_q, [src] - ldp C_q, D_q, [srcend, -32] - cmp count, 64 - b.hi L(copy128) - stp A_q, B_q, [dstin] - stp C_q, D_q, [dstend, -32] - ret - - .p2align 4 - /* Copy 65..128 bytes. */ -L(copy128): - ldp E_q, F_q, [src, 32] - cmp count, 96 - b.ls L(copy96) - ldp G_q, H_q, [srcend, -64] - stp G_q, H_q, [dstend, -64] -L(copy96): - stp A_q, B_q, [dstin] - stp E_q, F_q, [dstin, 32] - stp C_q, D_q, [dstend, -32] - ret - - /* Align loop64 below to 16 bytes. */ - nop - - /* Copy more than 128 bytes. */ -L(copy_long): - /* Copy 16 bytes and then align src to 16-byte alignment. */ - ldr D_q, [src] - and tmp1, src, 15 - bic src, src, 15 - sub dst, dstin, tmp1 - add count, count, tmp1 /* Count is now 16 too large. */ - ldp A_q, B_q, [src, 16] - str D_q, [dstin] - ldp C_q, D_q, [src, 48] - subs count, count, 128 + 16 /* Test and readjust count. */ - b.ls L(copy64_from_end) -L(loop64): - stp A_q, B_q, [dst, 16] - ldp A_q, B_q, [src, 80] - stp C_q, D_q, [dst, 48] - ldp C_q, D_q, [src, 112] - add src, src, 64 - add dst, dst, 64 - subs count, count, 64 - b.hi L(loop64) - - /* Write the last iteration and copy 64 bytes from the end. */ -L(copy64_from_end): - ldp E_q, F_q, [srcend, -64] - stp A_q, B_q, [dst, 16] - ldp A_q, B_q, [srcend, -32] - stp C_q, D_q, [dst, 48] - stp E_q, F_q, [dstend, -64] - stp A_q, B_q, [dstend, -32] - ret - -END (__memcpy_simd) -libc_hidden_builtin_def (__memcpy_simd) - - -ENTRY (__memmove_simd) - DELOUSE (0) - DELOUSE (1) - DELOUSE (2) - - add srcend, src, count - add dstend, dstin, count - cmp count, 128 - b.hi L(move_long) - cmp count, 32 - b.hi L(copy32_128) - - /* Small moves: 0..32 bytes. */ - cmp count, 16 - b.lo L(copy16) - ldr A_q, [src] - ldr B_q, [srcend, -16] - str A_q, [dstin] - str B_q, [dstend, -16] - ret - -L(move_long): - /* Only use backward copy if there is an overlap. */ - sub tmp1, dstin, src - cbz tmp1, L(move0) - cmp tmp1, count - b.hs L(copy_long) - - /* Large backwards copy for overlapping copies. - Copy 16 bytes and then align srcend to 16-byte alignment. */ -L(copy_long_backwards): - ldr D_q, [srcend, -16] - and tmp1, srcend, 15 - bic srcend, srcend, 15 - sub count, count, tmp1 - ldp A_q, B_q, [srcend, -32] - str D_q, [dstend, -16] - ldp C_q, D_q, [srcend, -64] - sub dstend, dstend, tmp1 - subs count, count, 128 - b.ls L(copy64_from_start) - -L(loop64_backwards): - str B_q, [dstend, -16] - str A_q, [dstend, -32] - ldp A_q, B_q, [srcend, -96] - str D_q, [dstend, -48] - str C_q, [dstend, -64]! - ldp C_q, D_q, [srcend, -128] - sub srcend, srcend, 64 - subs count, count, 64 - b.hi L(loop64_backwards) - - /* Write the last iteration and copy 64 bytes from the start. */ -L(copy64_from_start): - ldp E_q, F_q, [src, 32] - stp A_q, B_q, [dstend, -32] - ldp A_q, B_q, [src] - stp C_q, D_q, [dstend, -64] - stp E_q, F_q, [dstin, 32] - stp A_q, B_q, [dstin] -L(move0): - ret - -END (__memmove_simd) -libc_hidden_builtin_def (__memmove_simd) diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c index b426dad834..e69d816291 100644 --- a/sysdeps/aarch64/multiarch/memmove.c +++ b/sysdeps/aarch64/multiarch/memmove.c @@ -29,7 +29,6 @@ extern __typeof (__redirect_memmove) __libc_memmove; extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden; -extern __typeof (__redirect_memmove) __memmove_simd attribute_hidden; extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden; extern __typeof (__redirect_memmove) __memmove_falkor attribute_hidden; @@ -38,10 +37,7 @@ libc_ifunc (__libc_memmove, ? __memmove_thunderx : (IS_FALKOR (midr) || IS_PHECDA (midr) ? __memmove_falkor - : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr) - || IS_NEOVERSE_V1 (midr) - ? __memmove_simd - : __memmove_generic)))); + : __memmove_generic))); # undef memmove strong_alias (__libc_memmove, memmove); diff --git a/sysdeps/gnu/Makefile b/sysdeps/gnu/Makefile index ae0c3fc0aa..ccb5ed8bea 100644 --- a/sysdeps/gnu/Makefile +++ b/sysdeps/gnu/Makefile @@ -54,8 +54,7 @@ $(objpfx)errlist-compat.h: $(objpfx)errlist-compat.c generated += errlist-compat.c errlist-compat.h # This will force the generation above to happy if need be. -$(foreach o,$(object-suffixes) $(object-suffixes:=.d),\ - $(objpfx)errlist$o): $(objpfx)errlist-compat.h +$(foreach o,$(object-suffixes),$(objpfx)errlist$o): $(objpfx)errlist-compat.h endif ifeq ($(subdir),login) |