diff options
37 files changed, 1938 insertions, 132 deletions
diff --git a/INSTALL b/INSTALL index 208ec98d4b..4bd3d53676 100644 --- a/INSTALL +++ b/INSTALL @@ -224,6 +224,23 @@ if 'CFLAGS' is specified it must enable optimization. For example: By default for x86_64, the GNU C Library is built with the vector math library. Use this option to disable the vector math library. +'--disable-static-c++-tests' + By default, if the C++ toolchain lacks support for static linking, + configure fails to find the C++ header files and the glibc build + fails. '--disable-static-c++-link-check' allows the glibc build to + finish, but static C++ tests will fail if the C++ toolchain doesn't + have the necessary static C++ libraries. Use this option to skip + the static C++ tests. This option implies + '--disable-static-c++-link-check'. + +'--disable-static-c++-link-check' + By default, if the C++ toolchain lacks support for static linking, + configure fails to find the C++ header files and the glibc build + fails. Use this option to disable the static C++ link check so + that the C++ header files can be located. The newly built libc.a + can be used to create static C++ tests if the C++ toolchain has the + necessary static C++ libraries. + '--disable-scv' Disable using 'scv' instruction for syscalls. All syscalls will use 'sc' instead, even if the kernel supports 'scv'. PowerPC only. 
diff --git a/catgets/Makefile b/catgets/Makefile index 24b4560d5f..40c65eac95 100644 --- a/catgets/Makefile +++ b/catgets/Makefile @@ -43,8 +43,12 @@ tests-special += \ $(objpfx)test-gencat.out \ $(objpfx)test1.cat \ $(objpfx)test2.cat \ - $(objpfx)tst-catgets-mem.out # tests-special +ifeq (yes,$(build-shared)) +ifneq ($(PERL),no) +tests-special += $(objpfx)tst-catgets-mem.out +endif +endif endif gencat-modules = xmalloc @@ -68,9 +72,17 @@ generated += \ test1.h \ test2.cat \ test2.h \ + # generated +ifeq ($(run-built-tests),yes) +ifeq (yes,$(build-shared)) +ifneq ($(PERL),no) +generated += \ tst-catgets-mem.out \ tst-catgets.mtrace \ # generated +endif +endif +endif generated-dirs += \ de \ diff --git a/configure b/configure index 1df2f2e6d1..1bae55b45b 100755 --- a/configure +++ b/configure @@ -771,6 +771,8 @@ ac_user_opts=' enable_option_checking with_pkgversion with_bugurl +enable_static_c___tests +enable_static_c___link_check with_gd with_gd_include with_gd_lib @@ -1440,6 +1442,10 @@ Optional Features: --disable-option-checking ignore unrecognized --enable/--with options --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --disable-static-c++-tests + disable static C++ tests[default=no] + --disable-static-c++-link-check + disable static C++ link check [default=no] --disable-sanity-checks really do not use threads (should not be used except in special situations) [default=yes] --enable-shared build shared library [default=yes if GNU ld] @@ -3855,6 +3861,29 @@ if test -z "$CPP"; then fi +# This will get text that should go into config.make. +config_vars= + +# Check whether --enable-static-c++-tests was given. +if test ${enable_static_c___tests+y} +then : + enableval=$enable_static_c___tests; static_cxx_tests=$enableval +else $as_nop + static_cxx_tests=yes +fi + +config_vars="$config_vars +static-cxx-tests = $static_cxx_tests" + +# Check whether --enable-static-c++-link-check was given. 
+if test ${enable_static_c___link_check+y} +then : + enableval=$enable_static_c___link_check; static_cxx_link_check=$enableval +else $as_nop + static_cxx_link_check=yes +fi + + # We need the C++ compiler only for testing. @@ -4279,10 +4308,11 @@ esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -# Static case. -old_LDFLAGS="$LDFLAGS" -LDFLAGS="$LDFLAGS -static" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext +if test $static_cxx_link_check$static_cxx_tests = yesyes; then + # Static case. + old_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -static" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include <iostream> @@ -4304,7 +4334,8 @@ esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LDFLAGS="$old_LDFLAGS" + LDFLAGS="$old_LDFLAGS" +fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -4324,9 +4355,6 @@ if test "`cd $srcdir; pwd -P`" = "`pwd -P`"; then as_fn_error $? "you must configure in a separate build directory" "$LINENO" 5 fi -# This will get text that should go into config.make. -config_vars= - # Check for a --with-gd argument and set libgd-LDFLAGS in config.make. # Check whether --with-gd was given. diff --git a/configure.ac b/configure.ac index bdc385d03c..e48957f318 100644 --- a/configure.ac +++ b/configure.ac @@ -52,6 +52,22 @@ fi AC_SUBST(cross_compiling) AC_PROG_CPP +# This will get text that should go into config.make. 
+config_vars= + +AC_ARG_ENABLE([static-c++-tests], + AS_HELP_STRING([--disable-static-c++-tests], + [disable static C++ tests@<:@default=no@:>@]), + [static_cxx_tests=$enableval], + [static_cxx_tests=yes]) +LIBC_CONFIG_VAR([static-cxx-tests], [$static_cxx_tests]) + +AC_ARG_ENABLE([static-c++-link-check], + AS_HELP_STRING([--disable-static-c++-link-check], + [disable static C++ link check @<:@default=no@:>@]), + [static_cxx_link_check=$enableval], + [static_cxx_link_check=yes]) + # We need the C++ compiler only for testing. AC_PROG_CXX # It's useless to us if it can't link programs (e.g. missing -lstdc++). @@ -61,10 +77,11 @@ AC_LANG_PUSH([C++]) AC_LINK_IFELSE([AC_LANG_PROGRAM([], [])], [libc_cv_cxx_link_ok=yes], [libc_cv_cxx_link_ok=no]) -# Static case. -old_LDFLAGS="$LDFLAGS" -LDFLAGS="$LDFLAGS -static" -AC_LINK_IFELSE([AC_LANG_SOURCE([ +if test $static_cxx_link_check$static_cxx_tests = yesyes; then + # Static case. + old_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -static" + AC_LINK_IFELSE([AC_LANG_SOURCE([ #include <iostream> int @@ -74,9 +91,10 @@ main() return 0; } ])], - [], - [libc_cv_cxx_link_ok=no]) -LDFLAGS="$old_LDFLAGS" + [], + [libc_cv_cxx_link_ok=no]) + LDFLAGS="$old_LDFLAGS" +fi AC_LANG_POP([C++])]) AS_IF([test $libc_cv_cxx_link_ok != yes], [CXX=]) @@ -84,9 +102,6 @@ if test "`cd $srcdir; pwd -P`" = "`pwd -P`"; then AC_MSG_ERROR([you must configure in a separate build directory]) fi -# This will get text that should go into config.make. -config_vars= - # Check for a --with-gd argument and set libgd-LDFLAGS in config.make. 
AC_ARG_WITH([gd], AS_HELP_STRING([--with-gd=DIR], diff --git a/elf/Makefile b/elf/Makefile index bb6cd06dec..147f1d3437 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -73,6 +73,7 @@ dl-routines = \ dl-origin \ dl-printf \ dl-reloc \ + dl-rseq-symbols \ dl-runtime \ dl-scope \ dl-setup_hash \ @@ -445,6 +446,7 @@ tests += \ tst-p_align1 \ tst-p_align2 \ tst-p_align3 \ + tst-recursive-tls \ tst-relsort1 \ tst-ro-dynamic \ tst-rtld-run-static \ @@ -632,13 +634,19 @@ $(objpfx)tst-rtld-does-not-exist.out: tst-rtld-does-not-exist.sh $(objpfx)ld.so tests += $(tests-execstack-$(have-z-execstack)) ifeq ($(run-built-tests),yes) tests-special += \ - $(objpfx)noload-mem.out \ $(objpfx)tst-ldconfig-X.out \ $(objpfx)tst-ldconfig-p.out \ $(objpfx)tst-ldconfig-soname.out \ - $(objpfx)tst-leaks1-mem.out \ $(objpfx)tst-rtld-help.out \ # tests-special +ifeq (yes,$(build-shared)) +ifneq ($(PERL),no) +tests-special += \ + $(objpfx)noload-mem.out \ + $(objpfx)tst-leaks1-mem.out \ + # tests-special +endif +endif endif tlsmod17a-suffixes = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 tlsmod18a-suffixes = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 @@ -886,6 +894,23 @@ modules-names += \ tst-null-argv-lib \ tst-p_alignmod-base \ tst-p_alignmod3 \ + tst-recursive-tlsmallocmod \ + tst-recursive-tlsmod0 \ + tst-recursive-tlsmod1 \ + tst-recursive-tlsmod2 \ + tst-recursive-tlsmod3 \ + tst-recursive-tlsmod4 \ + tst-recursive-tlsmod5 \ + tst-recursive-tlsmod6 \ + tst-recursive-tlsmod7 \ + tst-recursive-tlsmod8 \ + tst-recursive-tlsmod9 \ + tst-recursive-tlsmod10 \ + tst-recursive-tlsmod11 \ + tst-recursive-tlsmod12 \ + tst-recursive-tlsmod13 \ + tst-recursive-tlsmod14 \ + tst-recursive-tlsmod15 \ tst-relsort1mod1 \ tst-relsort1mod2 \ tst-ro-dynamic-mod \ @@ -1200,7 +1225,6 @@ tests-special += \ $(objpfx)tst-trace3.out \ $(objpfx)tst-trace4.out \ $(objpfx)tst-trace5.out \ - $(objpfx)tst-tunables-enable_secure-env.out \ $(objpfx)tst-unused-dep-cmp.out \ $(objpfx)tst-unused-dep.out \ # 
tests-special @@ -2228,13 +2252,7 @@ $(objpfx)tst-unused-dep-cmp.out: $(objpfx)tst-unused-dep.out cmp $< /dev/null > $@; \ $(evaluate-test) -$(objpfx)tst-tunables-enable_secure-env.out: $(objpfx)tst-tunables-enable_secure-env - $(test-wrapper-env) \ - GLIBC_TUNABLES=glibc.rtld.enable_secure=1 \ - $(rtld-prefix) \ - $< > $@; \ - $(evaluate-test) - +tst-tunables-enable_secure-env-ARGS = -- $(host-test-program-cmd) $(objpfx)tst-audit11.out: $(objpfx)tst-auditmod11.so $(objpfx)tst-audit11mod1.so tst-audit11-ENV = LD_AUDIT=$(objpfx)tst-auditmod11.so @@ -3093,3 +3111,11 @@ CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=$(have-mtls-descriptor) CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=$(have-mtls-descriptor) CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=$(have-mtls-descriptor) endif + +$(objpfx)tst-recursive-tls: $(objpfx)tst-recursive-tlsmallocmod.so +# More objects than DTV_SURPLUS, to trigger DTV reallocation. +$(objpfx)tst-recursive-tls.out: \ + $(patsubst %,$(objpfx)tst-recursive-tlsmod%.so, \ + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) +$(objpfx)tst-recursive-tlsmod%.os: tst-recursive-tlsmodN.c + $(compile-command.c) -DVAR=thread_$* -DFUNC=get_threadvar_$* diff --git a/elf/dl-rseq-symbols.S b/elf/dl-rseq-symbols.S new file mode 100644 index 0000000000..b4bba06a99 --- /dev/null +++ b/elf/dl-rseq-symbols.S @@ -0,0 +1,64 @@ +/* Define symbols used by rseq. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if __WORDSIZE == 64 +#define RSEQ_OFFSET_SIZE 8 +#else +#define RSEQ_OFFSET_SIZE 4 +#endif + +/* Some targets define a macro to denote the zero register. */ +#undef zero + +/* Define 2 symbols: '__rseq_size' is public const and '_rseq_size' (an + alias of '__rseq_size') is hidden and writable for internal use by the + dynamic linker which will initialize the value both symbols point to + before copy relocations take place. */ + + .globl __rseq_size + .type __rseq_size, %object + .size __rseq_size, 4 + .hidden _rseq_size + .globl _rseq_size + .type _rseq_size, %object + .size _rseq_size, 4 + .section .data.rel.ro + .balign 4 +__rseq_size: +_rseq_size: + .zero 4 + +/* Define 2 symbols: '__rseq_offset' is public const and '_rseq_offset' (an + alias of '__rseq_offset') is hidden and writable for internal use by the + dynamic linker which will initialize the value both symbols point to + before copy relocations take place. */ + + .globl __rseq_offset + .type __rseq_offset, %object + .size __rseq_offset, RSEQ_OFFSET_SIZE + .hidden _rseq_offset + .globl _rseq_offset + .type _rseq_offset, %object + .size _rseq_offset, RSEQ_OFFSET_SIZE + .section .data.rel.ro + .balign RSEQ_OFFSET_SIZE +__rseq_offset: +_rseq_offset: + .zero RSEQ_OFFSET_SIZE diff --git a/elf/dl-tls.c b/elf/dl-tls.c index 670dbc42fc..3d221273f1 100644 --- a/elf/dl-tls.c +++ b/elf/dl-tls.c @@ -75,6 +75,31 @@ /* Default for dl_tls_static_optional. */ #define OPTIONAL_TLS 512 +/* Used to count the number of threads currently executing dynamic TLS + updates. Used to avoid recursive malloc calls in __tls_get_addr + for an interposed malloc that uses global-dynamic TLS (which is not + recommended); see _dl_tls_allocate_active checks. This could be a + per-thread flag, but would need TLS access in the dynamic linker. 
*/ +unsigned int _dl_tls_threads_in_update; + +static inline void +_dl_tls_allocate_begin (void) +{ + atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, 1); +} + +static inline void +_dl_tls_allocate_end (void) +{ + atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, -1); +} + +static inline bool +_dl_tls_allocate_active (void) +{ + return atomic_load_relaxed (&_dl_tls_threads_in_update) > 0; +} + /* Compute the static TLS surplus based on the namespace count and the TLS space that can be used for optimizations. */ static inline int @@ -425,12 +450,18 @@ _dl_allocate_tls_storage (void) size += TLS_PRE_TCB_SIZE; #endif - /* Perform the allocation. Reserve space for the required alignment - and the pointer to the original allocation. */ + /* Reserve space for the required alignment and the pointer to the + original allocation. */ size_t alignment = GLRO (dl_tls_static_align); + + /* Perform the allocation. */ + _dl_tls_allocate_begin (); void *allocated = malloc (size + alignment + sizeof (void *)); if (__glibc_unlikely (allocated == NULL)) - return NULL; + { + _dl_tls_allocate_end (); + return NULL; + } /* Perform alignment and allocate the DTV. */ #if TLS_TCB_AT_TP @@ -466,6 +497,8 @@ _dl_allocate_tls_storage (void) result = allocate_dtv (result); if (result == NULL) free (allocated); + + _dl_tls_allocate_end (); return result; } @@ -483,6 +516,7 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid) size_t newsize = max_modid + DTV_SURPLUS; size_t oldsize = dtv[-1].counter; + _dl_tls_allocate_begin (); if (dtv == GL(dl_initial_dtv)) { /* This is the initial dtv that was either statically allocated in @@ -502,6 +536,7 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid) if (newp == NULL) oom (); } + _dl_tls_allocate_end (); newp[0].counter = newsize; @@ -676,7 +711,9 @@ allocate_dtv_entry (size_t alignment, size_t size) if (powerof2 (alignment) && alignment <= _Alignof (max_align_t)) { /* The alignment is supported by malloc. 
*/ + _dl_tls_allocate_begin (); void *ptr = malloc (size); + _dl_tls_allocate_end (); return (struct dtv_pointer) { ptr, ptr }; } @@ -688,7 +725,10 @@ allocate_dtv_entry (size_t alignment, size_t size) /* Perform the allocation. This is the pointer we need to free later. */ + _dl_tls_allocate_begin (); void *start = malloc (alloc_size); + _dl_tls_allocate_end (); + if (start == NULL) return (struct dtv_pointer) {}; @@ -826,7 +866,11 @@ _dl_update_slotinfo (unsigned long int req_modid, size_t new_gen) free implementation. Checking here papers over at least some dynamic TLS usage by interposed mallocs. */ if (dtv[modid].pointer.to_free != NULL) - free (dtv[modid].pointer.to_free); + { + _dl_tls_allocate_begin (); + free (dtv[modid].pointer.to_free); + _dl_tls_allocate_end (); + } dtv[modid].pointer.val = TLS_DTV_UNALLOCATED; dtv[modid].pointer.to_free = NULL; @@ -956,10 +1000,22 @@ __tls_get_addr (GET_ADDR_ARGS) size_t gen = atomic_load_relaxed (&GL(dl_tls_generation)); if (__glibc_unlikely (dtv[0].counter != gen)) { - /* Update DTV up to the global generation, see CONCURRENCY NOTES - in _dl_update_slotinfo. */ - gen = atomic_load_acquire (&GL(dl_tls_generation)); - return update_get_addr (GET_ADDR_PARAM, gen); + if (_dl_tls_allocate_active () + && GET_ADDR_MODULE < _dl_tls_initial_modid_limit) + /* This is a reentrant __tls_get_addr call, but we can + satisfy it because it's an initially-loaded module ID. + These TLS slotinfo slots do not change, so the + out-of-date generation counter does not matter. However, + if not in a TLS update, still update_get_addr below, to + get off the slow path eventually. */ + ; + else + { + /* Update DTV up to the global generation, see CONCURRENCY NOTES + in _dl_update_slotinfo. 
*/ + gen = atomic_load_acquire (&GL(dl_tls_generation)); + return update_get_addr (GET_ADDR_PARAM, gen); + } } void *p = dtv[GET_ADDR_MODULE].pointer.val; @@ -969,7 +1025,7 @@ __tls_get_addr (GET_ADDR_ARGS) return (char *) p + GET_ADDR_OFFSET; } -#endif +#endif /* SHARED */ /* Look up the module's TLS block as for __tls_get_addr, @@ -1018,6 +1074,25 @@ _dl_tls_get_addr_soft (struct link_map *l) return data; } +size_t _dl_tls_initial_modid_limit; + +void +_dl_tls_initial_modid_limit_setup (void) +{ + struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list); + size_t idx; + for (idx = 0; idx < listp->len; ++idx) + { + struct link_map *l = listp->slotinfo[idx].map; + if (l == NULL + /* The object can be unloaded, so its modid can be + reassociated. */ + || !(l->l_type == lt_executable || l->l_type == lt_library)) + break; + } + _dl_tls_initial_modid_limit = idx; +} + void _dl_add_to_slotinfo (struct link_map *l, bool do_add) @@ -1050,9 +1125,11 @@ _dl_add_to_slotinfo (struct link_map *l, bool do_add) the first slot. */ assert (idx == 0); + _dl_tls_allocate_begin (); listp = (struct dtv_slotinfo_list *) malloc (sizeof (struct dtv_slotinfo_list) + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); + _dl_tls_allocate_end (); if (listp == NULL) { /* We ran out of memory while resizing the dtv slotinfo list. */ diff --git a/elf/rtld.c b/elf/rtld.c index e9525ea987..bfdf632e77 100644 --- a/elf/rtld.c +++ b/elf/rtld.c @@ -788,6 +788,8 @@ init_tls (size_t naudit) _dl_fatal_printf ("\ cannot allocate TLS data structures for initial thread\n"); + _dl_tls_initial_modid_limit_setup (); + /* Store for detection of the special case by __tls_get_addr so it knows not to pass this dtv to the normal realloc. */ GL(dl_initial_dtv) = GET_DTV (tcbp); @@ -1325,7 +1327,7 @@ _dl_start_args_adjust (int skip_args, int skip_env) /* Shuffle auxv down. 
*/ ElfW(auxv_t) ax; - char *oldp = (char *) (p + 1); + char *oldp = (char *) (p + 1 + skip_env); char *newp = (char *) (sp + 1); do { diff --git a/elf/tst-recursive-tls.c b/elf/tst-recursive-tls.c new file mode 100644 index 0000000000..716d1f783a --- /dev/null +++ b/elf/tst-recursive-tls.c @@ -0,0 +1,60 @@ +/* Test with interposed malloc with dynamic TLS. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <array_length.h> +#include <stdio.h> +#include <support/check.h> +#include <support/xdlfcn.h> + +/* Defined in tst-recursive-tlsmallocmod.so. */ +extern __thread unsigned int malloc_subsytem_counter; + +static int +do_test (void) +{ + /* 16 is large enough to exercise the DTV resizing case. */ + void *handles[16]; + + for (unsigned int i = 0; i < array_length (handles); ++i) + { + /* Re-use the TLS slot for module 0. */ + if (i > 0) + xdlclose (handles[0]); + + char soname[30]; + snprintf (soname, sizeof (soname), "tst-recursive-tlsmod%u.so", i); + handles[i] = xdlopen (soname, RTLD_NOW); + + if (i > 0) + { + handles[0] = xdlopen ("tst-recursive-tlsmod0.so", RTLD_NOW); + int (*fptr) (void) = xdlsym (handles[0], "get_threadvar_0"); + /* May trigger TLS storage allocation using malloc. 
*/ + TEST_COMPARE (fptr (), 0); + } + } + + for (unsigned int i = 0; i < array_length (handles); ++i) + xdlclose (handles[i]); + + printf ("info: malloc subsystem calls: %u\n", malloc_subsytem_counter); + TEST_VERIFY (malloc_subsytem_counter > 0); + return 0; +} + +#include <support/test-driver.c> diff --git a/elf/tst-recursive-tlsmallocmod.c b/elf/tst-recursive-tlsmallocmod.c new file mode 100644 index 0000000000..c24e9945d1 --- /dev/null +++ b/elf/tst-recursive-tlsmallocmod.c @@ -0,0 +1,64 @@ +/* Interposed malloc with dynamic TLS. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <stdlib.h> +#include <dlfcn.h> + +__thread unsigned int malloc_subsytem_counter; + +static __typeof (malloc) *malloc_fptr; +static __typeof (free) *free_fptr; +static __typeof (calloc) *calloc_fptr; +static __typeof (realloc) *realloc_fptr; + +static void __attribute__ ((constructor)) +init (void) +{ + malloc_fptr = dlsym (RTLD_NEXT, "malloc"); + free_fptr = dlsym (RTLD_NEXT, "free"); + calloc_fptr = dlsym (RTLD_NEXT, "calloc"); + realloc_fptr = dlsym (RTLD_NEXT, "realloc"); +} + +void * +malloc (size_t size) +{ + ++malloc_subsytem_counter; + return malloc_fptr (size); +} + +void +free (void *ptr) +{ + ++malloc_subsytem_counter; + free_fptr (ptr); +} + +void * +calloc (size_t a, size_t b) +{ + ++malloc_subsytem_counter; + return calloc_fptr (a, b); +} + +void * +realloc (void *ptr, size_t size) +{ + ++malloc_subsytem_counter; + return realloc_fptr (ptr, size); +} diff --git a/elf/tst-recursive-tlsmodN.c b/elf/tst-recursive-tlsmodN.c new file mode 100644 index 0000000000..bb7592aee6 --- /dev/null +++ b/elf/tst-recursive-tlsmodN.c @@ -0,0 +1,28 @@ +/* Test module with global-dynamic TLS. Used to trigger DTV reallocation. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* Compiled with VAR and FUNC set via -D. 
FUNC requires some + relocation against TLS variable VAR. */ + +__thread int VAR; + +int +FUNC (void) +{ + return VAR; +} diff --git a/elf/tst-tunables-enable_secure-env.c b/elf/tst-tunables-enable_secure-env.c index 24e846f299..01f121efc3 100644 --- a/elf/tst-tunables-enable_secure-env.c +++ b/elf/tst-tunables-enable_secure-env.c @@ -17,15 +17,136 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ +#include <array_length.h> +#include <errno.h> +#include <getopt.h> +#include <intprops.h> +#include <stdlib.h> #include <support/capture_subprocess.h> #include <support/check.h> +#ifdef __linux__ +# define HAVE_AUXV 1 +# include <sys/auxv.h> +#else +# define HAVE_AUXV 0 +#endif + +/* Nonzero if the program gets called via `exec'. */ +#define CMDLINE_OPTIONS \ + { "restart", no_argument, &restart, 1 }, +static int restart; + +/* Hold the four initial arguments used to respawn the process, plus the extra + '--direct', '--restart', auxiliary vector values, and final NULL. */ +static char *spargs[11]; + +#if HAVE_AUXV +static void +check_auxv (unsigned long type, char *argv) +{ + char *endptr; + errno = 0; + unsigned long int varg = strtoul (argv, &endptr, 10); /* Sent as %lu. */ + TEST_VERIFY_EXIT (errno == 0); + TEST_VERIFY_EXIT (*endptr == '\0'); + errno = 0; + unsigned long int v = getauxval (type); + TEST_COMPARE (errno, 0); + TEST_COMPARE (varg, v); +} +#endif + +/* Called on process re-execution. */ +_Noreturn static void +handle_restart (int argc, char *argv[]) +{ + TEST_VERIFY (getenv ("GLIBC_TUNABLES") == NULL); + TEST_VERIFY (getenv ("LD_BIND_NOW") == NULL); + +#if HAVE_AUXV + TEST_VERIFY_EXIT (argc == 4); + check_auxv (AT_PHENT, argv[0]); + check_auxv (AT_PHNUM, argv[1]); + check_auxv (AT_PAGESZ, argv[2]); + check_auxv (AT_HWCAP, argv[3]); +#endif + + exit (EXIT_SUCCESS); +} static int do_test (int argc, char *argv[]) { - /* Ensure that no assertions are hit when a dynamically linked application - runs. 
This test requires that GLIBC_TUNABLES=glibc.rtld.enable_secure=1 - is set. */ + /* We must have either: + + - four parameters if called initially: + + path for ld.so [optional] + + "--library-path" [optional] + + the library path [optional] + + the application name + + - four parameters left if called through re-execution. + + auxiliary vector value 1 + + auxiliary vector value 2 + + auxiliary vector value 3 + + auxiliary vector value 4 + */ + if (restart) + handle_restart (argc - 1, &argv[1]); + + TEST_VERIFY_EXIT (argc == 2 || argc == 5); + +#if HAVE_AUXV + struct + { + unsigned long int type; + char str[INT_BUFSIZE_BOUND (unsigned long)]; + } auxvals[] = + { + /* Check some auxiliary values that should be constant over process + re-execution. */ + { AT_PHENT }, + { AT_PHNUM }, + { AT_PAGESZ }, + { AT_HWCAP }, + }; + for (int i = 0; i < array_length (auxvals); i++) + { + unsigned long int v = getauxval (auxvals[i].type); + snprintf (auxvals[i].str, sizeof auxvals[i].str, "%lu", v); + } +#endif + + { + int i; + for (i = 0; i < argc - 1; i++) + spargs[i] = argv[i + 1]; + spargs[i++] = (char *) "--direct"; + spargs[i++] = (char *) "--restart"; +#if HAVE_AUXV + for (int j = 0; j < array_length (auxvals); j++) + spargs[i++] = auxvals[j].str; +#endif + spargs[i] = NULL; + } + + { + char *envs[] = + { + /* Add some environment variables that should be filtered out. */ + (char *) "GLIBC_TUNABLES=glibc.rtld.enable_secure=1", + (char* ) "LD_BIND_NOW=0", + NULL, + }; + struct support_capture_subprocess result + = support_capture_subprogram (spargs[0], spargs, envs); + support_capture_subprocess_check (&result, + "tst-tunables-enable_secure-env", + 0, + sc_allow_none); + support_capture_subprocess_free (&result); + } + return 0; } diff --git a/io/bits/fcntl2.h b/io/bits/fcntl2.h index c8888b50c1..0cced392e7 100644 --- a/io/bits/fcntl2.h +++ b/io/bits/fcntl2.h @@ -61,13 +61,8 @@ open (const char *__path, int __oflag, ...) 
return __open_alias (__path, __oflag, __va_arg_pack ()); } #elif __fortify_use_clang -__fortify_function_error_function __attribute_overloadable__ int -open (__fortify_clang_overload_arg (const char *, ,__path), int __oflag, ...) - __fortify_clang_unavailable ("open can be called either with 2 or 3 arguments, not more"); - __fortify_function __attribute_overloadable__ int open (__fortify_clang_overload_arg (const char *, ,__path), int __oflag) - __fortify_clang_prefer_this_overload __fortify_clang_error (__OPEN_NEEDS_MODE (__oflag), "open with O_CREAT or O_TMPFILE in second argument needs 3 arguments") { diff --git a/libio/Makefile b/libio/Makefile index f607edbefb..8720381fdc 100644 --- a/libio/Makefile +++ b/libio/Makefile @@ -261,15 +261,28 @@ tst-bz22415-ENV = MALLOC_TRACE=$(objpfx)tst-bz22415.mtrace \ tst-bz24228-ENV = MALLOC_TRACE=$(objpfx)tst-bz24228.mtrace \ LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so -generated += test-fmemopen.mtrace test-fmemopen.check -generated += tst-fdopen-seek-failure.mtrace tst-fdopen-seek-failure.check -generated += tst-fopenloc.mtrace tst-fopenloc.check -generated += tst-bz22415.mtrace tst-bz22415.check - aux := fileops genops stdfiles stdio strops +ifeq ($(run-built-tests),yes) +ifeq ($(build-shared),yes) +ifneq ($(PERL),no) +generated += \ + test-fmemopen.check \ + test-fmemopen.mtrace \ + tst-bz22415.check \ + tst-bz22415.mtrace \ + tst-bz24228.check \ + tst-bz24228.mtrace \ + tst-fdopen-seek-failure.check \ + tst-fdopen-seek-failure.mtrace \ + tst-fopenloc.check \ + tst-fopenloc.mtrace \ + # generated +endif +endif +endif + ifeq ($(build-shared),yes) -generated += tst-bz24228.mtrace tst-bz24228.check aux += oldfileops oldstdfiles tests += \ tst-stderr-compat \ @@ -286,16 +299,23 @@ shared-only-routines = oldiofopen oldiofdopen oldiofclose oldfileops \ ifeq ($(run-built-tests),yes) tests-special += \ - $(objpfx)test-fmemopen-mem.out \ $(objpfx)test-freopen.out \ - $(objpfx)tst-bz22415-mem.out \ - 
$(objpfx)tst-fdopen-seek-failure-mem.out \ # tests-special ifeq (yes,$(build-shared)) # Run tst-fopenloc-cmp.out and tst-openloc-mem.out only if shared # library is enabled since they depend on tst-fopenloc.out. -tests-special += $(objpfx)tst-fopenloc-cmp.out $(objpfx)tst-fopenloc-mem.out \ - $(objpfx)tst-bz24228-mem.out +tests-special += $(objpfx)tst-fopenloc-cmp.out +ifeq ($(build-shared),yes) +ifneq ($(PERL),no) +tests-special += \ + $(objpfx)test-fmemopen-mem.out \ + $(objpfx)tst-bz22415-mem.out \ + $(objpfx)tst-bz24228-mem.out \ + $(objpfx)tst-fdopen-seek-failure-mem.out \ + $(objpfx)tst-fopenloc-mem.out \ + # tests-special +endif +endif endif tests += \ diff --git a/manual/dynlink.texi b/manual/dynlink.texi index d71f7a30d6..03565d4fb0 100644 --- a/manual/dynlink.texi +++ b/manual/dynlink.texi @@ -15,6 +15,7 @@ Dynamic linkers are sometimes called @dfn{dynamic loaders}. @menu * Dynamic Linker Invocation:: Explicit invocation of the dynamic linker. * Dynamic Linker Introspection:: Interfaces for querying mapping information. +* Dynamic Linker Hardening:: Avoiding unexpected issues with dynamic linking. @end menu @node Dynamic Linker Invocation @@ -535,6 +536,563 @@ information is processed. This function is a GNU extension. @end deftypefun +@node Dynamic Linker Hardening +@section Avoiding Unexpected Issues With Dynamic Linking + +This section details recommendations for increasing application +robustness, by avoiding potential issues related to dynamic linking. +The recommendations have two main aims: reduce the involvement of the +dynamic linker in application execution after process startup, and +restrict the application to a dynamic linker feature set whose behavior +is more easily understood. + +Key aspects of limiting dynamic linker usage after startup are: no use +of the @code{dlopen} function, disabling lazy binding, and using the +static TLS model. 
More easily understood dynamic linker behavior +requires avoiding name conflicts (symbols and sonames) and highly +customizable features like the audit subsystem. + +Note that while these steps can be considered a form of application +hardening, they do not guard against potential harm from accidental or +deliberate loading of untrusted or malicious code. There is only +limited overlap with traditional security hardening for applications +running on GNU systems. + +@subsection Restricted Dynamic Linker Features + +Avoiding certain dynamic linker features can increase predictability of +applications and reduce the risk of running into dynamic linker defects. + +@itemize @bullet +@item +Do not use the functions @code{dlopen}, @code{dlmopen}, or +@code{dlclose}. Dynamic loading and unloading of shared objects +introduces substantial complications related to symbol and thread-local +storage (TLS) management. + +@item +Without the @code{dlopen} function, @code{dlsym} and @code{dlvsym} +cannot be used with shared object handles. Minimizing the use of both +functions is recommended. If they have to be used, only the +@code{RTLD_DEFAULT} pseudo-handle should be used. + +@item +Use the local-exec or initial-exec TLS models. If @code{dlopen} is not +used, there are no compatibility concerns for initial-exec TLS. This +TLS model avoids most of the complexity around TLS access. In +particular, there are no TLS-related run-time memory allocations after +process or thread start. + +If shared objects are expected to be used more generally, outside the +hardened, feature-restricted context, lack of compatibility between +@code{dlopen} and initial-exec TLS could be a concern. In that case, +the second-best alternative is to use global-dynamic TLS with GNU2 TLS +descriptors, for targets that fully implement them, including the fast +path for access to TLS variables defined in the initially loaded set of +objects. 
Like initial-exec TLS, this avoids memory allocations after +thread creation, but only if the @code{dlopen} function is not used. + +@item +Do not use lazy binding. Lazy binding may require run-time memory +allocation, is not async-signal-safe, and introduces considerable +complexity. + +@item +Make dependencies on shared objects explicit. Do not assume that +certain libraries (such as @code{libc.so.6}) are always loaded. +Specifically, if a main program or shared object references a symbol, +create an ELF @code{DT_NEEDED} dependency on that shared object, or on +another shared object that is documented (or otherwise guaranteed) to +have the required explicit dependency. Referencing a symbol without a +matching link dependency results in underlinking, and underlinked +objects cannot always be loaded correctly: Initialization of objects may +not happen in the required order. + +@item +Do not create dependency loops between shared objects (@code{libA.so.1} +depending on @code{libB.so.1} depending on @code{libC.so.1} depending on +@code{libA.so.1}). @Theglibc{} has to initialize one of the objects in +the cycle first, and the choice of that object is arbitrary and can +change over time. The object which is initialized first (and other +objects involved in the cycle) may not run correctly because not all of +its dependencies have been initialized. + +Underlinking (see above) can hide the presence of cycles. + +@item +Limit the creation of indirect function (IFUNC) resolvers. These +resolvers run during relocation processing, when @theglibc{} is not in +a fully consistent state. If you write your own IFUNC resolvers, do +not depend on external data or function references in those resolvers. + +@item +Do not use the audit functionality (@code{LD_AUDIT}, @code{DT_AUDIT}, +@code{DT_DEPAUDIT}). Its callback and hooking capabilities introduce a +lot of complexity and subtly alter dynamic linker behavior in corner +cases even if the audit module is inactive. 
+ +@item +Do not use symbol interposition. Without symbol interposition, the +exact order in which shared objects are searched is less relevant. + +Exceptions to this rule are copy relocations (see the next item), and +vague linkage, as used by the C++ implementation (see below). + +@item +One potential source of symbol interposition is a combination of static +and dynamic linking, namely linking a static archive into multiple +dynamic shared objects. For such scenarios, the static library should +be converted into its own dynamic shared object. + +A different approach to this situation uses hidden visibility for +symbols in the static library, but this can cause problems if the +library does not expect that multiple copies of its code coexist within +the same process, with no or partial sharing of state. + +@item +If you use shared objects that are linked with @option{-Wl,-Bsymbolic} +(or equivalent) or use protected visibility, the code for the main +program must be built as @option{-fpic} or @option{-fPIC} to avoid +creating copy relocations (and the main program must not use copy +relocations for other reasons). Using @option{-fpie} or @option{-fPIE} +is not an alternative to PIC code in this context. + +@item +Be careful about explicit section annotations. Make sure that the +target section matches the properties of the declared entity (e.g., no +writable objects in @code{.text}). + +@item +Ensure that all assembler or object input files have the recommended +security markup, particularly for non-executable stack. + +@item +Avoid using non-default linker flags and features. In particular, do +not use the @code{DT_PREINIT_ARRAY} dynamic tag, and do not flag +objects as @code{DF_1_INITFIRST}. Do not change the default linker +script of BFD ld. Do not override ABI defaults, such as the dynamic +linker path (with @option{--dynamic-linker}). + +@item +Some features of @theglibc{} indirectly depend on run-time code loading +and @code{dlopen}.
Use @code{iconv_open} with built-in converters only +(such as @code{UTF-8}). Do not use NSS functionality such as +@code{getaddrinfo} or @code{getpwuid_r} unless the system is configured +for built-in NSS service modules only (see below). +@end itemize + +Several considerations apply to ELF constructors and destructors. + +@itemize @bullet +@item +The dynamic linker does not take constructor and destructor priorities +into account when determining their execution order. Priorities are +only used by the link editor for ordering execution within a +completely linked object. If a dynamic shared object needs to be +initialized before another object, this can be expressed with a +@code{DT_NEEDED} dependency on the object that needs to be initialized +earlier. + +@item +The recommendations to avoid cyclic dependencies and symbol +interposition make it less likely that ELF objects are accessed before +their ELF constructors have run. However, using @code{dlsym} and +@code{dlvsym}, it is still possible to access uninitialized facilities +even with these restrictions in place. (Of course, access to +uninitialized functionality is also possible within a single shared +object or the main executable, without resorting to explicit symbol +lookup.) Consider using dynamic, on-demand initialization instead. To +deal with access after de-initialization, it may be necessary to +implement special cases for that scenario, potentially with degraded +functionality. + +@item +Be aware that when ELF destructors are executed, it is possible to +reference already-deconstructed shared objects. This can happen even in +the absence of @code{dlsym} and @code{dlvsym} function calls, for +example if client code using a shared object has registered callbacks or +objects with another shared object. The ELF destructor for the client +code is executed before the ELF destructor for the shared objects that +it uses, based on the expected dependency order. 
+ +@item +If @code{dlopen} and @code{dlmopen} are not used, @code{DT_NEEDED} +dependency information is complete, and lazy binding is disabled, the +execution order of ELF destructors is expected to be the reverse of the +ELF constructor order. However, two separate dependency sort operations +still occur. Even though the listed preconditions should ensure that +both sorts produce the same ordering, it is recommended not to depend on +the destructor order being the reverse of the constructor order. +@end itemize + +The following items provide C++-specific guidance for preparing +applications. If another programming language is used and it uses these +toolchain features targeted at C++ to implement some language +constructs, these restrictions and recommendations still apply in +analogous ways. + +@itemize @bullet +@item +C++ inline functions, templates, and other constructs may need to be +duplicated into multiple shared objects using vague linkage, resulting +in symbol interposition. This type of symbol interposition is +unproblematic, as long as the C++ one definition rule (ODR) is followed, +and all definitions in different translation units are equivalent +according to the C++ language rules. + +@item +Be aware that under C++ language rules, it is unspecified whether +evaluating a string literal results in the same address for each +evaluation. This also applies to anonymous objects of static storage +duration that GCC creates, for example to implement the compound +literals C++ extension. As a result, comparing pointers to such +objects, or using them directly as hash table keys, may give unexpected +results. + +By default, variables of block scope of static storage duration have consistent +addresses across different translation units, even if defined in +functions that use vague linkage.
+ +@item +Special care is needed if a C++ project uses symbol visibility or +symbol version management (for example, the GCC @samp{visibility} +attribute, the GCC @option{-fvisibility} option, or a linker version +script with the linker option @option{--version-script}). It is +necessary to ensure that the symbol management remains consistent with +how the symbols are used. Some C++ constructs are implemented with +the help of ancillary symbols, which can make it complicated to achieve +consistency. For example, an inline function that is always inlined +into its callers has no symbol footprint for the function itself, but +if the function contains a variable of static storage duration, this +variable may result in the creation of one or more global symbols. +For correctness, such symbols must be visible and bound to the same +object in all other places where the inline function may be called. +This requirement is not met if the symbol visibility is set to hidden, +or if symbols are assigned a textually different symbol version +(effectively creating two distinct symbols). + +Due to the complex interaction between ELF symbol management and C++ +symbol generation, it is recommended to use C++ language features for +symbol management, in particular inline namespaces. + +@item +The toolchain and dynamic linker have multiple mechanisms that bypass +the usual symbol binding procedures. This means that the C++ one +definition rule (ODR) still holds even if certain symbol-based isolation +mechanisms are used, and object addresses are not shared across +translation units with incompatible type definitions. + +This does not matter if the original (language-independent) advice +regarding symbol interposition is followed. However, as the advice may +be difficult to implement for C++ applications, it is recommended to +avoid ODR violations across the entire process image.
Inline namespaces +can be helpful in this context because they can be used to create +distinct ELF symbols while maintaining source code compatibility at the +C++ level. + +@item +Be aware that as a special case of interposed symbols, symbols with the +@code{STB_GNU_UNIQUE} binding type do not follow the usual ELF symbol +namespace isolation rules: such symbols bind across @code{RTLD_LOCAL} +boundaries. Furthermore, symbol versioning is ignored for such symbols; +they are bound by symbol name only. All their definitions and uses must +therefore be compatible. Hidden visibility still prevents the creation +of @code{STB_GNU_UNIQUE} symbols and can achieve isolation of +incompatible definitions. + +@item +C++ constructor priorities only affect constructor ordering within one +shared object. Global constructor order across shared objects is +consistent with ELF dependency ordering if there are no ELF dependency +cycles. + +@item +C++ exception handling and run-time type information (RTTI), as +implemented in the GNU toolchain, are not address-significant, and +therefore are not affected by the symbol binding behavior of the dynamic +linker. This means that types of the same fully-qualified name (in +non-anonymous namespaces) are always considered the same from an +exception-handling or RTTI perspective. This is true even if the type +information object or vtable has hidden symbol visibility, or the +corresponding symbols are versioned under different symbol versions, or +the symbols are not bound to the same objects due to the use of +@code{RTLD_LOCAL} or @code{dlmopen}. + +This can cause issues in applications that contain multiple incompatible +definitions of the same type. Inline namespaces can be used to create +distinct symbols at the ELF layer, avoiding this type of issue. + +@item +C++ exception handling across multiple @code{dlmopen} namespaces may +not work, particularly with the unwinder in GCC versions before 12.
+Current toolchain versions are able to process unwinding tables across +@code{dlmopen} boundaries. However, note that type comparison is +name-based, not address-based (see the previous item), so exception +types may still be matched in unexpected ways. An important special +case of exception handling, invoking destructors for variables of block +scope, is not impacted by this RTTI type-sharing. Likewise, regular +virtual member function dispatch for objects is unaffected (but still +requires that the type definitions match in all directly involved +translation units). + +Once more, inline namespaces can be used to create distinct ELF symbols +for different types. + +@item +Although the C++ standard requires that destructors for global objects +run in the opposite order of their constructors, the Itanium C++ ABI +requires a different destruction order in some cases. As a result, do +not depend on the precise destructor invocation order in applications +that use @code{dlclose}. + +@item +Registering destructors for later invocation allocates memory and may +silently fail if insufficient memory is available. As a result, the +destructor is never invoked. This applies to all forms of destructor +registration, with the exception of thread-local variables (see the next +item). To avoid this issue, ensure that such objects merely have +trivial destructors, avoiding the need for registration, and deallocate +resources using a different mechanism (for example, from an ELF +destructor). + +@item +A similar issue exists for @code{thread_local} variables with thread +storage duration of types that have non-trivial destructors. However, +in this case, memory allocation failure during registration leads to +process termination. If process termination is not acceptable, use +@code{thread_local} variables with trivial destructors only. 
+Functions for per-thread cleanup can be registered using +@code{pthread_key_create} (globally for all threads) and activated +using @code{pthread_setspecific} (on each thread). Note that a +@code{pthread_key_create} call may still fail (and +@code{pthread_key_create} keys are a limited resource in @theglibc{}), but +this failure can be handled without terminating the process. +@end itemize + +@subsection Producing Matching Binaries + +This subsection recommends tools and build flags for producing +applications that meet the recommendations of the previous subsection. + +@itemize @bullet +@item +Use BFD ld (@command{ld.bfd}) from GNU binutils to produce binaries, +invoked through a compiler driver such as @command{gcc}. The version +should not be too far ahead of what was current when the version of +@theglibc{} was first released. + +@item +Do not use a binutils release that is older than the one used to build +@theglibc{} itself. + +@item +Compile with @option{-ftls-model=initial-exec} to force the initial-exec +TLS model. + +@item +Link with @option{-Wl,-z,now} to disable lazy binding. + +@item +Link with @option{-Wl,-z,relro} to enable RELRO (which is the default on +most targets). + +@item +Specify all direct shared object dependencies using @option{-l} options +to avoid underlinking. Rely on @code{.so} files (which can be linker +scripts) and searching with the @option{-l} option. Do not specify the +file names of shared objects on the linker command line. + +@item +Consider using @option{-Wl,-z,defs} to treat underlinking as an error +condition. + +@item +When creating a shared object (linked with @option{-shared}), use +@option{-Wl,-soname,lib@dots{}} to set a soname that matches the final +installed name of the file. + +@item +Do not use the @option{-rpath} linker option. (As explained below, all +required shared objects should be installed into the default search +path.)
+ +@item +Use @option{-Wl,--error-rwx-segments} and @option{-Wl,--error-execstack} to +instruct the link editor to fail the link if the resulting final object +would have read-write-execute segments or an executable stack. Such +issues usually indicate that the input files are not marked up +correctly. + +@item +Ensure that for each @code{LOAD} segment in the ELF program header, file +offsets, memory sizes, and load addresses are multiples of the largest +page size supported at run time. Similarly, the start address and size +of the @code{GNU_RELRO} range should be multiples of the page size. + +Avoid creating gaps between @code{LOAD} segments. The difference +between the load addresses of two subsequent @code{LOAD} segments should +be the size of the first @code{LOAD} segment. (This may require linking +with @option{-Wl,-z,noseparate-code}.) + +This may not be possible to achieve with the currently available link +editors. + +@item +If the multiple-of-page-size criterion for the @code{GNU_RELRO} region +cannot be achieved, ensure that the process memory image right before +the start of the region does not contain executable or writable memory. +@c https://sourceware.org/pipermail/libc-alpha/2022-May/138638.html +@end itemize + +@subsection Checking Binaries + +In some cases, if the previous recommendations are not followed, this +can be determined from the produced binaries. This section contains +suggestions for verifying aspects of these binaries. + +@itemize @bullet +@item +To detect underlinking, examine the dynamic symbol table, for example +using @samp{readelf -sDW}. If the symbol is defined in a shared object +that uses symbol versioning, it must carry a symbol version, as in +@samp{pthread_kill@@GLIBC_2.34}. + +@item +Examine the dynamic segment with @samp{readelf -dW} to check that all +the required @code{NEEDED} entries are present. 
(It is not necessary to +list indirect dependencies if these dependencies are guaranteed to +remain during the evolution of the explicitly listed direct +dependencies.) + +@item +The @code{NEEDED} entries should not contain full path names including +slashes, only @code{sonames}. + +@item +For a further consistency check, collect all shared objects referenced +via @code{NEEDED} entries in dynamic segments, transitively, starting at +the main program. Then determine their dynamic symbol tables (using +@samp{readelf -sDW}, for example). Ideally, every symbol should be +defined at most once, so that symbol interposition does not happen. + +If there are interposed data symbols, check if the single interposing +definition is in the main program. In this case, there must be a copy +relocation for it. (This only applies to targets with copy relocations.) + +Function symbols should only be interposed in C++ applications, to +implement vague linkage. (See the discussion in the C++ recommendations +above.) + +@item +Using the previously collected @code{NEEDED} entries, check that the +dependency graph does not contain any cycles. + +@item +The dynamic segment should also mention @code{BIND_NOW} on the +@code{FLAGS} line or @code{NOW} on the @code{FLAGS_1} line (one is +enough). + +@item +For shared objects (not main programs), if the program header has a +@code{PT_TLS} segment, the dynamic segment (as shown by @samp{readelf +-dW}) should contain the @code{STATIC_TLS} flag on the @code{FLAGS} +line. + +If @code{STATIC_TLS} is missing in shared objects, ensure that the +appropriate relocations for GNU2 TLS descriptors are used (for example, +@code{R_AARCH64_TLSDESC} or @code{R_X86_64_TLSDESC}). + +@item +There should not be a reference to the symbols @code{__tls_get_addr}, +@code{__tls_get_offset}, @code{__tls_get_addr_opt} in the dynamic symbol +table (in the @samp{readelf -sDW} output). 
Thread-local storage must be +accessed using the initial-exec (static) model, or using GNU2 TLS +descriptors. + +@item +Likewise, the functions @code{dlopen}, @code{dlmopen}, @code{dlclose} +should not be referenced from the dynamic symbol table. + +@item +For shared objects, there should be a @code{SONAME} entry that matches +the file name (the base name, i.e., the part after the slash). The +@code{SONAME} string must not contain a slash @samp{/}. + +@item +For all objects, the dynamic segment (as shown by @samp{readelf -dW}) +should not contain @code{RPATH} or @code{RUNPATH} entries. + +@item +Likewise, the dynamic segment should not show any @code{AUDIT}, +@code{DEPAUDIT}, @code{AUXILIARY}, @code{FILTER}, or +@code{PREINIT_ARRAY} tags. + +@item +If the dynamic segment contains a (deprecated) @code{HASH} tag, it +must also contain a @code{GNU_HASH} tag. + +@item +The @code{INITFIRST} flag (under @code{FLAGS_1}) should not be used. + +@item +The program header must not have @code{LOAD} segments that are writable +and executable at the same time. + +@item +All produced objects should have a @code{GNU_STACK} program header that +is not marked as executable. (However, on some newer targets, a +non-executable stack is the default, so the @code{GNU_STACK} program +header is not required.) +@end itemize + +@subsection Run-time Considerations + +In addition to preparing program binaries in a recommended fashion, the +run-time environment should be set up in such a way that problematic +dynamic linker features are not used. + +@itemize @bullet +@item +Install shared objects using their sonames in a default search path +directory (usually @file{/usr/lib64}). Do not use symbolic links. +@c This is currently not standard practice. + +@item +The default search path must not contain objects with duplicate file +names or sonames.
+ +@item +Do not use environment variables (@code{LD_@dots{}} variables such as +@code{LD_PRELOAD} or @code{LD_LIBRARY_PATH}, or @code{GLIBC_TUNABLES}) +to change default dynamic linker behavior. + +@item +Do not install shared objects in non-default locations. (Such locations +are listed explicitly in the configuration file for @command{ldconfig}, +usually @file{/etc/ld.so.conf}, or in files included from there.) + +@item +In relation to the previous item, do not install any objects in +@code{glibc-hwcaps} subdirectories. + +@item +Do not configure dynamically-loaded NSS service modules, to avoid +accidental internal use of the @code{dlopen} facility. The @code{files} +and @code{dns} modules are built in and do not rely on @code{dlopen}. + +@item +Do not truncate and overwrite files containing programs and shared +objects in place, while they are used. Instead, write the new version +to a different path and use @code{rename} to replace the +already-installed version. + +@item +Be aware that during a component update procedure that involves multiple +object files (shared objects and main programs), concurrently starting +processes may observe an inconsistent combination of object files (some +already updated, some still at the previous version). For example, +this can happen during an update of @theglibc{} itself. +@end itemize @c FIXME these are undocumented: @c dladdr diff --git a/manual/install.texi b/manual/install.texi index 6504d02c62..a7847b02c0 100644 --- a/manual/install.texi +++ b/manual/install.texi @@ -252,6 +252,22 @@ configure with @option{--disable-werror}. By default for x86_64, @theglibc{} is built with the vector math library. Use this option to disable the vector math library. +@item --disable-static-c++-tests +By default, if the C++ toolchain lacks support for static linking, +configure fails to find the C++ header files and the glibc build fails.
+@option{--disable-static-c++-link-check} allows the glibc build to finish, +but static C++ tests will fail if the C++ toolchain doesn't have the +necessary static C++ libraries. Use this option to skip the static C++ +tests. This option implies @option{--disable-static-c++-link-check}. + +@item --disable-static-c++-link-check +By default, if the C++ toolchain lacks support for static linking, +configure fails to find the C++ header files and the glibc build fails. +Use this option to disable the static C++ link check so that the C++ +header files can be located. The newly built libc.a can be used to +create static C++ tests if the C++ toolchain has the necessary static +C++ libraries. + @item --disable-scv Disable using @code{scv} instruction for syscalls. All syscalls will use @code{sc} instead, even if the kernel supports @code{scv}. PowerPC only. diff --git a/manual/llio.texi b/manual/llio.texi index fe1807a849..78c7c79913 100644 --- a/manual/llio.texi +++ b/manual/llio.texi @@ -1573,10 +1573,15 @@ permitted. They include @code{PROT_READ}, @code{PROT_WRITE}, and of address space for future use. The @code{mprotect} function can be used to change the protection flags. @xref{Memory Protection}. -@var{flags} contains flags that control the nature of the map. -One of @code{MAP_SHARED} or @code{MAP_PRIVATE} must be specified. +The @var{flags} parameter contains flags that control the nature of +the map. One of @code{MAP_SHARED}, @code{MAP_SHARED_VALIDATE}, or +@code{MAP_PRIVATE} must be specified. Additional flags may be bitwise +OR'd to further define the mapping. -They include: +Note that, aside from @code{MAP_PRIVATE} and @code{MAP_SHARED}, not +all flags are supported on all versions of all operating systems. +Consult the kernel-specific documentation for details. The flags +include: @vtable @code @item MAP_PRIVATE @@ -1598,9 +1603,19 @@ Note that actual writing may take place at any time. 
You need to use @code{msync}, described below, if it is important that other processes using conventional I/O get a consistent view of the file. +@item MAP_SHARED_VALIDATE +Similar to @code{MAP_SHARED} except that additional flags will be +validated by the kernel, and the call will fail if an unrecognized +flag is provided. With @code{MAP_SHARED} using a flag on a kernel +that doesn't support it causes the flag to be ignored. +@code{MAP_SHARED_VALIDATE} should be used when the behavior of all +flags is required. + @item MAP_FIXED This forces the system to use the exact mapping address specified in -@var{address} and fail if it can't. +@var{address} and fail if it can't. Note that if the new mapping +would overlap an existing mapping, the overlapping portion of the +existing map is unmapped. @c One of these is official - the other is obviously an obsolete synonym @c Which is which? @@ -1641,10 +1656,73 @@ The @code{MAP_HUGETLB} flag is specific to Linux. @c There is a mechanism to select different hugepage sizes; see @c include/uapi/asm-generic/hugetlb_encode.h in the kernel sources. -@c Linux has some other MAP_ options, which I have not discussed here. -@c MAP_DENYWRITE, MAP_EXECUTABLE and MAP_GROWSDOWN don't seem applicable to -@c user programs (and I don't understand the last two). MAP_LOCKED does -@c not appear to be implemented. +@item MAP_32BIT +Require addresses that can be accessed with a signed 32 bit pointer, +i.e., within the first 2 GiB. Ignored if MAP_FIXED is specified. + +@item MAP_DENYWRITE +@itemx MAP_EXECUTABLE +@itemx MAP_FILE + +Provided for compatibility. Ignored by the Linux kernel. + +@item MAP_FIXED_NOREPLACE +Similar to @code{MAP_FIXED} except the call will fail with +@code{EEXIST} if the new mapping would overwrite an existing mapping. +To test for support for this flag, specify MAP_FIXED_NOREPLACE without +MAP_FIXED, and (if the call was successful) check the actual address +returned. 
If it does not match the address passed, then this flag is +not supported. + +@item MAP_GROWSDOWN +This flag is used to make stacks, and is typically only needed inside +the program loader to set up the main stack for the running process. +The mapping is created according to the other flags, except an +additional page just prior to the mapping is marked as a ``guard +page''. If a write is attempted inside this guard page, that page is +mapped, the mapping is extended, and a new guard page is created. +Thus, the mapping continues to grow towards lower addresses until it +encounters some other mapping. + +Note that accessing memory beyond the guard page will not trigger this +feature. In gcc, use @code{-fstack-clash-protection} to ensure the +guard page is always touched. + +@item MAP_LOCKED +A hint that requests that mapped pages are locked in memory (i.e. not +paged out). Note that this is a request and not a requirement; use +@code{mlock} if locking is required. + +@item MAP_POPULATE +@itemx MAP_NONBLOCK +@code{MAP_POPULATE} is a hint that requests that the kernel read-ahead +a file-backed mapping, causing pages to be mapped before they're +needed. @code{MAP_NONBLOCK} is a hint that requests that the kernel +@emph{not} attempt such except for pages that are already in memory. Note +that neither of these hints affects future paging activity; use +@code{mlock} if such needs to be controlled. + +@item MAP_NORESERVE +Asks the kernel to not reserve physical backing (i.e. space in a swap +device) for a mapping. This would be useful for, for example, a very +large but sparsely used mapping which need not be limited in total +length by available RAM, but with very few mapped pages. Note that +writes to such a mapping may cause a @code{SIGSEGV} if the system is +unable to map a page due to lack of resources. + +On Linux, this flag's behavior may be overridden by +@file{/proc/sys/vm/overcommit_memory} as documented in the proc(5) man +page.
+ +@item MAP_STACK +Ensures that the resulting mapping is suitable for use as a program +stack. For example, the use of huge pages might be precluded. + +@item MAP_SYNC +This is a special flag for DAX devices, which tells the kernel to +write dirty metadata out whenever dirty data is written out. Unlike +most other flags, this one will fail unless @code{MAP_SHARED_VALIDATE} +is also given. @end vtable @@ -1655,6 +1733,24 @@ Possible errors include: @table @code +@item EACCES + +@var{filedes} was not open for the type of access specified in @var{protect}. + +@item EAGAIN + +The system has temporarily run out of resources. + +@item EBADF + +The @var{filedes} passed is invalid, and a valid file descriptor is +required (i.e. @code{MAP_ANONYMOUS} was not specified). + +@item EEXIST + +@code{MAP_FIXED_NOREPLACE} was specified and an existing mapping was +found overlapping the requested address range. + @item EINVAL Either @var{address} was unusable (because it is not a multiple of the @@ -1663,23 +1759,37 @@ applicable page size), or inconsistent @var{flags} were given. If @code{MAP_HUGETLB} was specified, the file or system does not support large page sizes. -@item EACCES +@item ENODEV -@var{filedes} was not open for the type of access specified in @var{protect}. +This file is of a type that doesn't support mapping, the process has +exceeded its data space limit, or the map request would exceed the +process's virtual address space. @item ENOMEM -Either there is not enough memory for the operation, or the process is -out of address space. - -@item ENODEV - -This file is of a type that doesn't support mapping. +There is not enough memory for the operation, the process is out of +address space, or there are too many mappings. On Linux, the maximum +number of mappings can be controlled via +@file{/proc/sys/vm/max_map_count} or, if your OS supports it, via +the @code{vm.max_map_count} @code{sysctl} setting. @item ENOEXEC The file is on a filesystem that doesn't support mapping.
+@item EPERM + +@code{PROT_EXEC} was requested but the file is on a filesystem that +was mounted with execution denied, a file seal prevented the mapping, +or the caller set @code{MAP_HUGETLB} but does not have the required +privileges. + +@item EOVERFLOW + +Either the offset into the file plus the length of the mapping causes +internal page counts to overflow, or the offset requested exceeds the +length of the file. + @c On Linux, EAGAIN will appear if the file has a conflicting mandatory lock. @c However mandatory locks are not discussed in this manual. @c diff --git a/misc/Makefile b/misc/Makefile index c273ec6974..5d17c562fe 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -214,12 +214,18 @@ routines_no_fortify += \ syslog \ # routines_no_fortify +ifeq ($(run-built-tests),yes) +ifeq (yes,$(build-shared)) +ifneq ($(PERL),no) generated += \ tst-allocate_once-mem.out \ tst-allocate_once.mtrace \ tst-error1-mem.out \ tst-error1.mtrace \ # generated +endif +endif +endif aux := init-misc install-lib := libg.a @@ -285,8 +291,14 @@ tests-internal += tst-fd_to_filename tests-static += tst-fd_to_filename ifeq ($(run-built-tests),yes) -tests-special += $(objpfx)tst-error1-mem.out \ - $(objpfx)tst-allocate_once-mem.out +ifeq (yes,$(build-shared)) +ifneq ($(PERL),no) +tests-special += \ + $(objpfx)tst-allocate_once-mem.out \ + $(objpfx)tst-error1-mem.out \ + # tests-special +endif +endif endif tests-container := \ diff --git a/nptl/Makefile b/nptl/Makefile index b3f8af2e1c..c4c27e0d23 100644 --- a/nptl/Makefile +++ b/nptl/Makefile @@ -545,6 +545,9 @@ tests-static += \ # tests-static tests += tst-cancel24-static +ifeq ($(static-cxx-tests),no) +tests-unsupported += tst-cancel24-static +endif tests-internal += \ tst-sem11-static \ @@ -556,10 +559,12 @@ xtests-static += tst-setuid1-static ifeq ($(run-built-tests),yes) tests-special += \ $(objpfx)tst-oddstacklimit.out \ - $(objpfx)tst-stack3-mem.out \ # tests-special ifeq ($(build-shared),yes) tests-special += $(objpfx)tst-tls6.out
+ifneq ($(PERL),no) +tests-special += $(objpfx)tst-stack3-mem.out +endif endif endif @@ -617,10 +622,17 @@ tst-stack3-ENV = MALLOC_TRACE=$(objpfx)tst-stack3.mtrace \ $(objpfx)tst-stack3-mem.out: $(objpfx)tst-stack3.out $(common-objpfx)malloc/mtrace $(objpfx)tst-stack3.mtrace > $@; \ $(evaluate-test) + +ifeq ($(run-built-tests),yes) +ifeq (yes,$(build-shared)) +ifneq ($(PERL),no) generated += \ tst-stack3-mem.out \ tst-stack3.mtrace \ # generated +endif +endif +endif tst-stack4mod.sos=$(shell for i in 0 1 2 3 4 5 6 7 8 9 10 \ 11 12 13 14 15 16 17 18 19; do \ diff --git a/posix/Makefile b/posix/Makefile index a1e84853a8..2c598cd20a 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -418,6 +418,17 @@ generated += \ $(addprefix wordexp-test-result, 1 2 3 4 5 6 7 8 9 10) \ annexc \ annexc.out \ + getconf.speclist \ + ptestcases.h \ + testcases.h \ + tst-getconf.out \ + wordexp-tst.out \ + # generated + +ifeq ($(run-built-tests),yes) +ifeq (yes,$(build-shared)) +ifneq ($(PERL),no) +generated += \ bug-ga2-mem.out \ bug-ga2.mtrace \ bug-glob2-mem.out \ @@ -430,23 +441,22 @@ generated += \ bug-regex21.mtrace \ bug-regex31-mem.out \ bug-regex31.mtrace \ + bug-regex36-mem.out \ bug-regex36.mtrace \ - getconf.speclist \ - ptestcases.h \ - testcases.h \ tst-boost-mem.out \ tst-boost.mtrace \ tst-fnmatch-mem.out \ tst-fnmatch.mtrace \ - tst-getconf.out \ tst-pcre-mem.out \ tst-pcre.mtrace \ tst-rxspencer-no-utf8-mem.out \ tst-rxspencer-no-utf8.mtrace \ tst-vfork3-mem.out \ tst-vfork3.mtrace \ - wordexp-tst.out \ # generated +endif +endif +endif ifeq ($(run-built-tests),yes) ifeq (yes,$(build-shared)) @@ -461,6 +471,9 @@ endif # XXX Please note that for now we ignore the result of this test. 
tests-special += $(objpfx)annexc.out ifeq ($(run-built-tests),yes) +tests-special += $(objpfx)tst-getconf.out +ifeq (yes,$(build-shared)) +ifneq ($(PERL),no) tests-special += \ $(objpfx)bug-ga2-mem.out \ $(objpfx)bug-glob2-mem.out \ @@ -471,13 +484,14 @@ tests-special += \ $(objpfx)bug-regex36-mem.out \ $(objpfx)tst-boost-mem.out \ $(objpfx)tst-fnmatch-mem.out \ - $(objpfx)tst-getconf.out \ $(objpfx)tst-glob-tilde-mem.out \ $(objpfx)tst-pcre-mem.out \ $(objpfx)tst-rxspencer-no-utf8-mem.out \ $(objpfx)tst-vfork3-mem.out \ # tests-special endif +endif +endif include ../Rules diff --git a/signal/Makefile b/signal/Makefile index e8e3dce0cf..7cddbc3c65 100644 --- a/signal/Makefile +++ b/signal/Makefile @@ -46,11 +46,22 @@ routines := signal raise killpg \ allocrtsig sigtimedwait sigwaitinfo sigqueue \ sighold sigrelse sigignore sigset -tests := tst-signal tst-sigset tst-sigsimple tst-raise tst-sigset2 \ - tst-sigwait-eintr tst-sigaction \ - tst-minsigstksz-1 tst-minsigstksz-2 tst-minsigstksz-3 \ - tst-minsigstksz-3a tst-minsigstksz-4 tst-minsigstksz-5 \ - tst-sigisemptyset +tests := \ + tst-minsigstksz-1 \ + tst-minsigstksz-2 \ + tst-minsigstksz-3 \ + tst-minsigstksz-3a \ + tst-minsigstksz-4 \ + tst-minsigstksz-5 \ + tst-raise \ + tst-sigaction \ + tst-sigisemptyset \ + tst-signal \ + tst-sigset \ + tst-sigset2 \ + tst-sigsimple \ + tst-sigwait-eintr \ +# tests include ../Rules diff --git a/socket/Makefile b/socket/Makefile index fc1bd0a260..df732fa9b7 100644 --- a/socket/Makefile +++ b/socket/Makefile @@ -71,6 +71,7 @@ tests := \ tst-cmsg_cloexec \ tst-cmsghdr \ tst-connect \ + tst-shutdown \ tst-sockopt \ # tests diff --git a/socket/tst-shutdown.c b/socket/tst-shutdown.c new file mode 100644 index 0000000000..a305e5e494 --- /dev/null +++ b/socket/tst-shutdown.c @@ -0,0 +1,257 @@ +/* Test the shutdown function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <arpa/inet.h> +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <support/check.h> +#include <support/support.h> +#include <support/xsocket.h> +#include <support/xunistd.h> +#include <sys/socket.h> +#include <stdio.h> +#include <fcntl.h> +#include <string.h> + +struct connection +{ + int sockets[2]; +}; + +void +establish_connection (struct connection *conn) +{ + if (socketpair (AF_UNIX, SOCK_STREAM, 0, conn->sockets) != 0) + { + FAIL_EXIT1 ("socketpair (AF_UNIX, SOCK_STREAM, 0): %m\n"); + } +} + +void +close_connection (struct connection *conn) +{ + xclose (conn->sockets[0]); + xclose (conn->sockets[1]); +} + +/* Open a file and check that shutdown fails with the ENOTSOCK error code. */ +void +do_test_enotsock (void) +{ + /* Open file and check that shutdown will fail with ENOTSOCK. */ + int fd = xopen ("/dev/null", O_RDWR, 0); + + int result = shutdown (fd, SHUT_RD); + if (result == 0 || errno != ENOTSOCK) + { + FAIL_EXIT1 ("shutdown should fail with ENOTSOCK"); + } + xclose (fd); +} + +/* Test shutdown with SHUT_RD. 
*/ +void +do_test_shut_rd (void) +{ + struct connection conn; + const char *str = "AAAAAAA"; + int len = 8; + int ret; + void *s_buf = xmalloc (len); + bzero (s_buf, len); + + establish_connection (&conn); + int server = conn.sockets[0]; + int client = conn.sockets[1]; + + /* Call shutdown with SHUT_RD on server socket. */ + if (shutdown (server, SHUT_RD) != 0) + { + FAIL_EXIT1 ("shutdown with SHUT_RD on socket %d failed", server); + } + + ret = send (server, str, len, 0); + if (ret <= 0) + { + FAIL_EXIT1 ("send (%d, data, %d): %m", server, len); + } + + ret = recv (client, s_buf, len, 0); + if (ret <= 0) + { + FAIL_EXIT1 ("recv (%d, data, %d): %m", client, len); + } + + TEST_COMPARE_BLOB (str, len, s_buf, len); + + /* Send data should be disallowed on shutdown socket. */ + errno = 0; + ret = send (client, str, len, MSG_NOSIGNAL); + if (ret >= 0 || errno != EPIPE) + { + FAIL_EXIT1 ("Send on SHUT_RD socket should be disallowed: %m"); + } + + /* Recv should return zero and no error. */ + errno = 0; + ret = recv (server, s_buf, len, 0); + if (ret != 0 || errno != 0) + { + FAIL_EXIT1 ("recv should return 0 without error: %m"); + } + + close_connection (&conn); +} + +/* Test shutdown with SHUT_WR. */ +void +do_test_shut_wr (void) +{ + struct connection conn; + const char *str1 = "CCCCCCC"; + const char *str2 = "DDDDDDD"; + const char *str3 = "EEEEEEE"; + int len = 8; + int ret; + void *c_buf = xmalloc (len); + void *s_buf = xmalloc (len); + + establish_connection (&conn); + int server = conn.sockets[0]; + int client = conn.sockets[1]; + + xwrite (client, str1, len); + + if (shutdown (client, SHUT_WR) != 0) + { + FAIL_EXIT1 ("shutdown with SHUT_WR on socket %d failed", client); + } + + ret = send (client, str2, len, MSG_NOSIGNAL); + if (ret >= 0) + { + FAIL_EXIT1 ("send on SHUT_WR socket should fail"); + } + + /* Read data written before shutdown and check if it's correct. 
*/ + xread (server, s_buf, len); + TEST_COMPARE_BLOB (str1, len, s_buf, len); + + /* Second read should return zero without error. */ + errno = 0; + if (read (server, s_buf, len) != 0 || errno != 0) + { + FAIL_EXIT1 ("read after shutdown should return zero without error: %m"); + } + + /* Write some data to socket and check it still can be read on other side. */ + memcpy (s_buf, str3, len); + xwrite (server, s_buf, len); + + xread (client, c_buf, len); + TEST_COMPARE_BLOB (s_buf, len, c_buf, len); + + close_connection (&conn); +} + +/* Test shutdown with SHUT_RDWR. */ +void +do_test_shut_rdwr (void) +{ + struct connection conn; + struct sockaddr peer; + socklen_t peer_len = sizeof (peer); + + const char *str1 = "FFFFFFF"; + const char *str2 = "GGGGGGG"; + int len = 8; + int ret; + void *s_buf = xmalloc (len); + bzero (s_buf, len); + + establish_connection (&conn); + int server = conn.sockets[0]; + int client = conn.sockets[1]; + + /* Send some data to both sockets before shutdown. */ + xwrite (client, str1, len); + xwrite (server, str2, len); + + /* Call shutdown with SHUT_RDWR on client socket. */ + if (shutdown (client, SHUT_RDWR) != 0) + { + FAIL_EXIT1 ("shutdown with SHUT_RDWR on socket %d failed", client); + } + + /* Verify that socket is still connected. */ + xgetsockname (client, &peer, &peer_len); + + /* Read data written before shutdown. */ + xread (client, s_buf, len); + TEST_COMPARE_BLOB (s_buf, len, str2, len); + + /* Second read should return zero, but no error. */ + errno = 0; + if (read (client, s_buf, len) != 0 || errno != 0) + { + FAIL_EXIT1 ("read after shutdown should return zero without error: %m"); + } + + /* Send some data to shutdown socket and expect error. */ + errno = 0; + ret = send (server, str2, len, MSG_NOSIGNAL); + if (ret >= 0 || errno != EPIPE) + { + FAIL_EXIT1 ("send to RDWR shutdown socket should fail with EPIPE"); + } + + /* Read data written before shutdown. 
*/ + xread (server, s_buf, len); + TEST_COMPARE_BLOB (s_buf, len, str1, len); + + /* Second read should return zero, but no error. */ + errno = 0; + if (read (server, s_buf, len) != 0 || errno != 0) + { + FAIL_EXIT1 ("read after shutdown should return zero without error: %m"); + } + + /* Send some data to shutdown socket and expect error. */ + errno = 0; + ret = send (client, str1, len, MSG_NOSIGNAL); + if (ret >= 0 || errno != EPIPE) + { + FAIL_EXIT1 ("send to RDWR shutdown socket should fail with EPIPE"); + } + + close_connection (&conn); +} + +static int +do_test (void) +{ + do_test_enotsock (); + do_test_shut_rd (); + do_test_shut_wr (); + do_test_shut_rdwr (); + + return 0; +} + +#include <support/test-driver.c> diff --git a/stdio-common/Makefile b/stdio-common/Makefile index 6bc972af1a..a63c05a120 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -229,10 +229,6 @@ tests := \ tst-popen \ tst-popen2 \ tst-printf-binary \ - tst-printf-bz18872 \ - tst-printf-bz25691 \ - tst-printf-fp-free \ - tst-printf-fp-leak \ tst-printf-intn \ tst-printf-oct \ tst-printf-round \ @@ -261,7 +257,6 @@ tests := \ tst-vfprintf-mbs-prec \ tst-vfprintf-user-type \ tst-vfprintf-width-i18n \ - tst-vfprintf-width-prec \ tst-vfprintf-width-prec-alloc \ tst-wc-printf \ tstdiomisc \ @@ -270,6 +265,20 @@ tests := \ xbug \ # tests +ifeq ($(run-built-tests),yes) +ifeq (yes,$(build-shared)) +ifneq ($(PERL),no) +tests += \ + tst-printf-bz18872 \ + tst-printf-bz25691 \ + tst-printf-fp-free \ + tst-printf-fp-leak \ + tst-vfprintf-width-prec \ + # tests +endif +endif +endif + tests-container += \ tst-popen3 # tests-container @@ -293,14 +302,19 @@ test-srcs = \ ifeq ($(run-built-tests),yes) tests-special += \ - $(objpfx)tst-printf-bz18872-mem.out \ - $(objpfx)tst-printf-bz25691-mem.out \ - $(objpfx)tst-printf-fp-free-mem.out \ - $(objpfx)tst-printf-fp-leak-mem.out \ $(objpfx)tst-printf.out \ $(objpfx)tst-printfsz-islongdouble.out \ $(objpfx)tst-setvbuf1-cmp.out \ 
$(objpfx)tst-unbputc.out \ + # tests-special + +ifeq (yes,$(build-shared)) +ifneq ($(PERL),no) +tests-special += \ + $(objpfx)tst-printf-bz18872-mem.out \ + $(objpfx)tst-printf-bz25691-mem.out \ + $(objpfx)tst-printf-fp-free-mem.out \ + $(objpfx)tst-printf-fp-leak-mem.out \ $(objpfx)tst-vfprintf-width-prec-mem.out \ # tests-special @@ -317,6 +331,8 @@ generated += \ tst-vfprintf-width-prec-mem.out \ tst-vfprintf-width-prec.mtrace \ # generated +endif +endif endif # $(run-built-tests) tests-special += $(objpfx)tst-errno-manual.out diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile index ef5ea9ab8c..3e251cc234 100644 --- a/sysdeps/aarch64/multiarch/Makefile +++ b/sysdeps/aarch64/multiarch/Makefile @@ -15,6 +15,7 @@ sysdep_routines += \ memset_generic \ memset_kunpeng \ memset_mops \ + memset_oryon1 \ memset_zva64 \ strlen_asimd \ strlen_generic \ diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c index 65c56b9b41..b2fda541f9 100644 --- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c +++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c @@ -56,6 +56,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic)) IFUNC_IMPL (i, name, memset, IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_zva64) + IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_oryon1) IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag) IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng) #if HAVE_AARCH64_SVE_ASM diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c index 34bce045dd..bd063c16c9 100644 --- a/sysdeps/aarch64/multiarch/memset.c +++ b/sysdeps/aarch64/multiarch/memset.c @@ -1,5 +1,6 @@ /* Multiple versions of memset. AARCH64 version. Copyright (C) 2017-2024 Free Software Foundation, Inc. + Copyright The GNU Toolchain Authors. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -34,6 +35,7 @@ extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden; extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden; extern __typeof (__redirect_memset) __memset_generic attribute_hidden; extern __typeof (__redirect_memset) __memset_mops attribute_hidden; +extern __typeof (__redirect_memset) __memset_oryon1 attribute_hidden; static inline __typeof (__redirect_memset) * select_memset_ifunc (void) @@ -49,6 +51,9 @@ select_memset_ifunc (void) return __memset_a64fx; } + if (IS_ORYON1 (midr) && zva_size == 64) + return __memset_oryon1; + if (IS_KUNPENG920 (midr)) return __memset_kunpeng; diff --git a/sysdeps/aarch64/multiarch/memset_oryon1.S b/sysdeps/aarch64/multiarch/memset_oryon1.S new file mode 100644 index 0000000000..b43a43b54e --- /dev/null +++ b/sysdeps/aarch64/multiarch/memset_oryon1.S @@ -0,0 +1,169 @@ +/* Optimized memset for Qualcomm's oryon-1 core. + Copyright (C) 2018-2024 Free Software Foundation, Inc. + Copyright The GNU Toolchain Authors. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>.
*/ + +#include <sysdep.h> +#include "memset-reg.h" + +/* Assumptions: + ARMv8-a, AArch64, unaligned accesses + */ + +ENTRY (__memset_oryon1) + + PTR_ARG (0) + SIZE_ARG (2) + + bfi valw, valw, 8, 8 + bfi valw, valw, 16, 16 + bfi val, val, 32, 32 + + add dstend, dstin, count + + cmp count, 96 + b.hi L(set_long) + cmp count, 16 + b.hs L(set_medium) + + /* Set 0..15 bytes. */ + tbz count, 3, 1f + str val, [dstin] + str val, [dstend, -8] + ret + + .p2align 3 +1: tbz count, 2, 2f + str valw, [dstin] + str valw, [dstend, -4] + ret +2: cbz count, 3f + strb valw, [dstin] + tbz count, 1, 3f + strh valw, [dstend, -2] +3: ret + + .p2align 3 + /* Set 16..96 bytes. */ +L(set_medium): + stp val, val, [dstin] + tbnz count, 6, L(set96) + stp val, val, [dstend, -16] + tbz count, 5, 1f + stp val, val, [dstin, 16] + stp val, val, [dstend, -32] +1: ret + + .p2align 6 + /* Set 64..96 bytes. Write 64 bytes from the start and + 32 bytes from the end. */ +L(set96): + stp val, val, [dstin, 16] + stp val, val, [dstin, 32] + stp val, val, [dstin, 48] + stp val, val, [dstend, -32] + stp val, val, [dstend, -16] + ret + + .p2align 6 +L(set_long): + stp val, val, [dstin] + bic dst, dstin, 15 + cmp count, 256 + ccmp valw, 0, 0, cs + b.eq L(try_zva) + cmp count, #32768 + b.hi L(set_long_with_nontemp) + /* Small-size or non-zero memset does not use DC ZVA. */ + sub count, dstend, dst + + /* Adjust count and bias for loop. By subtracting extra 1 from count, + it is easy to use tbz instruction to check whether loop tailing + count is less than 33 bytes, so as to bypass 2 unnecessary stps. */ + sub count, count, 64+16+1 + +1: stp val, val, [dst, 16] + stp val, val, [dst, 32] + stp val, val, [dst, 48] + stp val, val, [dst, 64]! + subs count, count, 64 + b.hs 1b + + tbz count, 5, 1f /* Remaining count is less than 33 bytes? 
*/ + stp val, val, [dst, 16] + stp val, val, [dst, 32] +1: stp val, val, [dstend, -32] + stp val, val, [dstend, -16] + ret + +L(set_long_with_nontemp): + /* Small-size or non-zero memset does not use DC ZVA. */ + sub count, dstend, dst + + /* Adjust count and bias for loop. By subtracting extra 1 from count, + it is easy to use tbz instruction to check whether loop tailing + count is less than 33 bytes, so as to bypass 2 unnecessary stps. */ + sub count, count, 64+16+1 + +1: stnp val, val, [dst, 16] + stnp val, val, [dst, 32] + stnp val, val, [dst, 48] + stnp val, val, [dst, 64] + add dst, dst, #64 + subs count, count, 64 + b.hs 1b + + tbz count, 5, 1f /* Remaining count is less than 33 bytes? */ + stnp val, val, [dst, 16] + stnp val, val, [dst, 32] +1: stnp val, val, [dstend, -32] + stnp val, val, [dstend, -16] + ret + +L(try_zva): + /* Write the first and last 64 byte aligned block using stp rather + than using DC ZVA as it is faster. */ + .p2align 6 +L(zva_64): + stp val, val, [dst, 16] + stp val, val, [dst, 32] + stp val, val, [dst, 48] + bic dst, dst, 63 + stp val, val, [dst, 64] + stp val, val, [dst, 64+16] + stp val, val, [dst, 96] + stp val, val, [dst, 96+16] + sub count, dstend, dst /* Count is now 128 too large. */ + sub count, count, 128+64+64 /* Adjust count and bias for loop. 
*/ + add dst, dst, 128 +1: dc zva, dst + add dst, dst, 64 + subs count, count, 64 + b.hi 1b + stp val, val, [dst, 0] + stp val, val, [dst, 16] + stp val, val, [dst, 32] + stp val, val, [dst, 48] + + stp val, val, [dstend, -64] + stp val, val, [dstend, -64+16] + stp val, val, [dstend, -32] + stp val, val, [dstend, -16] + ret + +END (__memset_oryon1) diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index 50f58a60e3..656e8a3fa0 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -1256,6 +1256,20 @@ extern struct link_map *_dl_update_slotinfo (unsigned long int req_modid, size_t gen) attribute_hidden; +/* The last TLS module ID that is initially loaded, plus 1. TLS + addresses for modules with IDs lower than that can be obtained from + the DTV even if its generation is outdated. */ +extern size_t _dl_tls_initial_modid_limit attribute_hidden attribute_relro; + +/* Compute _dl_tls_initial_modid_limit. To be called after initial + relocation. */ +void _dl_tls_initial_modid_limit_setup (void) attribute_hidden; + +/* Number of threads currently in a TLS update. This is used to + detect reentrant __tls_get_addr calls without a per-thread + flag. */ +extern unsigned int _dl_tls_threads_in_update attribute_hidden; + /* Look up the module's TLS block as for __tls_get_addr, but never touch anything. Return null if it's not allocated yet. */ extern void *_dl_tls_get_addr_soft (struct link_map *l) attribute_hidden; diff --git a/sysdeps/mips/fpu/math-use-builtins-fma.h b/sysdeps/mips/fpu/math-use-builtins-fma.h new file mode 100644 index 0000000000..57108f968e --- /dev/null +++ b/sysdeps/mips/fpu/math-use-builtins-fma.h @@ -0,0 +1,36 @@ +/* Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* MIPSr6 has MADDF.s/MADDF.d instructions, which are fused. In MIPS + ISA, double support can be subsetted. Only FMAF is enabled for this + case. */ + +#include <sysdep.h> + +#if __mips_isa_rev >= 6 +# ifdef __mips_single_float +# define USE_FMA_BUILTIN 0 +# else +# define USE_FMA_BUILTIN 1 +# endif +# define USE_FMAF_BUILTIN 1 +#else +# define USE_FMA_BUILTIN 0 +# define USE_FMAF_BUILTIN 0 +#endif +#define USE_FMAL_BUILTIN 0 +#define USE_FMAF128_BUILTIN 0 diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c index 092c274f36..7eb35fb133 100644 --- a/sysdeps/nptl/dl-tls_init_tp.c +++ b/sysdeps/nptl/dl-tls_init_tp.c @@ -45,8 +45,10 @@ rtld_mutex_dummy (pthread_mutex_t *lock) #endif const unsigned int __rseq_flags; -const unsigned int __rseq_size attribute_relro; -const ptrdiff_t __rseq_offset attribute_relro; + +/* The variables are in .data.relro but are not yet write-protected. */ +extern unsigned int _rseq_size attribute_hidden; +extern ptrdiff_t _rseq_offset attribute_hidden; void __tls_pre_init_tp (void) @@ -105,10 +107,7 @@ __tls_init_tp (void) do_rseq = TUNABLE_GET (rseq, int, NULL); if (rseq_register_current_thread (pd, do_rseq)) { - /* We need a writable view of the variables. They are in - .data.relro and are not yet write-protected. 
*/ - extern unsigned int size __asm__ ("__rseq_size"); - size = sizeof (pd->rseq_area); + _rseq_size = sizeof (pd->rseq_area); } #ifdef RSEQ_SIG @@ -117,8 +116,7 @@ __tls_init_tp (void) all targets support __thread_pointer, so set __rseq_offset only if the rseq registration may have happened because RSEQ_SIG is defined. */ - extern ptrdiff_t offset __asm__ ("__rseq_offset"); - offset = (char *) &pd->rseq_area - (char *) __thread_pointer (); + _rseq_offset = (char *) &pd->rseq_area - (char *) __thread_pointer (); #endif } diff --git a/sysdeps/riscv/nofpu/libm-test-ulps b/sysdeps/riscv/nofpu/libm-test-ulps index d8cff3e077..9ad64d1d85 100644 --- a/sysdeps/riscv/nofpu/libm-test-ulps +++ b/sysdeps/riscv/nofpu/libm-test-ulps @@ -972,6 +972,11 @@ double: 2 float: 1 ldouble: 3 +Function: "exp10m1": +double: 2 +float: 1 +ldouble: 1 + Function: "exp2": double: 1 ldouble: 1 @@ -989,6 +994,11 @@ double: 1 float: 1 ldouble: 2 +Function: "exp2m1": +double: 1 +float: 1 +ldouble: 1 + Function: "exp_downward": double: 1 float: 1 @@ -1161,6 +1171,11 @@ double: 2 float: 2 ldouble: 1 +Function: "log10p1": +double: 1 +float: 1 +ldouble: 3 + Function: "log1p": double: 1 float: 1 @@ -1198,6 +1213,11 @@ Function: "log2_upward": double: 3 ldouble: 1 +Function: "log2p1": +double: 1 +float: 1 +ldouble: 3 + Function: "log_downward": ldouble: 1 diff --git a/sysdeps/unix/sysv/linux/hppa/sysdep.h b/sysdeps/unix/sysv/linux/hppa/sysdep.h index af62f7501e..e47975e5cf 100644 --- a/sysdeps/unix/sysv/linux/hppa/sysdep.h +++ b/sysdeps/unix/sysv/linux/hppa/sysdep.h @@ -473,11 +473,8 @@ L(pre_end): ASM_LINE_SEP \ #ifdef __LP64__ # define HAVE_CLOCK_GETTIME_VSYSCALL "__vdso_clock_gettime" -# define HAVE_GETTIMEOFDAY_VSYSCALL "__vdso_gettimeofday" #else -# define HAVE_CLOCK_GETTIME_VSYSCALL "__vdso_clock_gettime" # define HAVE_CLOCK_GETTIME64_VSYSCALL "__vdso_clock_gettime64" -# define HAVE_GETTIMEOFDAY_VSYSCALL "__vdso_gettimeofday" #endif /* __LP64__ */ #endif /* __ASSEMBLER__ */ diff --git 
a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 3d7c2819d7..e501e084ef 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -1023,39 +1023,59 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht model += extended_model; if (family == 0x6) - { - if (model == 0xf || model == 0x19) - { + { + /* Tuning for older Zhaoxin processors. */ + if (model == 0xf || model == 0x19) + { CPU_FEATURE_UNSET (cpu_features, AVX); CPU_FEATURE_UNSET (cpu_features, AVX2); - cpu_features->preferred[index_arch_Slow_SSE4_2] - |= bit_arch_Slow_SSE4_2; + cpu_features->preferred[index_arch_Slow_SSE4_2] + |= bit_arch_Slow_SSE4_2; + /* Unaligned AVX loads are slower. */ cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load] - &= ~bit_arch_AVX_Fast_Unaligned_Load; - } - } + &= ~bit_arch_AVX_Fast_Unaligned_Load; + } + } else if (family == 0x7) - { - if (model == 0x1b) + { + switch (model) { + /* Wudaokou microarch tuning. */ + case 0x1b: CPU_FEATURE_UNSET (cpu_features, AVX); CPU_FEATURE_UNSET (cpu_features, AVX2); cpu_features->preferred[index_arch_Slow_SSE4_2] - |= bit_arch_Slow_SSE4_2; + |= bit_arch_Slow_SSE4_2; cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load] - &= ~bit_arch_AVX_Fast_Unaligned_Load; - } - else if (model == 0x3b) - { + &= ~bit_arch_AVX_Fast_Unaligned_Load; + break; + + /* Lujiazui microarch tuning. */ + case 0x3b: CPU_FEATURE_UNSET (cpu_features, AVX); CPU_FEATURE_UNSET (cpu_features, AVX2); cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load] - &= ~bit_arch_AVX_Fast_Unaligned_Load; + &= ~bit_arch_AVX_Fast_Unaligned_Load; + break; + + /* Yongfeng and Shijidadao microarch tuning. */ + case 0x5b: + cpu_features->cachesize_non_temporal_divisor = 2; + case 0x6b: + cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load] + &= ~bit_arch_AVX_Fast_Unaligned_Load; + + /* To use sse2_unaligned versions of memset, strcpy and strcat.
+ */ + cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER] + |= (bit_arch_Prefer_No_VZEROUPPER + | bit_arch_Fast_Unaligned_Load); + break; } } } diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h index 3a6ec4ef9f..5e77345a6e 100644 --- a/sysdeps/x86/dl-cacheinfo.h +++ b/sysdeps/x86/dl-cacheinfo.h @@ -934,8 +934,10 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) /* If no ERMS, we use the per-thread L3 chunking. Normal cacheable stores run a higher risk of actually thrashing the cache as they don't have a HW LRU hint. As well, their performance in highly parallel situations is - noticeably worse. */ - if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS)) + noticeably worse. Zhaoxin processors are an exception, the lowbound is not + suitable for them based on actual test data. */ + if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS) + && cpu_features->basic.kind != arch_kind_zhaoxin) non_temporal_threshold = non_temporal_threshold_lowbound; /* SIZE_MAX >> 4 because memmove-vec-unaligned-erms right-shifts the value of 'x86_non_temporal_threshold' by `LOG_4X_MEMCPY_THRESH` (4) and it is best diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c index 869023bbba..b3c1e4fcd7 100644 --- a/sysdeps/x86_64/dl-tls.c +++ b/sysdeps/x86_64/dl-tls.c @@ -41,7 +41,10 @@ __tls_get_addr_slow (GET_ADDR_ARGS) dtv_t *dtv = THREAD_DTV (); size_t gen = atomic_load_acquire (&GL(dl_tls_generation)); - if (__glibc_unlikely (dtv[0].counter != gen)) + if (__glibc_unlikely (dtv[0].counter != gen) + /* See comment in __tls_get_addr in elf/dl-tls.c. 
*/ + && !(_dl_tls_allocate_active () + && GET_ADDR_MODULE < _dl_tls_initial_modid_limit)) return update_get_addr (GET_ADDR_PARAM, gen); return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL); diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3.S b/sysdeps/x86_64/multiarch/memmove-ssse3.S index 048d015712..01008fd981 100644 --- a/sysdeps/x86_64/multiarch/memmove-ssse3.S +++ b/sysdeps/x86_64/multiarch/memmove-ssse3.S @@ -151,13 +151,10 @@ L(more_2x_vec): loop. */ movups %xmm0, (%rdi) -# ifdef SHARED_CACHE_SIZE_HALF - cmp $SHARED_CACHE_SIZE_HALF, %RDX_LP -# else - cmp __x86_shared_cache_size_half(%rip), %rdx -# endif + cmp __x86_shared_non_temporal_threshold(%rip), %rdx ja L(large_memcpy) +L(loop_fwd): leaq -64(%rdi, %rdx), %r8 andq $-16, %rdi movl $48, %edx @@ -199,6 +196,13 @@ L(large_memcpy): movups -64(%r9, %rdx), %xmm10 movups -80(%r9, %rdx), %xmm11 + /* Check if src and dst overlap. If they do use cacheable + writes to potentially gain positive interference between + the loads during the memmove. */ + subq %rdi, %r9 + cmpq %rdx, %r9 + jb L(loop_fwd) + sall $5, %ecx leal (%rcx, %rcx, 2), %r8d leaq -96(%rdi, %rdx), %rcx |