37 files changed, 1938 insertions, 132 deletions
diff --git a/INSTALL b/INSTALL
index 208ec98d4b..4bd3d53676 100644
--- a/INSTALL
+++ b/INSTALL
@@ -224,6 +224,23 @@ if 'CFLAGS' is specified it must enable optimization.  For example:
      By default for x86_64, the GNU C Library is built with the vector
      math library.  Use this option to disable the vector math library.
 
+'--disable-static-c++-tests'
+     By default, if the C++ toolchain lacks support for static linking,
+     configure fails to find the C++ header files and the glibc build
+     fails.  '--disable-static-c++-link-check' allows the glibc build to
+     finish, but static C++ tests will fail if the C++ toolchain doesn't
+     have the necessary static C++ libraries.  Use this option to skip
+     the static C++ tests.  This option implies
+     '--disable-static-c++-link-check'.
+
+'--disable-static-c++-link-check'
+     By default, if the C++ toolchain lacks support for static linking,
+     configure fails to find the C++ header files and the glibc build
+     fails.  Use this option to disable the static C++ link check so
+     that the C++ header files can be located.  The newly built libc.a
+     can be used to create static C++ tests if the C++ toolchain has the
+     necessary static C++ libraries.
+
 '--disable-scv'
      Disable using 'scv' instruction for syscalls.  All syscalls will
      use 'sc' instead, even if the kernel supports 'scv'.  PowerPC only.
diff --git a/catgets/Makefile b/catgets/Makefile
index 24b4560d5f..40c65eac95 100644
--- a/catgets/Makefile
+++ b/catgets/Makefile
@@ -43,8 +43,12 @@ tests-special += \
   $(objpfx)test-gencat.out \
   $(objpfx)test1.cat \
   $(objpfx)test2.cat \
-  $(objpfx)tst-catgets-mem.out
   # tests-special
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+tests-special += $(objpfx)tst-catgets-mem.out
+endif
+endif
 endif
 
 gencat-modules	= xmalloc
@@ -68,9 +72,17 @@ generated += \
   test1.h \
   test2.cat \
   test2.h \
+  # generated
+ifeq ($(run-built-tests),yes)
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+generated += \
   tst-catgets-mem.out \
   tst-catgets.mtrace \
   # generated
+endif
+endif
+endif
 
 generated-dirs += \
   de \
diff --git a/configure b/configure
index 1df2f2e6d1..1bae55b45b 100755
--- a/configure
+++ b/configure
@@ -771,6 +771,8 @@ ac_user_opts='
 enable_option_checking
 with_pkgversion
 with_bugurl
+enable_static_c___tests
+enable_static_c___link_check
 with_gd
 with_gd_include
 with_gd_lib
@@ -1440,6 +1442,10 @@ Optional Features:
   --disable-option-checking  ignore unrecognized --enable/--with options
   --disable-FEATURE       do not include FEATURE (same as --enable-FEATURE=no)
   --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
+  --disable-static-c++-tests
+                          disable static C++ tests[default=no]
+  --disable-static-c++-link-check
+                          disable static C++ link check [default=no]
   --disable-sanity-checks really do not use threads (should not be used except
                           in special situations) [default=yes]
   --enable-shared         build shared library [default=yes if GNU ld]
@@ -3855,6 +3861,29 @@ if test -z "$CPP"; then
 fi
 
 
+# This will get text that should go into config.make.
+config_vars=
+
+# Check whether --enable-static-c++-tests was given.
+if test ${enable_static_c___tests+y}
+then :
+  enableval=$enable_static_c___tests; static_cxx_tests=$enableval
+else $as_nop
+  static_cxx_tests=yes
+fi
+
+config_vars="$config_vars
+static-cxx-tests = $static_cxx_tests"
+
+# Check whether --enable-static-c++-link-check was given.
+if test ${enable_static_c___link_check+y}
+then :
+  enableval=$enable_static_c___link_check; static_cxx_link_check=$enableval
+else $as_nop
+  static_cxx_link_check=yes
+fi
+
+
 # We need the C++ compiler only for testing.
 
 
@@ -4279,10 +4308,11 @@ esac
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.beam \
     conftest$ac_exeext conftest.$ac_ext
-# Static case.
-old_LDFLAGS="$LDFLAGS"
-LDFLAGS="$LDFLAGS -static"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+if test $static_cxx_link_check$static_cxx_tests = yesyes; then
+  # Static case.
+  old_LDFLAGS="$LDFLAGS"
+  LDFLAGS="$LDFLAGS -static"
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 #include <iostream>
@@ -4304,7 +4334,8 @@ esac
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.beam \
     conftest$ac_exeext conftest.$ac_ext
-LDFLAGS="$old_LDFLAGS"
+  LDFLAGS="$old_LDFLAGS"
+fi
 ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -4324,9 +4355,6 @@ if test "`cd $srcdir; pwd -P`" = "`pwd -P`"; then
   as_fn_error $? "you must configure in a separate build directory" "$LINENO" 5
 fi
 
-# This will get text that should go into config.make.
-config_vars=
-
 # Check for a --with-gd argument and set libgd-LDFLAGS in config.make.
 
 # Check whether --with-gd was given.
diff --git a/configure.ac b/configure.ac
index bdc385d03c..e48957f318 100644
--- a/configure.ac
+++ b/configure.ac
@@ -52,6 +52,22 @@ fi
 AC_SUBST(cross_compiling)
 AC_PROG_CPP
 
+# This will get text that should go into config.make.
+config_vars=
+
+AC_ARG_ENABLE([static-c++-tests],
+	      AS_HELP_STRING([--disable-static-c++-tests],
+			     [disable static C++ tests@<:@default=no@:>@]),
+	      [static_cxx_tests=$enableval],
+	      [static_cxx_tests=yes])
+LIBC_CONFIG_VAR([static-cxx-tests], [$static_cxx_tests])
+
+AC_ARG_ENABLE([static-c++-link-check],
+	      AS_HELP_STRING([--disable-static-c++-link-check],
+			     [disable static C++ link check @<:@default=no@:>@]),
+	      [static_cxx_link_check=$enableval],
+	      [static_cxx_link_check=yes])
+
 # We need the C++ compiler only for testing.
 AC_PROG_CXX
 # It's useless to us if it can't link programs (e.g. missing -lstdc++).
@@ -61,10 +77,11 @@ AC_LANG_PUSH([C++])
 AC_LINK_IFELSE([AC_LANG_PROGRAM([], [])],
 	       [libc_cv_cxx_link_ok=yes],
 	       [libc_cv_cxx_link_ok=no])
-# Static case.
-old_LDFLAGS="$LDFLAGS"
-LDFLAGS="$LDFLAGS -static"
-AC_LINK_IFELSE([AC_LANG_SOURCE([
+if test $static_cxx_link_check$static_cxx_tests = yesyes; then
+  # Static case.
+  old_LDFLAGS="$LDFLAGS"
+  LDFLAGS="$LDFLAGS -static"
+  AC_LINK_IFELSE([AC_LANG_SOURCE([
 #include <iostream>
 
 int
@@ -74,9 +91,10 @@ main()
   return 0;
 }
 ])],
-	       [],
-	       [libc_cv_cxx_link_ok=no])
-LDFLAGS="$old_LDFLAGS"
+		 [],
+		 [libc_cv_cxx_link_ok=no])
+  LDFLAGS="$old_LDFLAGS"
+fi
 AC_LANG_POP([C++])])
 AS_IF([test $libc_cv_cxx_link_ok != yes], [CXX=])
 
@@ -84,9 +102,6 @@ if test "`cd $srcdir; pwd -P`" = "`pwd -P`"; then
   AC_MSG_ERROR([you must configure in a separate build directory])
 fi
 
-# This will get text that should go into config.make.
-config_vars=
-
 # Check for a --with-gd argument and set libgd-LDFLAGS in config.make.
 AC_ARG_WITH([gd],
 	    AS_HELP_STRING([--with-gd=DIR],
diff --git a/elf/Makefile b/elf/Makefile
index bb6cd06dec..147f1d3437 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -73,6 +73,7 @@ dl-routines = \
   dl-origin \
   dl-printf \
   dl-reloc \
+  dl-rseq-symbols \
   dl-runtime \
   dl-scope \
   dl-setup_hash \
@@ -445,6 +446,7 @@ tests += \
   tst-p_align1 \
   tst-p_align2 \
   tst-p_align3 \
+  tst-recursive-tls \
   tst-relsort1 \
   tst-ro-dynamic \
   tst-rtld-run-static \
@@ -632,13 +634,19 @@ $(objpfx)tst-rtld-does-not-exist.out: tst-rtld-does-not-exist.sh $(objpfx)ld.so
 tests += $(tests-execstack-$(have-z-execstack))
 ifeq ($(run-built-tests),yes)
 tests-special += \
-  $(objpfx)noload-mem.out \
   $(objpfx)tst-ldconfig-X.out \
   $(objpfx)tst-ldconfig-p.out \
   $(objpfx)tst-ldconfig-soname.out \
-  $(objpfx)tst-leaks1-mem.out \
   $(objpfx)tst-rtld-help.out \
   # tests-special
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+tests-special += \
+  $(objpfx)noload-mem.out \
+  $(objpfx)tst-leaks1-mem.out \
+  # tests-special
+endif
+endif
 endif
 tlsmod17a-suffixes = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
 tlsmod18a-suffixes = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
@@ -886,6 +894,23 @@ modules-names += \
   tst-null-argv-lib \
   tst-p_alignmod-base \
   tst-p_alignmod3 \
+  tst-recursive-tlsmallocmod \
+  tst-recursive-tlsmod0 \
+  tst-recursive-tlsmod1 \
+  tst-recursive-tlsmod2 \
+  tst-recursive-tlsmod3 \
+  tst-recursive-tlsmod4 \
+  tst-recursive-tlsmod5 \
+  tst-recursive-tlsmod6 \
+  tst-recursive-tlsmod7 \
+  tst-recursive-tlsmod8 \
+  tst-recursive-tlsmod9 \
+  tst-recursive-tlsmod10 \
+  tst-recursive-tlsmod11 \
+  tst-recursive-tlsmod12 \
+  tst-recursive-tlsmod13 \
+  tst-recursive-tlsmod14 \
+  tst-recursive-tlsmod15 \
   tst-relsort1mod1 \
   tst-relsort1mod2 \
   tst-ro-dynamic-mod \
@@ -1200,7 +1225,6 @@ tests-special += \
   $(objpfx)tst-trace3.out \
   $(objpfx)tst-trace4.out \
   $(objpfx)tst-trace5.out \
-  $(objpfx)tst-tunables-enable_secure-env.out \
   $(objpfx)tst-unused-dep-cmp.out \
   $(objpfx)tst-unused-dep.out \
   # tests-special
@@ -2228,13 +2252,7 @@ $(objpfx)tst-unused-dep-cmp.out: $(objpfx)tst-unused-dep.out
 	cmp $< /dev/null > $@; \
 	$(evaluate-test)
 
-$(objpfx)tst-tunables-enable_secure-env.out: $(objpfx)tst-tunables-enable_secure-env
-	$(test-wrapper-env) \
-	GLIBC_TUNABLES=glibc.rtld.enable_secure=1 \
-	$(rtld-prefix) \
-	  $< > $@; \
-	$(evaluate-test)
-
+tst-tunables-enable_secure-env-ARGS = -- $(host-test-program-cmd)
 
 $(objpfx)tst-audit11.out: $(objpfx)tst-auditmod11.so $(objpfx)tst-audit11mod1.so
 tst-audit11-ENV = LD_AUDIT=$(objpfx)tst-auditmod11.so
@@ -3093,3 +3111,11 @@ CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=$(have-mtls-descriptor)
 CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=$(have-mtls-descriptor)
 CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=$(have-mtls-descriptor)
 endif
+
+$(objpfx)tst-recursive-tls: $(objpfx)tst-recursive-tlsmallocmod.so
+# More objects than DTV_SURPLUS, to trigger DTV reallocation.
+$(objpfx)tst-recursive-tls.out: \
+  $(patsubst %,$(objpfx)tst-recursive-tlsmod%.so, \
+    0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+$(objpfx)tst-recursive-tlsmod%.os: tst-recursive-tlsmodN.c
+	$(compile-command.c) -DVAR=thread_$* -DFUNC=get_threadvar_$*
diff --git a/elf/dl-rseq-symbols.S b/elf/dl-rseq-symbols.S
new file mode 100644
index 0000000000..b4bba06a99
--- /dev/null
+++ b/elf/dl-rseq-symbols.S
@@ -0,0 +1,64 @@
+/* Define symbols used by rseq.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#if __WORDSIZE == 64
+#define RSEQ_OFFSET_SIZE	8
+#else
+#define RSEQ_OFFSET_SIZE	4
+#endif
+
+/* Some targets define a macro to denote the zero register.  */
+#undef zero
+
+/* Define 2 symbols: '__rseq_size' is public const and '_rseq_size' (an
+   alias of '__rseq_size') is hidden and writable for internal use by the
+   dynamic linker which will initialize the value both symbols point to
+   before copy relocations take place. */
+
+	.globl	__rseq_size
+	.type	__rseq_size, %object
+	.size	__rseq_size, 4
+	.hidden _rseq_size
+	.globl	_rseq_size
+	.type	_rseq_size, %object
+	.size	_rseq_size, 4
+	.section .data.rel.ro
+	.balign 4
+__rseq_size:
+_rseq_size:
+	.zero	4
+
+/* Define 2 symbols: '__rseq_offset' is public const and '_rseq_offset' (an
+   alias of '__rseq_offset') is hidden and writable for internal use by the
+   dynamic linker which will initialize the value both symbols point to
+   before copy relocations take place. */
+
+	.globl	__rseq_offset
+	.type	__rseq_offset, %object
+	.size	__rseq_offset, RSEQ_OFFSET_SIZE
+	.hidden _rseq_offset
+	.globl	_rseq_offset
+	.type	_rseq_offset, %object
+	.size	_rseq_offset, RSEQ_OFFSET_SIZE
+	.section .data.rel.ro
+	.balign RSEQ_OFFSET_SIZE
+__rseq_offset:
+_rseq_offset:
+	.zero	RSEQ_OFFSET_SIZE
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 670dbc42fc..3d221273f1 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -75,6 +75,31 @@
 /* Default for dl_tls_static_optional.  */
 #define OPTIONAL_TLS 512
 
+/* Used to count the number of threads currently executing dynamic TLS
+   updates.  Used to avoid recursive malloc calls in __tls_get_addr
+   for an interposed malloc that uses global-dynamic TLS (which is not
+   recommended); see _dl_tls_allocate_active checks.  This could be a
+   per-thread flag, but would need TLS access in the dynamic linker.  */
+unsigned int _dl_tls_threads_in_update;
+
+static inline void
+_dl_tls_allocate_begin (void)
+{
+  atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, 1);
+}
+
+static inline void
+_dl_tls_allocate_end (void)
+{
+  atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, -1);
+}
+
+static inline bool
+_dl_tls_allocate_active (void)
+{
+  return atomic_load_relaxed (&_dl_tls_threads_in_update) > 0;
+}
+
 /* Compute the static TLS surplus based on the namespace count and the
    TLS space that can be used for optimizations.  */
 static inline int
@@ -425,12 +450,18 @@ _dl_allocate_tls_storage (void)
   size += TLS_PRE_TCB_SIZE;
 #endif
 
-  /* Perform the allocation.  Reserve space for the required alignment
-     and the pointer to the original allocation.  */
+  /* Reserve space for the required alignment and the pointer to the
+     original allocation.  */
   size_t alignment = GLRO (dl_tls_static_align);
+
+  /* Perform the allocation.  */
+  _dl_tls_allocate_begin ();
   void *allocated = malloc (size + alignment + sizeof (void *));
   if (__glibc_unlikely (allocated == NULL))
-    return NULL;
+    {
+      _dl_tls_allocate_end ();
+      return NULL;
+    }
 
   /* Perform alignment and allocate the DTV.  */
 #if TLS_TCB_AT_TP
@@ -466,6 +497,8 @@ _dl_allocate_tls_storage (void)
   result = allocate_dtv (result);
   if (result == NULL)
     free (allocated);
+
+  _dl_tls_allocate_end ();
   return result;
 }
 
@@ -483,6 +516,7 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid)
   size_t newsize = max_modid + DTV_SURPLUS;
   size_t oldsize = dtv[-1].counter;
 
+  _dl_tls_allocate_begin ();
   if (dtv == GL(dl_initial_dtv))
     {
       /* This is the initial dtv that was either statically allocated in
@@ -502,6 +536,7 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid)
       if (newp == NULL)
 	oom ();
     }
+  _dl_tls_allocate_end ();
 
   newp[0].counter = newsize;
 
@@ -676,7 +711,9 @@ allocate_dtv_entry (size_t alignment, size_t size)
   if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
     {
       /* The alignment is supported by malloc.  */
+      _dl_tls_allocate_begin ();
       void *ptr = malloc (size);
+      _dl_tls_allocate_end ();
       return (struct dtv_pointer) { ptr, ptr };
     }
 
@@ -688,7 +725,10 @@ allocate_dtv_entry (size_t alignment, size_t size)
 
   /* Perform the allocation.  This is the pointer we need to free
      later.  */
+  _dl_tls_allocate_begin ();
   void *start = malloc (alloc_size);
+  _dl_tls_allocate_end ();
+
   if (start == NULL)
     return (struct dtv_pointer) {};
 
@@ -826,7 +866,11 @@ _dl_update_slotinfo (unsigned long int req_modid, size_t new_gen)
 		 free implementation.  Checking here papers over at
 		 least some dynamic TLS usage by interposed mallocs.  */
 	      if (dtv[modid].pointer.to_free != NULL)
-		free (dtv[modid].pointer.to_free);
+		{
+		  _dl_tls_allocate_begin ();
+		  free (dtv[modid].pointer.to_free);
+		  _dl_tls_allocate_end ();
+		}
 	      dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
 	      dtv[modid].pointer.to_free = NULL;
 
@@ -956,10 +1000,22 @@ __tls_get_addr (GET_ADDR_ARGS)
   size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
   if (__glibc_unlikely (dtv[0].counter != gen))
     {
-      /* Update DTV up to the global generation, see CONCURRENCY NOTES
-         in _dl_update_slotinfo.  */
-      gen = atomic_load_acquire (&GL(dl_tls_generation));
-      return update_get_addr (GET_ADDR_PARAM, gen);
+      if (_dl_tls_allocate_active ()
+	  && GET_ADDR_MODULE < _dl_tls_initial_modid_limit)
+	  /* This is a reentrant __tls_get_addr call, but we can
+	     satisfy it because it's an initially-loaded module ID.
+	     These TLS slotinfo slots do not change, so the
+	     out-of-date generation counter does not matter.  However,
+	     if not in a TLS update, still call update_get_addr below, to
+	     get off the slow path eventually.  */
+	;
+      else
+	{
+	  /* Update DTV up to the global generation, see CONCURRENCY NOTES
+	     in _dl_update_slotinfo.  */
+	  gen = atomic_load_acquire (&GL(dl_tls_generation));
+	  return update_get_addr (GET_ADDR_PARAM, gen);
+	}
     }
 
   void *p = dtv[GET_ADDR_MODULE].pointer.val;
@@ -969,7 +1025,7 @@ __tls_get_addr (GET_ADDR_ARGS)
 
   return (char *) p + GET_ADDR_OFFSET;
 }
-#endif
+#endif /* SHARED */
 
 
 /* Look up the module's TLS block as for __tls_get_addr,
@@ -1018,6 +1074,25 @@ _dl_tls_get_addr_soft (struct link_map *l)
   return data;
 }
 
+size_t _dl_tls_initial_modid_limit;
+
+void
+_dl_tls_initial_modid_limit_setup (void)
+{
+  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
+  size_t idx;
+  for (idx = 0; idx < listp->len; ++idx)
+    {
+      struct link_map *l = listp->slotinfo[idx].map;
+      if (l == NULL
+	  /* The object can be unloaded, so its modid can be
+	     reassociated.  */
+	  || !(l->l_type == lt_executable || l->l_type == lt_library))
+	break;
+    }
+  _dl_tls_initial_modid_limit = idx;
+}
+
 
 void
 _dl_add_to_slotinfo (struct link_map *l, bool do_add)
@@ -1050,9 +1125,11 @@ _dl_add_to_slotinfo (struct link_map *l, bool do_add)
 	 the first slot.  */
       assert (idx == 0);
 
+      _dl_tls_allocate_begin ();
       listp = (struct dtv_slotinfo_list *)
 	malloc (sizeof (struct dtv_slotinfo_list)
 		+ TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
+      _dl_tls_allocate_end ();
       if (listp == NULL)
 	{
 	  /* We ran out of memory while resizing the dtv slotinfo list.  */
diff --git a/elf/rtld.c b/elf/rtld.c
index e9525ea987..bfdf632e77 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -788,6 +788,8 @@ init_tls (size_t naudit)
     _dl_fatal_printf ("\
 cannot allocate TLS data structures for initial thread\n");
 
+  _dl_tls_initial_modid_limit_setup ();
+
   /* Store for detection of the special case by __tls_get_addr
      so it knows not to pass this dtv to the normal realloc.  */
   GL(dl_initial_dtv) = GET_DTV (tcbp);
@@ -1325,7 +1327,7 @@ _dl_start_args_adjust (int skip_args, int skip_env)
 
   /* Shuffle auxv down. */
   ElfW(auxv_t) ax;
-  char *oldp = (char *) (p + 1);
+  char *oldp = (char *) (p + 1 + skip_env);
   char *newp = (char *) (sp + 1);
   do
     {
diff --git a/elf/tst-recursive-tls.c b/elf/tst-recursive-tls.c
new file mode 100644
index 0000000000..716d1f783a
--- /dev/null
+++ b/elf/tst-recursive-tls.c
@@ -0,0 +1,60 @@
+/* Test with interposed malloc with dynamic TLS.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <array_length.h>
+#include <stdio.h>
+#include <support/check.h>
+#include <support/xdlfcn.h>
+
+/* Defined in tst-recursive-tlsmallocmod.so.  */
+extern __thread unsigned int malloc_subsytem_counter;
+
+static int
+do_test (void)
+{
+  /* 16 is large enough to exercise the DTV resizing case.  */
+  void *handles[16];
+
+  for (unsigned int i = 0; i < array_length (handles); ++i)
+    {
+      /* Re-use the TLS slot for module 0.  */
+      if (i > 0)
+        xdlclose (handles[0]);
+
+      char soname[30];
+      snprintf (soname, sizeof (soname), "tst-recursive-tlsmod%u.so", i);
+      handles[i] = xdlopen (soname, RTLD_NOW);
+
+      if (i > 0)
+        {
+          handles[0] = xdlopen ("tst-recursive-tlsmod0.so", RTLD_NOW);
+          int (*fptr) (void) = xdlsym (handles[0], "get_threadvar_0");
+          /* May trigger TLS storage allocation using malloc.  */
+          TEST_COMPARE (fptr (), 0);
+        }
+    }
+
+  for (unsigned int i = 0; i < array_length (handles); ++i)
+    xdlclose (handles[i]);
+
+  printf ("info: malloc subsystem calls: %u\n", malloc_subsytem_counter);
+  TEST_VERIFY (malloc_subsytem_counter > 0);
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/elf/tst-recursive-tlsmallocmod.c b/elf/tst-recursive-tlsmallocmod.c
new file mode 100644
index 0000000000..c24e9945d1
--- /dev/null
+++ b/elf/tst-recursive-tlsmallocmod.c
@@ -0,0 +1,64 @@
+/* Interposed malloc with dynamic TLS.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include <dlfcn.h>
+
+__thread unsigned int malloc_subsytem_counter;
+
+static __typeof (malloc) *malloc_fptr;
+static __typeof (free) *free_fptr;
+static __typeof (calloc) *calloc_fptr;
+static __typeof (realloc) *realloc_fptr;
+
+static void __attribute__ ((constructor))
+init (void)
+{
+  malloc_fptr = dlsym (RTLD_NEXT, "malloc");
+  free_fptr = dlsym (RTLD_NEXT, "free");
+  calloc_fptr = dlsym (RTLD_NEXT, "calloc");
+  realloc_fptr = dlsym (RTLD_NEXT, "realloc");
+}
+
+void *
+malloc (size_t size)
+{
+  ++malloc_subsytem_counter;
+  return malloc_fptr (size);
+}
+
+void
+free (void *ptr)
+{
+  ++malloc_subsytem_counter;  /* TLS access from within the allocator.  */
+  free_fptr (ptr);  /* Plain call: a void function must not return a value.  */
+}
+
+void *
+calloc (size_t a, size_t b)
+{
+  ++malloc_subsytem_counter;
+  return calloc_fptr (a, b);
+}
+
+void *
+realloc (void *ptr, size_t size)
+{
+  ++malloc_subsytem_counter;
+  return realloc_fptr (ptr, size);
+}
diff --git a/elf/tst-recursive-tlsmodN.c b/elf/tst-recursive-tlsmodN.c
new file mode 100644
index 0000000000..bb7592aee6
--- /dev/null
+++ b/elf/tst-recursive-tlsmodN.c
@@ -0,0 +1,28 @@
+/* Test module with global-dynamic TLS.  Used to trigger DTV reallocation.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Compiled with VAR and FUNC set via -D.  FUNC requires some
+   relocation against TLS variable VAR.  */
+
+__thread int VAR;
+
+int
+FUNC (void)
+{
+  return VAR;
+}
diff --git a/elf/tst-tunables-enable_secure-env.c b/elf/tst-tunables-enable_secure-env.c
index 24e846f299..01f121efc3 100644
--- a/elf/tst-tunables-enable_secure-env.c
+++ b/elf/tst-tunables-enable_secure-env.c
@@ -17,15 +17,136 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <array_length.h>
+#include <errno.h>
+#include <getopt.h>
+#include <intprops.h>
+#include <stdlib.h>
 #include <support/capture_subprocess.h>
 #include <support/check.h>
+#ifdef __linux__
+# define HAVE_AUXV 1
+# include <sys/auxv.h>
+#else
+# define HAVE_AUXV 0
+#endif
+
+/* Nonzero if the program gets called via `exec'.  */
+#define CMDLINE_OPTIONS \
+  { "restart", no_argument, &restart, 1 },
+static int restart;
+
+/* Hold the four initial arguments used to respawn the process, plus the extra
+   '--direct', '--restart', auxiliary vector values, and final NULL.  */
+static char *spargs[11];
+
+#if HAVE_AUXV
+static void
+check_auxv (unsigned long type, char *argv)
+{
+  char *endptr;  /* Set by strtoul to the first unparsed character.  */
+  errno = 0;  /* The value was printed with %lu, so parse it as unsigned.  */
+  unsigned long int varg = strtoul (argv, &endptr, 10);
+  TEST_VERIFY_EXIT (errno == 0);
+  TEST_VERIFY_EXIT (*endptr == '\0');
+  errno = 0;
+  unsigned long int v = getauxval (type);
+  TEST_COMPARE (errno, 0);
+  TEST_COMPARE (varg, v);
+}
+#endif
+
+/* Called on process re-execution.  */
+_Noreturn static void
+handle_restart (int argc, char *argv[])
+{
+  TEST_VERIFY (getenv ("GLIBC_TUNABLES") == NULL);
+  TEST_VERIFY (getenv ("LD_BIND_NOW") == NULL);
+
+#if HAVE_AUXV
+  TEST_VERIFY_EXIT (argc == 4);
+  check_auxv (AT_PHENT, argv[0]);
+  check_auxv (AT_PHNUM, argv[1]);
+  check_auxv (AT_PAGESZ, argv[2]);
+  check_auxv (AT_HWCAP, argv[3]);
+#endif
+
+  exit (EXIT_SUCCESS);
+}
 
 static int
 do_test (int argc, char *argv[])
 {
-  /* Ensure that no assertions are hit when a dynamically linked application
-     runs.  This test requires that GLIBC_TUNABLES=glibc.rtld.enable_secure=1
-     is set. */
+  /* We must have either:
+
+     - four parameters if called initially:
+       + path for ld.so            [optional]
+       + "--library-path"          [optional]
+       + the library path          [optional]
+       + the application name
+
+     - four parameters left if called through re-execution:
+       + auxiliary vector value 1
+       + auxiliary vector value 2
+       + auxiliary vector value 3
+       + auxiliary vector value 4
+  */
+  if (restart)
+    handle_restart (argc - 1, &argv[1]);
+
+  TEST_VERIFY_EXIT (argc == 2 || argc == 5);
+
+#if HAVE_AUXV
+  struct
+  {
+    unsigned long int type;
+    char str[INT_BUFSIZE_BOUND (unsigned long)];
+  } auxvals[] =
+  {
+    /* Check some auxiliary values that should be constant over process
+       re-execution.  */
+    { AT_PHENT },
+    { AT_PHNUM },
+    { AT_PAGESZ },
+    { AT_HWCAP },
+  };
+  for (int i = 0; i < array_length (auxvals); i++)
+  {
+    unsigned long int v = getauxval (auxvals[i].type);
+    snprintf (auxvals[i].str, sizeof auxvals[i].str, "%lu", v);
+  }
+#endif
+
+  {
+    int i;
+    for (i = 0; i < argc - 1; i++)
+      spargs[i] = argv[i + 1];
+    spargs[i++] = (char *) "--direct";
+    spargs[i++] = (char *) "--restart";
+#if HAVE_AUXV
+    for (int j = 0; j < array_length (auxvals); j++)
+      spargs[i++] = auxvals[j].str;
+#endif
+    spargs[i] = NULL;
+  }
+
+  {
+    char *envs[] =
+    {
+      /* Add some environment variable that should be filtered out.  */
+      (char *) "GLIBC_TUNABLES=glibc.rtld.enable_secure=1",
+      (char* ) "LD_BIND_NOW=0",
+      NULL,
+    };
+    struct support_capture_subprocess result
+      = support_capture_subprogram (spargs[0], spargs, envs);
+    support_capture_subprocess_check (&result,
+				      "tst-tunables-enable_secure-env",
+				      0,
+				      sc_allow_none);
+    support_capture_subprocess_free (&result);
+  }
+
   return 0;
 }
 
diff --git a/io/bits/fcntl2.h b/io/bits/fcntl2.h
index c8888b50c1..0cced392e7 100644
--- a/io/bits/fcntl2.h
+++ b/io/bits/fcntl2.h
@@ -61,13 +61,8 @@ open (const char *__path, int __oflag, ...)
   return __open_alias (__path, __oflag, __va_arg_pack ());
 }
 #elif __fortify_use_clang
-__fortify_function_error_function __attribute_overloadable__ int
-open (__fortify_clang_overload_arg (const char *, ,__path), int __oflag, ...)
-     __fortify_clang_unavailable ("open can be called either with 2 or 3 arguments, not more");
-
 __fortify_function __attribute_overloadable__ int
 open (__fortify_clang_overload_arg (const char *, ,__path), int __oflag)
-     __fortify_clang_prefer_this_overload
      __fortify_clang_error (__OPEN_NEEDS_MODE (__oflag),
 			    "open with O_CREAT or O_TMPFILE in second argument needs 3 arguments")
 {
diff --git a/libio/Makefile b/libio/Makefile
index f607edbefb..8720381fdc 100644
--- a/libio/Makefile
+++ b/libio/Makefile
@@ -261,15 +261,28 @@ tst-bz22415-ENV = MALLOC_TRACE=$(objpfx)tst-bz22415.mtrace \
 tst-bz24228-ENV = MALLOC_TRACE=$(objpfx)tst-bz24228.mtrace \
 		  LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so
 
-generated += test-fmemopen.mtrace test-fmemopen.check
-generated += tst-fdopen-seek-failure.mtrace tst-fdopen-seek-failure.check
-generated += tst-fopenloc.mtrace tst-fopenloc.check
-generated += tst-bz22415.mtrace tst-bz22415.check
-
 aux	:= fileops genops stdfiles stdio strops
 
+ifeq ($(run-built-tests),yes)
+ifeq ($(build-shared),yes)
+ifneq ($(PERL),no)
+generated += \
+  test-fmemopen.check \
+  test-fmemopen.mtrace \
+  tst-bz22415.check \
+  tst-bz22415.mtrace \
+  tst-bz24228.check \
+  tst-bz24228.mtrace \
+  tst-fdopen-seek-failure.check \
+  tst-fdopen-seek-failure.mtrace \
+  tst-fopenloc.check \
+  tst-fopenloc.mtrace \
+  # generated
+endif
+endif
+endif
+
 ifeq ($(build-shared),yes)
-generated += tst-bz24228.mtrace tst-bz24228.check
 aux	+= oldfileops oldstdfiles
 tests += \
   tst-stderr-compat \
@@ -286,16 +299,23 @@ shared-only-routines = oldiofopen oldiofdopen oldiofclose oldfileops	\
 
 ifeq ($(run-built-tests),yes)
 tests-special += \
-  $(objpfx)test-fmemopen-mem.out \
   $(objpfx)test-freopen.out \
-  $(objpfx)tst-bz22415-mem.out \
-  $(objpfx)tst-fdopen-seek-failure-mem.out \
   # tests-special
 ifeq (yes,$(build-shared))
 # Run tst-fopenloc-cmp.out and tst-openloc-mem.out only if shared
 # library is enabled since they depend on tst-fopenloc.out.
-tests-special += $(objpfx)tst-fopenloc-cmp.out $(objpfx)tst-fopenloc-mem.out \
-		 $(objpfx)tst-bz24228-mem.out
+tests-special += $(objpfx)tst-fopenloc-cmp.out
+ifeq ($(build-shared),yes)
+ifneq ($(PERL),no)
+tests-special += \
+  $(objpfx)test-fmemopen-mem.out \
+  $(objpfx)tst-bz22415-mem.out \
+  $(objpfx)tst-bz24228-mem.out \
+  $(objpfx)tst-fdopen-seek-failure-mem.out \
+  $(objpfx)tst-fopenloc-mem.out \
+  # tests-special
+endif
+endif
 endif
 
 tests += \
diff --git a/manual/dynlink.texi b/manual/dynlink.texi
index d71f7a30d6..03565d4fb0 100644
--- a/manual/dynlink.texi
+++ b/manual/dynlink.texi
@@ -15,6 +15,7 @@ Dynamic linkers are sometimes called @dfn{dynamic loaders}.
 @menu
 * Dynamic Linker Invocation::   Explicit invocation of the dynamic linker.
 * Dynamic Linker Introspection::    Interfaces for querying mapping information.
+* Dynamic Linker Hardening::    Avoiding unexpected issues with dynamic linking.
 @end menu
 
 @node Dynamic Linker Invocation
@@ -535,6 +536,563 @@ information is processed.
 This function is a GNU extension.
 @end deftypefun
 
+@node Dynamic Linker Hardening
+@section Avoiding Unexpected Issues With Dynamic Linking
+
+This section details recommendations for increasing application
+robustness, by avoiding potential issues related to dynamic linking.
+The recommendations have two main aims: reduce the involvement of the
+dynamic linker in application execution after process startup, and
+restrict the application to a dynamic linker feature set whose behavior
+is more easily understood.
+
+Key aspects of limiting dynamic linker usage after startup are: no use
+of the @code{dlopen} function, disabling lazy binding, and using the
+static TLS model.  More easily understood dynamic linker behavior
+requires avoiding name conflicts (symbols and sonames) and highly
+customizable features like the audit subsystem.
+
+Note that while these steps can be considered a form of application
+hardening, they do not guard against potential harm from accidental or
+deliberate loading of untrusted or malicious code.  There is only
+limited overlap with traditional security hardening for applications
+running on GNU systems.
+
+@subsection Restricted Dynamic Linker Features
+
+Avoiding certain dynamic linker features can increase predictability of
+applications and reduce the risk of running into dynamic linker defects.
+
+@itemize @bullet
+@item
+Do not use the functions @code{dlopen}, @code{dlmopen}, or
+@code{dlclose}.  Dynamic loading and unloading of shared objects
+introduces substantial complications related to symbol and thread-local
+storage (TLS) management.
+
+@item
+Without the @code{dlopen} function, @code{dlsym} and @code{dlvsym}
+cannot be used with shared object handles.  Minimizing the use of both
+functions is recommended.  If they have to be used, only the
+@code{RTLD_DEFAULT} pseudo-handle should be used.
+
+@item
+Use the local-exec or initial-exec TLS models.  If @code{dlopen} is not
+used, there are no compatibility concerns for initial-exec TLS.  This
+TLS model avoids most of the complexity around TLS access.  In
+particular, there are no TLS-related run-time memory allocations after
+process or thread start.
+
+If shared objects are expected to be used more generally, outside the
+hardened, feature-restricted context, lack of compatibility between
+@code{dlopen} and initial-exec TLS could be a concern.  In that case,
+the second-best alternative is to use global-dynamic TLS with GNU2 TLS
+descriptors, for targets that fully implement them, including the fast
+path for access to TLS variables defined in the initially loaded set of
+objects.  Like initial-exec TLS, this avoids memory allocations after
+thread creation, but only if the @code{dlopen} function is not used.
+
+@item
+Do not use lazy binding.  Lazy binding may require run-time memory
+allocation, is not async-signal-safe, and introduces considerable
+complexity.
+
+@item
+Make dependencies on shared objects explicit.  Do not assume that
+certain libraries (such as @code{libc.so.6}) are always loaded.
+Specifically, if a main program or shared object references a symbol,
+create an ELF @code{DT_NEEDED} dependency on that shared object, or on
+another shared object that is documented (or otherwise guaranteed) to
+have the required explicit dependency.  Referencing a symbol without a
+matching link dependency results in underlinking, and underlinked
+objects cannot always be loaded correctly: Initialization of objects may
+not happen in the required order.
+
+@item
+Do not create dependency loops between shared objects (@code{libA.so.1}
+depending on @code{libB.so.1} depending on @code{libC.so.1} depending on
+@code{libA.so.1}).  @Theglibc{} has to initialize one of the objects in
+the cycle first, and the choice of that object is arbitrary and can
+change over time.  The object which is initialized first (and other
+objects involved in the cycle) may not run correctly because not all of
+its dependencies have been initialized.
+
+Underlinking (see above) can hide the presence of cycles.
+
+@item
+Limit the creation of indirect function (IFUNC) resolvers.  These
+resolvers run during relocation processing, when @theglibc{} is not in
+a fully consistent state.  If you write your own IFUNC resolvers, do
+not depend on external data or function references in those resolvers.
+
+@item
+Do not use the audit functionality (@code{LD_AUDIT}, @code{DT_AUDIT},
+@code{DT_DEPAUDIT}).  Its callback and hooking capabilities introduce a
+lot of complexity and subtly alter dynamic linker behavior in corner
+cases even if the audit module is inactive.
+
+@item
+Do not use symbol interposition.  Without symbol interposition, the
+exact order in which shared objects are searched is less relevant.
+
+Exceptions to this rule are copy relocations (see the next item), and
+vague linkage, as used by the C++ implementation (see below).
+
+@item
+One potential source of symbol interposition is a combination of static
+and dynamic linking, namely linking a static archive into multiple
+dynamic shared objects.  For such scenarios, the static library should
+be converted into its own dynamic shared object.
+
+A different approach to this situation uses hidden visibility for
+symbols in the static library, but this can cause problems if the
+library does not expect that multiple copies of its code coexist within
+the same process, with no or partial sharing of state.
+
+@item
+If you use shared objects that are linked with @option{-Wl,-Bsymbolic}
+(or equivalent) or use protected visibility, the code for the main
+program must be built as @option{-fpic} or @option{-fPIC} to avoid
+creating copy relocations (and the main program must not use copy
+relocations for other reasons).  Using @option{-fpie} or @option{-fPIE}
+is not an alternative to PIC code in this context.
+
+@item
+Be careful about explicit section annotations.  Make sure that the
+target section matches the properties of the declared entity (e.g., no
+writable objects in @code{.text}).
+
+@item
+Ensure that all assembler or object input files have the recommended
+security markup, particularly for non-executable stack.
+
+@item
+Avoid using non-default linker flags and features.  In particular, do
+not use the @code{DT_PREINIT_ARRAY} dynamic tag, and do not flag
+objects as @code{DF_1_INITFIRST}.  Do not change the default linker
+script of BFD ld.  Do not override ABI defaults, such as the dynamic
+linker path (with @option{--dynamic-linker}).
+
+@item
+Some features of @theglibc{} indirectly depend on run-time code loading
+and @code{dlopen}.  Use @code{iconv_open} with built-in converters only
+(such as @code{UTF-8}).  Do not use NSS functionality such as
+@code{getaddrinfo} or @code{getpwuid_r} unless the system is configured
+for built-in NSS service modules only (see below).
+@end itemize
+
+Several considerations apply to ELF constructors and destructors.
+
+@itemize @bullet
+@item
+The dynamic linker does not take constructor and destructor priorities
+into account when determining their execution order.  Priorities are
+only used by the link editor for ordering execution within a
+completely linked object.  If a dynamic shared object needs to be
+initialized before another object, this can be expressed with a
+@code{DT_NEEDED} dependency on the object that needs to be initialized
+earlier.
+
+@item
+The recommendations to avoid cyclic dependencies and symbol
+interposition make it less likely that ELF objects are accessed before
+their ELF constructors have run.  However, using @code{dlsym} and
+@code{dlvsym}, it is still possible to access uninitialized facilities
+even with these restrictions in place.  (Of course, access to
+uninitialized functionality is also possible within a single shared
+object or the main executable, without resorting to explicit symbol
+lookup.)  Consider using dynamic, on-demand initialization instead.  To
+deal with access after de-initialization, it may be necessary to
+implement special cases for that scenario, potentially with degraded
+functionality.
+
+@item
+Be aware that when ELF destructors are executed, it is possible to
+reference already-deconstructed shared objects.  This can happen even in
+the absence of @code{dlsym} and @code{dlvsym} function calls, for
+example if client code using a shared object has registered callbacks or
+objects with another shared object.  The ELF destructor for the client
+code is executed before the ELF destructor for the shared objects that
+it uses, based on the expected dependency order.
+
+@item
+If @code{dlopen} and @code{dlmopen} are not used, @code{DT_NEEDED}
+dependency information is complete, and lazy binding is disabled, the
+execution order of ELF destructors is expected to be the reverse of the
+ELF constructor order.  However, two separate dependency sort operations
+still occur.  Even though the listed preconditions should ensure that
+both sorts produce the same ordering, it is recommended not to depend on
+the destructor order being the reverse of the constructor order.
+@end itemize
+
+The following items provide C++-specific guidance for preparing
+applications.  If another programming language is used and it uses these
+toolchain features targeted at C++ to implement some language
+constructs, these restrictions and recommendations still apply in
+analogous ways.
+
+@itemize @bullet
+@item
+C++ inline functions, templates, and other constructs may need to be
+duplicated into multiple shared objects using vague linkage, resulting
+in symbol interposition.  This type of symbol interposition is
+unproblematic, as long as the C++ one definition rule (ODR) is followed,
+and all definitions in different translation units are equivalent
+according to the C++ language rules.
+
+@item
+Be aware that under C++ language rules, it is unspecified whether
+evaluating a string literal results in the same address for each
+evaluation.  This also applies to anonymous objects of static storage
+duration that GCC creates, for example to implement the compound
+literals C++ extension.  As a result, comparing pointers to such
+objects, or using them directly as hash table keys, may give unexpected
+results.
+
+By default, variables of block scope of static storage have consistent
+addresses across different translation units, even if defined in
+functions that use vague linkage.
+
+@item
+Special care is needed if a C++ project uses symbol visibility or
+symbol version management (for example, the GCC @samp{visibility}
+attribute, the GCC @option{-fvisibility} option, or a linker version
+script with the linker option @option{--version-script}).  It is
+necessary to ensure that the symbol management remains consistent with
+how the symbols are used.  Some C++ constructs are implemented with
+the help of ancillary symbols, which can make it complicated to achieve
+consistency.  For example, an inline function that is always inlined
+into its callers has no symbol footprint for the function itself, but
+if the function contains a variable of static storage duration, this
+variable may result in the creation of one or more global symbols.
+For correctness, such symbols must be visible and bound to the same
+object in all other places where the inline function may be called.
+This requirement is not met if the symbol visibility is set to hidden,
+or if symbols are assigned a textually different symbol version
+(effectively creating two distinct symbols).
+
+Due to the complex interaction between ELF symbol management and C++
+symbol generation, it is recommended to use C++ language features for
+symbol management, in particular inline namespaces.
+
+@item
+The toolchain and dynamic linker have multiple mechanisms that bypass
+the usual symbol binding procedures.  This means that the C++ one
+definition rule (ODR) still holds even if certain symbol-based isolation
+mechanisms are used, and object addresses are not shared across
+translation units with incompatible type definitions.
+
+This does not matter if the original (language-independent) advice
+regarding symbol interposition is followed.  However, as the advice may
+be difficult to implement for C++ applications, it is recommended to
+avoid ODR violations across the entire process image.  Inline namespaces
+can be helpful in this context because they can be used to create
+distinct ELF symbols while maintaining source code compatibility at the
+C++ level.
+
+@item
+Be aware that as a special case of interposed symbols, symbols with the
+@code{STB_GNU_UNIQUE} binding type do not follow the usual ELF symbol
+namespace isolation rules: such symbols bind across @code{RTLD_LOCAL}
+boundaries.  Furthermore, symbol versioning is ignored for such symbols;
+they are bound by symbol name only.  All their definitions and uses must
+therefore be compatible.  Hidden visibility still prevents the creation
+of @code{STB_GNU_UNIQUE} symbols and can achieve isolation of
+incompatible definitions.
+
+@item
+C++ constructor priorities only affect constructor ordering within one
+shared object.  Global constructor order across shared objects is
+consistent with ELF dependency ordering if there are no ELF dependency
+cycles.
+
+@item
+C++ exception handling and run-time type information (RTTI), as
+implemented in the GNU toolchain, are not address-significant, and
+therefore are not affected by the symbol binding behaviour of the dynamic
+linker.  This means that types of the same fully-qualified name (in
+non-anonymous namespaces) are always considered the same from an
+exception-handling or RTTI perspective.  This is true even if the type
+information object or vtable has hidden symbol visibility, or the
+corresponding symbols are versioned under different symbol versions, or
+the symbols are not bound to the same objects due to the use of
+@code{RTLD_LOCAL} or @code{dlmopen}.
+
+This can cause issues in applications that contain multiple incompatible
+definitions of the same type.  Inline namespaces can be used to create
+distinct symbols at the ELF layer, avoiding this type of issue.
+
+@item
+C++ exception handling across multiple @code{dlmopen} namespaces may
+not work, particularly with the unwinder in GCC versions before 12.
+Current toolchain versions are able to process unwinding tables across
+@code{dlmopen} boundaries.  However, note that type comparison is
+name-based, not address-based (see the previous item), so exception
+types may still be matched in unexpected ways.  An important special
+case of exception handling, invoking destructors for variables of block
+scope, is not impacted by this RTTI type-sharing.  Likewise, regular
+virtual member function dispatch for objects is unaffected (but still
+requires that the type definitions match in all directly involved
+translation units).
+
+Once more, inline namespaces can be used to create distinct ELF symbols
+for different types.
+
+@item
+Although the C++ standard requires that destructors for global objects
+run in the opposite order of their constructors, the Itanium C++ ABI
+requires a different destruction order in some cases.  As a result, do
+not depend on the precise destructor invocation order in applications
+that use @code{dlclose}.
+
+@item
+Registering destructors for later invocation allocates memory and may
+silently fail if insufficient memory is available.  As a result, the
+destructor is never invoked.  This applies to all forms of destructor
+registration, with the exception of thread-local variables (see the next
+item).  To avoid this issue, ensure that such objects merely have
+trivial destructors, avoiding the need for registration, and deallocate
+resources using a different mechanism (for example, from an ELF
+destructor).
+
+@item
+A similar issue exists for @code{thread_local} variables with thread
+storage duration of types that have non-trivial destructors.  However,
+in this case, memory allocation failure during registration leads to
+process termination.  If process termination is not acceptable, use
+@code{thread_local} variables with trivial destructors only.
+Functions for per-thread cleanup can be registered using
+@code{pthread_key_create} (globally for all threads) and activated
+using @code{pthread_setspecific} (on each thread).  Note that a
+@code{pthread_key_create} call may still fail (and
+@code{pthread_key_create} keys are a limited resource in @theglibc{}), but
+this failure can be handled without terminating the process.
+@end itemize
+
+@subsection Producing Matching Binaries
+
+This subsection recommends tools and build flags for producing
+applications that meet the recommendations of the previous subsection.
+
+@itemize @bullet
+@item
+Use BFD ld (@command{ld.bfd}) from GNU binutils to produce binaries,
+invoked through a compiler driver such as @command{gcc}.  The version
+should not be too far ahead of what was current when the version of
+@theglibc{} was first released.
+
+@item
+Do not use a binutils release that is older than the one used to build
+@theglibc{} itself.
+
+@item
+Compile with @option{-ftls-model=initial-exec} to force the initial-exec
+TLS model.
+
+@item
+Link with @option{-Wl,-z,now} to disable lazy binding.
+
+@item
+Link with @option{-Wl,-z,relro} to enable RELRO (which is the default on
+most targets).
+
+@item
+Specify all direct shared object dependencies using @option{-l} options
+to avoid underlinking.  Rely on @code{.so} files (which can be linker
+scripts) and searching with the @option{-l} option.  Do not specify the
+file names of shared objects on the linker command line.
+
+@item
+Consider using @option{-Wl,-z,defs} to treat underlinking as an error
+condition.
+
+@item
+When creating a shared object (linked with @option{-shared}), use
+@option{-Wl,-soname,lib@dots{}} to set a soname that matches the final
+installed name of the file.
+
+@item
+Do not use the @option{-rpath} linker option.  (As explained below, all
+required shared objects should be installed into the default search
+path.)
+
+@item
+Use @option{-Wl,--error-rwx-segments} and @option{-Wl,--error-execstack} to
+instruct the link editor to fail the link if the resulting final object
+would have read-write-execute segments or an executable stack.  Such
+issues usually indicate that the input files are not marked up
+correctly.
+
+@item
+Ensure that for each @code{LOAD} segment in the ELF program header, file
+offsets, memory sizes, and load addresses are multiples of the largest
+page size supported at run time.  Similarly, the start address and size
+of the @code{GNU_RELRO} range should be multiples of the page size.
+
+Avoid creating gaps between @code{LOAD} segments.  The difference
+between the load addresses of two subsequent @code{LOAD} segments should
+be the size of the first @code{LOAD} segment.  (This may require linking
+with @option{-Wl,-z,noseparate-code}.)
+
+This may not be possible to achieve with the currently available link
+editors.
+
+@item
+If the multiple-of-page-size criterion for the @code{GNU_RELRO} region
+cannot be achieved, ensure that the process memory image right before
+the start of the region does not contain executable or writable memory.
+@c https://sourceware.org/pipermail/libc-alpha/2022-May/138638.html
+@end itemize
+
+@subsection Checking Binaries
+
+In some cases, if the previous recommendations are not followed, this
+can be determined from the produced binaries.  This section contains
+suggestions for verifying aspects of these binaries.
+
+@itemize @bullet
+@item
+To detect underlinking, examine the dynamic symbol table, for example
+using @samp{readelf -sDW}.  If the symbol is defined in a shared object
+that uses symbol versioning, it must carry a symbol version, as in
+@samp{pthread_kill@@GLIBC_2.34}.
+
+@item
+Examine the dynamic segment with @samp{readelf -dW} to check that all
+the required @code{NEEDED} entries are present.  (It is not necessary to
+list indirect dependencies if these dependencies are guaranteed to
+remain during the evolution of the explicitly listed direct
+dependencies.)
+
+@item
+The @code{NEEDED} entries should not contain full path names including
+slashes, only @code{sonames}.
+
+@item
+For a further consistency check, collect all shared objects referenced
+via @code{NEEDED} entries in dynamic segments, transitively, starting at
+the main program.  Then determine their dynamic symbol tables (using
+@samp{readelf -sDW}, for example).  Ideally, every symbol should be
+defined at most once, so that symbol interposition does not happen.
+
+If there are interposed data symbols, check if the single interposing
+definition is in the main program.  In this case, there must be a copy
+relocation for it.  (This only applies to targets with copy relocations.)
+
+Function symbols should only be interposed in C++ applications, to
+implement vague linkage.  (See the discussion in the C++ recommendations
+above.)
+
+@item
+Using the previously collected @code{NEEDED} entries, check that the
+dependency graph does not contain any cycles.
+
+@item
+The dynamic segment should also mention @code{BIND_NOW} on the
+@code{FLAGS} line or @code{NOW} on the @code{FLAGS_1} line (one is
+enough).
+
+@item
+For shared objects (not main programs), if the program header has a
+@code{PT_TLS} segment, the dynamic segment (as shown by @samp{readelf
+-dW}) should contain the @code{STATIC_TLS} flag on the @code{FLAGS}
+line.
+
+If @code{STATIC_TLS} is missing in shared objects, ensure that the
+appropriate relocations for GNU2 TLS descriptors are used (for example,
+@code{R_AARCH64_TLSDESC} or @code{R_X86_64_TLSDESC}).
+
+@item
+There should not be a reference to the symbols @code{__tls_get_addr},
+@code{__tls_get_offset}, @code{__tls_get_addr_opt} in the dynamic symbol
+table (in the @samp{readelf -sDW} output).  Thread-local storage must be
+accessed using the initial-exec (static) model, or using GNU2 TLS
+descriptors.
+
+@item
+Likewise, the functions @code{dlopen}, @code{dlmopen}, @code{dlclose}
+should not be referenced from the dynamic symbol table.
+
+@item
+For shared objects, there should be a @code{SONAME} entry that matches
+the file name (the base name, i.e., the part after the slash).  The
+@code{SONAME} string must not contain a slash @samp{/}.
+
+@item
+For all objects, the dynamic segment (as shown by @samp{readelf -dW})
+should not contain @code{RPATH} or @code{RUNPATH} entries.
+
+@item
+Likewise, the dynamic segment should not show any @code{AUDIT},
+@code{DEPAUDIT}, @code{AUXILIARY}, @code{FILTER}, or
+@code{PREINIT_ARRAY} tags.
+
+@item
+If the dynamic segment contains a (deprecated) @code{HASH} tag, it
+must also contain a @code{GNU_HASH} tag.
+
+@item
+The @code{INITFIRST} flag (under @code{FLAGS_1}) should not be used.
+
+@item
+The program header must not have @code{LOAD} segments that are writable
+and executable at the same time.
+
+@item
+All produced objects should have a @code{GNU_STACK} program header that
+is not marked as executable.  (However, on some newer targets, a
+non-executable stack is the default, so the @code{GNU_STACK} program
+header is not required.)
+@end itemize
+
+@subsection Run-time Considerations
+
+In addition to preparing program binaries in a recommended fashion, the
+run-time environment should be set up in such a way that problematic
+dynamic linker features are not used.
+
+@itemize @bullet
+@item
+Install shared objects using their sonames in a default search path
+directory (usually @file{/usr/lib64}).  Do not use symbolic links.
+@c This is currently not standard practice.
+
+@item
+The default search path must not contain objects with duplicate file
+names or sonames.
+
+@item
+Do not use environment variables (@code{LD_@dots{}} variables such as
+@code{LD_PRELOAD} or @code{LD_LIBRARY_PATH}, or @code{GLIBC_TUNABLES})
+to change default dynamic linker behavior.
+
+@item
+Do not install shared objects in non-default locations.  (Such locations
+are listed explicitly in the configuration file for @command{ldconfig},
+usually @file{/etc/ld.so.conf}, or in files included from there.)
+
+@item
+In relation to the previous item, do not install any objects in
+@code{glibc-hwcaps} subdirectories.
+
+@item
+Do not configure dynamically-loaded NSS service modules, to avoid
+accidental internal use of the @code{dlopen} facility.  The @code{files}
+and @code{dns} modules are built in and do not rely on @code{dlopen}.
+
+@item
+Do not truncate and overwrite files containing programs and shared
+objects in place, while they are used.  Instead, write the new version
+to a different path and use @code{rename} to replace the
+already-installed version.
+
+@item
+Be aware that during a component update procedure that involves multiple
+object files (shared objects and main programs), concurrently starting
+processes may observe an inconsistent combination of object files (some
+already updated, some still at the previous version).  For example,
+this can happen during an update of @theglibc{} itself.
+@end itemize
 
 @c FIXME these are undocumented:
 @c dladdr
diff --git a/manual/install.texi b/manual/install.texi
index 6504d02c62..a7847b02c0 100644
--- a/manual/install.texi
+++ b/manual/install.texi
@@ -252,6 +252,22 @@ configure with @option{--disable-werror}.
 By default for x86_64, @theglibc{} is built with the vector math library.
 Use this option to disable the vector math library.
 
+@item --disable-static-c++-tests
+By default, if the C++ toolchain lacks support for static linking,
+configure fails to find the C++ header files and the glibc build fails.
+@option{--disable-static-c++-link-check} allows the glibc build to finish,
+but static C++ tests will fail if the C++ toolchain doesn't have the
+necessary static C++ libraries.  Use this option to skip the static C++
+tests.  This option implies @option{--disable-static-c++-link-check}.
+
+@item --disable-static-c++-link-check
+By default, if the C++ toolchain lacks support for static linking,
+configure fails to find the C++ header files and the glibc build fails.
+Use this option to disable the static C++ link check so that the C++
+header files can be located.  The newly built libc.a can be used to
+create static C++ tests if the C++ toolchain has the necessary static
+C++ libraries.
+
 @item --disable-scv
 Disable using @code{scv} instruction for syscalls. All syscalls will use
 @code{sc} instead, even if the kernel supports @code{scv}. PowerPC only.
diff --git a/manual/llio.texi b/manual/llio.texi
index fe1807a849..78c7c79913 100644
--- a/manual/llio.texi
+++ b/manual/llio.texi
@@ -1573,10 +1573,15 @@ permitted.  They include @code{PROT_READ}, @code{PROT_WRITE}, and
 of address space for future use.  The @code{mprotect} function can be
 used to change the protection flags.  @xref{Memory Protection}.
 
-@var{flags} contains flags that control the nature of the map.
-One of @code{MAP_SHARED} or @code{MAP_PRIVATE} must be specified.
+The @var{flags} parameter contains flags that control the nature of
+the map.  One of @code{MAP_SHARED}, @code{MAP_SHARED_VALIDATE}, or
+@code{MAP_PRIVATE} must be specified.  Additional flags may be bitwise
+OR'd to further define the mapping.
 
-They include:
+Note that, aside from @code{MAP_PRIVATE} and @code{MAP_SHARED}, not
+all flags are supported on all versions of all operating systems.
+Consult the kernel-specific documentation for details.  The flags
+include:
 
 @vtable @code
 @item MAP_PRIVATE
@@ -1598,9 +1603,19 @@ Note that actual writing may take place at any time.  You need to use
 @code{msync}, described below, if it is important that other processes
 using conventional I/O get a consistent view of the file.
 
+@item MAP_SHARED_VALIDATE
+Similar to @code{MAP_SHARED} except that additional flags will be
+validated by the kernel, and the call will fail if an unrecognized
+flag is provided.  With @code{MAP_SHARED} using a flag on a kernel
+that doesn't support it causes the flag to be ignored.
+@code{MAP_SHARED_VALIDATE} should be used when the behavior of all
+flags is required.
+
 @item MAP_FIXED
 This forces the system to use the exact mapping address specified in
-@var{address} and fail if it can't.
+@var{address} and fail if it can't.  Note that if the new mapping
+would overlap an existing mapping, the overlapping portion of the
+existing map is unmapped.
 
 @c One of these is official - the other is obviously an obsolete synonym
 @c Which is which?
@@ -1641,10 +1656,73 @@ The @code{MAP_HUGETLB} flag is specific to Linux.
 @c There is a mechanism to select different hugepage sizes; see
 @c include/uapi/asm-generic/hugetlb_encode.h in the kernel sources.
 
-@c Linux has some other MAP_ options, which I have not discussed here.
-@c MAP_DENYWRITE, MAP_EXECUTABLE and MAP_GROWSDOWN don't seem applicable to
-@c user programs (and I don't understand the last two).  MAP_LOCKED does
-@c not appear to be implemented.
+@item MAP_32BIT
+Require addresses that can be accessed with a signed 32 bit pointer,
+i.e., within the first 2 GiB.  Ignored if @code{MAP_FIXED} is specified.
+
+@item MAP_DENYWRITE
+@itemx MAP_EXECUTABLE
+@itemx MAP_FILE
+
+Provided for compatibility.  Ignored by the Linux kernel.
+
+@item MAP_FIXED_NOREPLACE
+Similar to @code{MAP_FIXED} except the call will fail with
+@code{EEXIST} if the new mapping would overwrite an existing mapping.
+To test for support for this flag, specify @code{MAP_FIXED_NOREPLACE}
+without @code{MAP_FIXED}, and (if the call was successful) check the
+actual address returned.  If it does not match the address passed, then
+this flag is not supported.
+
+@item MAP_GROWSDOWN
+This flag is used to make stacks, and is typically only needed inside
+the program loader to set up the main stack for the running process.
+The mapping is created according to the other flags, except an
+additional page just prior to the mapping is marked as a ``guard
+page''.  If a write is attempted inside this guard page, that page is
+mapped, the mapping is extended, and a new guard page is created.
+Thus, the mapping continues to grow towards lower addresses until it
+encounters some other mapping.
+
+Note that accessing memory beyond the guard page will not trigger this
+feature.  In gcc, use @code{-fstack-clash-protection} to ensure the
+guard page is always touched.
+
+@item MAP_LOCKED
+A hint that requests that mapped pages are locked in memory (i.e. not
+paged out).  Note that this is a request and not a requirement; use
+@code{mlock} if locking is required.
+
+@item MAP_POPULATE
+@itemx MAP_NONBLOCK
+@code{MAP_POPULATE} is a hint that requests that the kernel read-ahead
+a file-backed mapping, causing pages to be mapped before they're
+needed.  @code{MAP_NONBLOCK} is a hint that requests that the kernel
+@emph{not} attempt such except for pages that are already in memory.  Note
+that neither of these hints affects future paging activity; use
+@code{mlock} if such needs to be controlled.
+
+@item MAP_NORESERVE
+Asks the kernel to not reserve physical backing (i.e. space in a swap
+device) for a mapping.  This would be useful for, for example, a very
+large but sparsely used mapping which need not be limited in total
+length by available RAM, but with very few mapped pages.  Note that
+writes to such a mapping may cause a @code{SIGSEGV} if the system is
+unable to map a page due to lack of resources.
+
+On Linux, this flag's behavior may be overridden by
+@file{/proc/sys/vm/overcommit_memory} as documented in the proc(5) man
+page.
+
+@item MAP_STACK
+Ensures that the resulting mapping is suitable for use as a program
+stack.  For example, the use of huge pages might be precluded.
+
+@item MAP_SYNC
+This is a special flag for DAX devices, which tells the kernel to
+write dirty metadata out whenever dirty data is written out.  Unlike
+most other flags, this one will fail unless @code{MAP_SHARED_VALIDATE}
+is also given.
 
 @end vtable
 
@@ -1655,6 +1733,24 @@ Possible errors include:
 
 @table @code
 
+@item EACCES
+
+@var{filedes} was not open for the type of access specified in @var{protect}.
+
+@item EAGAIN
+
+The system has temporarily run out of resources.
+
+@item EBADF
+
+The @var{filedes} passed is invalid, and a valid file descriptor is
+required (i.e. @code{MAP_ANONYMOUS} was not specified).
+
+@item EEXIST
+
+@code{MAP_FIXED_NOREPLACE} was specified and an existing mapping was
+found overlapping the requested address range.
+
 @item EINVAL
 
 Either @var{address} was unusable (because it is not a multiple of the
@@ -1663,23 +1759,37 @@ applicable page size), or inconsistent @var{flags} were given.
 If @code{MAP_HUGETLB} was specified, the file or system does not support
 large page sizes.
 
-@item EACCES
+@item ENODEV
 
-@var{filedes} was not open for the type of access specified in @var{protect}.
+This file is of a type that doesn't support mapping, the process has
+exceeded its data space limit, or the map request would exceed the
+process's virtual address space.
 
 @item ENOMEM
 
-Either there is not enough memory for the operation, or the process is
-out of address space.
-
-@item ENODEV
-
-This file is of a type that doesn't support mapping.
+There is not enough memory for the operation, the process is out of
+address space, or there are too many mappings.  On Linux, the maximum
+number of mappings can be controlled via
+@file{/proc/sys/vm/max_map_count} or, if your OS supports it, via
+the @code{vm.max_map_count} @code{sysctl} setting.
 
 @item ENOEXEC
 
 The file is on a filesystem that doesn't support mapping.
 
+@item EPERM
+
+@code{PROT_EXEC} was requested but the file is on a filesystem that
+was mounted with execution denied, a file seal prevented the mapping,
+or the caller set @code{MAP_HUGETLB} but does not have the required
+privileges.
+
+@item EOVERFLOW
+
+Either the offset into the file plus the length of the mapping causes
+internal page counts to overflow, or the offset requested exceeds the
+length of the file.
+
 @c On Linux, EAGAIN will appear if the file has a conflicting mandatory lock.
 @c However mandatory locks are not discussed in this manual.
 @c
diff --git a/misc/Makefile b/misc/Makefile
index c273ec6974..5d17c562fe 100644
--- a/misc/Makefile
+++ b/misc/Makefile
@@ -214,12 +214,18 @@ routines_no_fortify += \
   syslog \
   # routines_no_fortify
 
+ifeq ($(run-built-tests),yes)
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
 generated += \
   tst-allocate_once-mem.out \
   tst-allocate_once.mtrace \
   tst-error1-mem.out \
   tst-error1.mtrace \
   # generated
+endif
+endif
+endif
 
 aux := init-misc
 install-lib := libg.a
@@ -285,8 +291,14 @@ tests-internal += tst-fd_to_filename
 tests-static += tst-fd_to_filename
 
 ifeq ($(run-built-tests),yes)
-tests-special += $(objpfx)tst-error1-mem.out \
-  $(objpfx)tst-allocate_once-mem.out
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+tests-special += \
+  $(objpfx)tst-allocate_once-mem.out \
+  $(objpfx)tst-error1-mem.out \
+  # tests-special
+endif
+endif
 endif
 
 tests-container := \
diff --git a/nptl/Makefile b/nptl/Makefile
index b3f8af2e1c..c4c27e0d23 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -545,6 +545,9 @@ tests-static += \
   # tests-static
 
 tests += tst-cancel24-static
+ifeq ($(static-cxx-tests),no)
+tests-unsupported += tst-cancel24-static
+endif
 
 tests-internal += \
   tst-sem11-static \
@@ -556,10 +559,12 @@ xtests-static += tst-setuid1-static
 ifeq ($(run-built-tests),yes)
 tests-special += \
   $(objpfx)tst-oddstacklimit.out \
-  $(objpfx)tst-stack3-mem.out \
   # tests-special
 ifeq ($(build-shared),yes)
 tests-special += $(objpfx)tst-tls6.out
+ifneq ($(PERL),no)
+tests-special += $(objpfx)tst-stack3-mem.out
+endif
 endif
 endif
 
@@ -617,10 +622,17 @@ tst-stack3-ENV = MALLOC_TRACE=$(objpfx)tst-stack3.mtrace \
 $(objpfx)tst-stack3-mem.out: $(objpfx)tst-stack3.out
 	$(common-objpfx)malloc/mtrace $(objpfx)tst-stack3.mtrace > $@; \
 	$(evaluate-test)
+
+ifeq ($(run-built-tests),yes)
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
 generated += \
   tst-stack3-mem.out \
   tst-stack3.mtrace \
   # generated
+endif
+endif
+endif
 
 tst-stack4mod.sos=$(shell for i in 0 1 2 3 4 5 6 7 8 9 10 \
 				   11 12 13 14 15 16 17 18 19; do \
diff --git a/posix/Makefile b/posix/Makefile
index a1e84853a8..2c598cd20a 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -418,6 +418,17 @@ generated += \
   $(addprefix wordexp-test-result, 1 2 3 4 5 6 7 8 9 10) \
   annexc \
   annexc.out \
+  getconf.speclist \
+  ptestcases.h \
+  testcases.h \
+  tst-getconf.out \
+  wordexp-tst.out \
+  # generated
+
+ifeq ($(run-built-tests),yes)
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+generated += \
   bug-ga2-mem.out \
   bug-ga2.mtrace \
   bug-glob2-mem.out \
@@ -430,23 +441,22 @@ generated += \
   bug-regex21.mtrace \
   bug-regex31-mem.out \
   bug-regex31.mtrace \
+  bug-regex36-mem.out \
   bug-regex36.mtrace \
-  getconf.speclist \
-  ptestcases.h \
-  testcases.h \
   tst-boost-mem.out \
   tst-boost.mtrace \
   tst-fnmatch-mem.out \
   tst-fnmatch.mtrace \
-  tst-getconf.out \
   tst-pcre-mem.out \
   tst-pcre.mtrace \
   tst-rxspencer-no-utf8-mem.out \
   tst-rxspencer-no-utf8.mtrace \
   tst-vfork3-mem.out \
   tst-vfork3.mtrace \
-  wordexp-tst.out \
   # generated
+endif
+endif
+endif
 
 ifeq ($(run-built-tests),yes)
 ifeq (yes,$(build-shared))
@@ -461,6 +471,9 @@ endif
 # XXX Please note that for now we ignore the result of this test.
 tests-special += $(objpfx)annexc.out
 ifeq ($(run-built-tests),yes)
+tests-special += $(objpfx)tst-getconf.out
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
 tests-special += \
   $(objpfx)bug-ga2-mem.out \
   $(objpfx)bug-glob2-mem.out \
@@ -471,13 +484,14 @@ tests-special += \
   $(objpfx)bug-regex36-mem.out \
   $(objpfx)tst-boost-mem.out \
   $(objpfx)tst-fnmatch-mem.out \
-  $(objpfx)tst-getconf.out \
   $(objpfx)tst-glob-tilde-mem.out \
   $(objpfx)tst-pcre-mem.out \
   $(objpfx)tst-rxspencer-no-utf8-mem.out \
   $(objpfx)tst-vfork3-mem.out \
   # tests-special
 endif
+endif
+endif
 
 include ../Rules
 
diff --git a/signal/Makefile b/signal/Makefile
index e8e3dce0cf..7cddbc3c65 100644
--- a/signal/Makefile
+++ b/signal/Makefile
@@ -46,11 +46,22 @@ routines	:= signal raise killpg \
 		   allocrtsig sigtimedwait sigwaitinfo sigqueue \
 		   sighold sigrelse sigignore sigset
 
-tests		:= tst-signal tst-sigset tst-sigsimple tst-raise tst-sigset2 \
-		   tst-sigwait-eintr tst-sigaction \
-		   tst-minsigstksz-1 tst-minsigstksz-2 tst-minsigstksz-3 \
-		   tst-minsigstksz-3a tst-minsigstksz-4 tst-minsigstksz-5 \
-		   tst-sigisemptyset
+tests := \
+  tst-minsigstksz-1 \
+  tst-minsigstksz-2 \
+  tst-minsigstksz-3 \
+  tst-minsigstksz-3a \
+  tst-minsigstksz-4 \
+  tst-minsigstksz-5 \
+  tst-raise \
+  tst-sigaction \
+  tst-sigisemptyset \
+  tst-signal \
+  tst-sigset \
+  tst-sigset2 \
+  tst-sigsimple \
+  tst-sigwait-eintr \
+  # tests
 
 include ../Rules
 
diff --git a/socket/Makefile b/socket/Makefile
index fc1bd0a260..df732fa9b7 100644
--- a/socket/Makefile
+++ b/socket/Makefile
@@ -71,6 +71,7 @@ tests := \
   tst-cmsg_cloexec \
   tst-cmsghdr \
   tst-connect \
+  tst-shutdown \
   tst-sockopt \
   # tests
 
diff --git a/socket/tst-shutdown.c b/socket/tst-shutdown.c
new file mode 100644
index 0000000000..a305e5e494
--- /dev/null
+++ b/socket/tst-shutdown.c
@@ -0,0 +1,257 @@
+/* Test the shutdown function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <support/check.h>
+#include <support/support.h>
+#include <support/xsocket.h>
+#include <support/xunistd.h>
+#include <sys/socket.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+
+struct connection
+{
+  int sockets[2];
+};
+
+void
+establish_connection (struct connection *conn)
+{
+  if (socketpair (AF_UNIX, SOCK_STREAM, 0, conn->sockets) != 0)
+    {
+      FAIL_EXIT1 ("socketpair (AF_UNIX, SOCK_STREAM, 0): %m");
+    }
+}
+
+void
+close_connection (struct connection *conn)
+{
+  xclose (conn->sockets[0]);
+  xclose (conn->sockets[1]);
+}
+
+/* Open a file and check that shutdown fails with the ENOTSOCK error code.  */
+void
+do_test_enotsock (void)
+{
+  /* Open file and check that shutdown will fail with ENOTSOCK.  */
+  int fd = xopen ("/dev/null", O_RDWR, 0);
+
+  int result = shutdown (fd, SHUT_RD);
+  if (result == 0 || errno != ENOTSOCK)
+    {
+      FAIL_EXIT1 ("shutdown should fail with ENOTSOCK");
+    }
+  xclose (fd);
+}
+
+/* Test shutdown with SHUT_RD.  */
+void
+do_test_shut_rd (void)
+{
+  struct connection conn;
+  const char *str = "AAAAAAA";
+  int len = 8;
+  int ret;
+  void *s_buf = xmalloc (len);
+  bzero (s_buf, len);
+
+  establish_connection (&conn);
+  int server = conn.sockets[0];
+  int client = conn.sockets[1];
+
+  /* Call shutdown with SHUT_RD on server socket.  */
+  if (shutdown (server, SHUT_RD) != 0)
+    {
+      FAIL_EXIT1 ("shutdown with SHUT_RD on socket %d failed", server);
+    }
+
+  ret = send (server, str, len, 0);
+  if (ret <= 0)
+    {
+      FAIL_EXIT1 ("send (%d, data, %d): %m", server, len);
+    }
+
+  ret = recv (client, s_buf, len, 0);
+  if (ret <= 0)
+    {
+      FAIL_EXIT1 ("recv (%d, data, %d): %m", client, len);
+    }
+
+  TEST_COMPARE_BLOB (str, len, s_buf, len);
+
+  /* Sending to the socket whose read side is shut down must fail.  */
+  errno = 0;
+  ret = send (client, str, len, MSG_NOSIGNAL);
+  if (ret >= 0 || errno != EPIPE)
+    {
+      FAIL_EXIT1 ("Send on SHUT_RD socket should be disallowed: %m");
+    }
+
+  /* Recv on the shut down socket should return zero and no error.  */
+  errno = 0;
+  ret = recv (server, s_buf, len, 0);
+  if (ret != 0 || errno != 0)
+    {
+      FAIL_EXIT1 ("recv should return 0 without error: %m");
+    }
+
+  close_connection (&conn);
+}
+
+/* Test shutdown with SHUT_WR.  */
+void
+do_test_shut_wr (void)
+{
+  struct connection conn;
+  const char *str1 = "CCCCCCC";
+  const char *str2 = "DDDDDDD";
+  const char *str3 = "EEEEEEE";
+  int len = 8;
+  int ret;
+  void *c_buf = xmalloc (len);
+  void *s_buf = xmalloc (len);
+
+  establish_connection (&conn);
+  int server = conn.sockets[0];
+  int client = conn.sockets[1];
+
+  xwrite (client, str1, len);
+
+  if (shutdown (client, SHUT_WR) != 0)
+    {
+      FAIL_EXIT1 ("shutdown with SHUT_WR on socket %d failed", client);
+    }
+
+  ret = send (client, str2, len, MSG_NOSIGNAL);
+  if (ret >= 0 || errno != EPIPE)
+    {
+      FAIL_EXIT1 ("send on SHUT_WR socket should fail with EPIPE");
+    }
+
+  /* Read data written before shutdown and check if it's correct.  */
+  xread (server, s_buf, len);
+  TEST_COMPARE_BLOB (str1, len, s_buf, len);
+
+  /* Second read should return zero without error.  */
+  errno = 0;
+  if (read (server, s_buf, len) != 0 || errno != 0)
+    {
+      FAIL_EXIT1 ("read after shutdown should return zero without error: %m");
+    }
+
+  /* The other direction still works: server writes, client reads.  */
+  memcpy (s_buf, str3, len);
+  xwrite (server, s_buf, len);
+
+  xread (client, c_buf, len);
+  TEST_COMPARE_BLOB (s_buf, len, c_buf, len);
+
+  close_connection (&conn);
+}
+
+/* Test shutdown with SHUT_RDWR.  */
+void
+do_test_shut_rdwr (void)
+{
+  struct connection conn;
+  struct sockaddr peer;
+  socklen_t peer_len = sizeof (peer);
+
+  const char *str1 = "FFFFFFF";
+  const char *str2 = "GGGGGGG";
+  int len = 8;
+  int ret;
+  void *s_buf = xmalloc (len);
+  bzero (s_buf, len);
+
+  establish_connection (&conn);
+  int server = conn.sockets[0];
+  int client = conn.sockets[1];
+
+  /* Send some data to both sockets before shutdown.  */
+  xwrite (client, str1, len);
+  xwrite (server, str2, len);
+
+  /* Call shutdown with SHUT_RDWR on client socket.  */
+  if (shutdown (client, SHUT_RDWR) != 0)
+    {
+      FAIL_EXIT1 ("shutdown with SHUT_RDWR on socket %d failed", client);
+    }
+
+  /* Verify that the socket descriptor is still valid after shutdown.  */
+  xgetsockname (client, &peer, &peer_len);
+
+  /* The client can still read data the server wrote before shutdown.  */
+  xread (client, s_buf, len);
+  TEST_COMPARE_BLOB (s_buf, len, str2, len);
+
+  /* Second read should return zero, but no error.  */
+  errno = 0;
+  if (read (client, s_buf, len) != 0 || errno != 0)
+    {
+      FAIL_EXIT1 ("read after shutdown should return zero without error: %m");
+    }
+
+  /* Sending from the server to the shut down client must fail.  */
+  errno = 0;
+  ret = send (server, str2, len, MSG_NOSIGNAL);
+  if (ret >= 0 || errno != EPIPE)
+    {
+      FAIL_EXIT1 ("send to RDWR shutdown socket should fail with EPIPE");
+    }
+
+  /* The server can still read data the client wrote before shutdown.  */
+  xread (server, s_buf, len);
+  TEST_COMPARE_BLOB (s_buf, len, str1, len);
+
+  /* Second read should return zero, but no error.  */
+  errno = 0;
+  if (read (server, s_buf, len) != 0 || errno != 0)
+    {
+      FAIL_EXIT1 ("read after shutdown should return zero without error: %m");
+    }
+
+  /* Sending from the shut down client itself must fail as well.  */
+  errno = 0;
+  ret = send (client, str1, len, MSG_NOSIGNAL);
+  if (ret >= 0 || errno != EPIPE)
+    {
+      FAIL_EXIT1 ("send to RDWR shutdown socket should fail with EPIPE");
+    }
+
+  close_connection (&conn);
+}
+
+static int
+do_test (void)
+{
+  do_test_enotsock ();
+  do_test_shut_rd ();
+  do_test_shut_wr ();
+  do_test_shut_rdwr ();
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 6bc972af1a..a63c05a120 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -229,10 +229,6 @@ tests := \
   tst-popen \
   tst-popen2 \
   tst-printf-binary \
-  tst-printf-bz18872 \
-  tst-printf-bz25691 \
-  tst-printf-fp-free \
-  tst-printf-fp-leak \
   tst-printf-intn \
   tst-printf-oct \
   tst-printf-round \
@@ -261,7 +257,6 @@ tests := \
   tst-vfprintf-mbs-prec \
   tst-vfprintf-user-type \
   tst-vfprintf-width-i18n \
-  tst-vfprintf-width-prec \
   tst-vfprintf-width-prec-alloc \
   tst-wc-printf \
   tstdiomisc \
@@ -270,6 +265,20 @@ tests := \
   xbug \
   # tests
 
+ifeq ($(run-built-tests),yes)
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+tests += \
+  tst-printf-bz18872 \
+  tst-printf-bz25691 \
+  tst-printf-fp-free \
+  tst-printf-fp-leak \
+  tst-vfprintf-width-prec \
+  # tests
+endif
+endif
+endif
+
 tests-container += \
   tst-popen3
   # tests-container
@@ -293,14 +302,19 @@ test-srcs = \
 
 ifeq ($(run-built-tests),yes)
 tests-special += \
-  $(objpfx)tst-printf-bz18872-mem.out \
-  $(objpfx)tst-printf-bz25691-mem.out \
-  $(objpfx)tst-printf-fp-free-mem.out \
-  $(objpfx)tst-printf-fp-leak-mem.out \
   $(objpfx)tst-printf.out \
   $(objpfx)tst-printfsz-islongdouble.out \
   $(objpfx)tst-setvbuf1-cmp.out \
   $(objpfx)tst-unbputc.out \
+  # tests-special
+
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+tests-special += \
+  $(objpfx)tst-printf-bz18872-mem.out \
+  $(objpfx)tst-printf-bz25691-mem.out \
+  $(objpfx)tst-printf-fp-free-mem.out \
+  $(objpfx)tst-printf-fp-leak-mem.out \
   $(objpfx)tst-vfprintf-width-prec-mem.out \
   # tests-special
 
@@ -317,6 +331,8 @@ generated += \
   tst-vfprintf-width-prec-mem.out \
   tst-vfprintf-width-prec.mtrace \
   # generated
+endif
+endif
 endif # $(run-built-tests)
 
 tests-special += $(objpfx)tst-errno-manual.out
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
index ef5ea9ab8c..3e251cc234 100644
--- a/sysdeps/aarch64/multiarch/Makefile
+++ b/sysdeps/aarch64/multiarch/Makefile
@@ -15,6 +15,7 @@ sysdep_routines += \
   memset_generic \
   memset_kunpeng \
   memset_mops \
+  memset_oryon1 \
   memset_zva64 \
   strlen_asimd \
   strlen_generic \
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index 65c56b9b41..b2fda541f9 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -56,6 +56,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
   IFUNC_IMPL (i, name, memset,
 	      IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_zva64)
+	      IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_oryon1)
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag)
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng)
 #if HAVE_AARCH64_SVE_ASM
diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c
index 34bce045dd..bd063c16c9 100644
--- a/sysdeps/aarch64/multiarch/memset.c
+++ b/sysdeps/aarch64/multiarch/memset.c
@@ -1,5 +1,6 @@
 /* Multiple versions of memset. AARCH64 version.
    Copyright (C) 2017-2024 Free Software Foundation, Inc.
+   Copyright The GNU Toolchain Authors.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -34,6 +35,7 @@ extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden;
 extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden;
 extern __typeof (__redirect_memset) __memset_generic attribute_hidden;
 extern __typeof (__redirect_memset) __memset_mops attribute_hidden;
+extern __typeof (__redirect_memset) __memset_oryon1 attribute_hidden;
 
 static inline __typeof (__redirect_memset) *
 select_memset_ifunc (void)
@@ -49,6 +51,9 @@ select_memset_ifunc (void)
 	return __memset_a64fx;
     }
 
+  if (IS_ORYON1 (midr) && zva_size == 64)
+    return __memset_oryon1;
+
   if (IS_KUNPENG920 (midr))
     return __memset_kunpeng;
 
diff --git a/sysdeps/aarch64/multiarch/memset_oryon1.S b/sysdeps/aarch64/multiarch/memset_oryon1.S
new file mode 100644
index 0000000000..b43a43b54e
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset_oryon1.S
@@ -0,0 +1,169 @@
+/* Optimized memset for Qualcomm's Oryon-1 core.
+   Copyright (C) 2018-2024 Free Software Foundation, Inc.
+   Copyright The GNU Toolchain Authors.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "memset-reg.h"
+
+/* Assumptions:
+   ARMv8-a, AArch64, unaligned accesses, DC ZVA block size of 64 bytes.
+ */
+
+ENTRY (__memset_oryon1)
+
+	PTR_ARG (0)
+	SIZE_ARG (2)
+
+	bfi	valw, valw, 8, 8
+	bfi	valw, valw, 16, 16
+	bfi	val, val, 32, 32
+
+	add	dstend, dstin, count
+
+	cmp	count, 96
+	b.hi	L(set_long)
+	cmp	count, 16
+	b.hs	L(set_medium)
+
+	/* Set 0..15 bytes.  */
+	tbz	count, 3, 1f
+	str	val, [dstin]
+	str	val, [dstend, -8]
+	ret
+
+	.p2align 3
+1:	tbz	count, 2, 2f
+	str	valw, [dstin]
+	str	valw, [dstend, -4]
+	ret
+2:	cbz	count, 3f
+	strb	valw, [dstin]
+	tbz	count, 1, 3f
+	strh	valw, [dstend, -2]
+3:	ret
+
+	.p2align 3
+	/* Set 16..96 bytes.  */
+L(set_medium):
+	stp	val, val, [dstin]
+	tbnz	count, 6, L(set96)
+	stp	val, val, [dstend, -16]
+	tbz	count, 5, 1f
+	stp	val, val, [dstin, 16]
+	stp	val, val, [dstend, -32]
+1:	ret
+
+	.p2align 6
+	/* Set 64..96 bytes.  Write 64 bytes from the start and
+	   32 bytes from the end.  */
+L(set96):
+	stp	val, val, [dstin, 16]
+	stp	val, val, [dstin, 32]
+	stp	val, val, [dstin, 48]
+	stp	val, val, [dstend, -32]
+	stp	val, val, [dstend, -16]
+	ret
+
+	.p2align 6
+L(set_long):
+	stp	val, val, [dstin]
+	bic	dst, dstin, 15
+	cmp	count, 256
+	ccmp	valw, 0, 0, cs	/* count >= 256 && value == 0?  */
+	b.eq	L(try_zva)
+	cmp	count, #32768
+	b.hi	L(set_long_with_nontemp)
+	/* Non-zero or < 256 byte memset of at most 32 KiB: no DC ZVA.  */
+	sub	count, dstend, dst
+
+	/* Adjust count and bias for loop. By subtracting extra 1 from count,
+	  it is easy to use tbz instruction to check whether loop tailing
+	  count is less than 33 bytes, so as to bypass 2 unnecessary stps. */
+	sub	count, count, 64+16+1
+
+1:	stp	val, val, [dst, 16]
+	stp	val, val, [dst, 32]
+	stp	val, val, [dst, 48]
+	stp	val, val, [dst, 64]!
+	subs	count, count, 64
+	b.hs	1b
+
+	tbz	count, 5, 1f	/* Remaining count is less than 33 bytes? */
+	stp	val, val, [dst, 16]
+	stp	val, val, [dst, 32]
+1:	stp	val, val, [dstend, -32]
+	stp	val, val, [dstend, -16]
+	ret
+
+L(set_long_with_nontemp):
+	/* Non-zero memset larger than 32 KiB: use non-temporal stores.  */
+	sub	count, dstend, dst
+
+	/* Adjust count and bias for loop. By subtracting extra 1 from count,
+	   it is easy to use tbz instruction to check whether loop tailing
+	   count is less than 33 bytes, so as to bypass 2 unnecessary stps. */
+	sub	count, count, 64+16+1
+
+1:	stnp	val, val, [dst, 16]
+	stnp	val, val, [dst, 32]
+	stnp	val, val, [dst, 48]
+	stnp	val, val, [dst, 64]
+	add	dst, dst, #64
+	subs	count, count, 64
+	b.hs	1b
+
+	tbz	count, 5, 1f	/* Remaining count is less than 33 bytes? */
+	stnp	val, val, [dst, 16]
+	stnp	val, val, [dst, 32]
+1:	stnp	val, val, [dstend, -32]
+	stnp	val, val, [dstend, -16]
+	ret
+
+L(try_zva):
+	/* Write the first and last 64 byte aligned block using stp rather
+	   than using DC ZVA as it is faster. */
+	.p2align 6
+L(zva_64):
+	stp	val, val, [dst, 16]
+	stp	val, val, [dst, 32]
+	stp	val, val, [dst, 48]
+	bic	dst, dst, 63
+	stp	val, val, [dst, 64]
+	stp	val, val, [dst, 64+16]
+	stp	val, val, [dst, 96]
+	stp	val, val, [dst, 96+16]
+	sub	count, dstend, dst	/* Count is now 128 too large.	*/
+	sub	count, count, 128+64+64	/* Adjust count and bias for loop.  */
+	add	dst, dst, 128
+1:	dc	zva, dst
+	add	dst, dst, 64
+	subs	count, count, 64
+	b.hi	1b
+	stp	val, val, [dst, 0]
+	stp	val, val, [dst, 16]
+	stp	val, val, [dst, 32]
+	stp	val, val, [dst, 48]
+
+	stp	val, val, [dstend, -64]
+	stp	val, val, [dstend, -64+16]
+	stp	val, val, [dstend, -32]
+	stp	val, val, [dstend, -16]
+	ret
+
+END (__memset_oryon1)
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 50f58a60e3..656e8a3fa0 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -1256,6 +1256,20 @@ extern struct link_map *_dl_update_slotinfo (unsigned long int req_modid,
 					     size_t gen)
      attribute_hidden;
 
+/* The last TLS module ID that is initially loaded, plus 1.  TLS
+   addresses for modules with IDs lower than that can be obtained from
+   the DTV even if its generation is outdated.  */
+extern size_t _dl_tls_initial_modid_limit attribute_hidden attribute_relro;
+
+/* Compute _dl_tls_initial_modid_limit.  To be called after initial
+   relocation.  */
+void _dl_tls_initial_modid_limit_setup (void) attribute_hidden;
+
+/* Number of threads currently in a TLS update.  This is used to
+   detect reentrant __tls_get_addr calls without a per-thread
+   flag.  */
+extern unsigned int _dl_tls_threads_in_update attribute_hidden;
+
 /* Look up the module's TLS block as for __tls_get_addr,
    but never touch anything.  Return null if it's not allocated yet.  */
 extern void *_dl_tls_get_addr_soft (struct link_map *l) attribute_hidden;
diff --git a/sysdeps/mips/fpu/math-use-builtins-fma.h b/sysdeps/mips/fpu/math-use-builtins-fma.h
new file mode 100644
index 0000000000..57108f968e
--- /dev/null
+++ b/sysdeps/mips/fpu/math-use-builtins-fma.h
@@ -0,0 +1,36 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* MIPSr6 has MADDF.s/MADDF.d instructions, which are fused.  In the MIPS
+   ISA, double support can be subsetted out (__mips_single_float); in
+   that case only the FMAF builtin is enabled.  */
+
+#include <sysdep.h>
+
+#if __mips_isa_rev >= 6
+# ifdef __mips_single_float
+#  define USE_FMA_BUILTIN 0
+# else
+#  define USE_FMA_BUILTIN 1
+# endif
+# define USE_FMAF_BUILTIN 1
+#else
+# define USE_FMA_BUILTIN 0
+# define USE_FMAF_BUILTIN 0
+#endif
+#define USE_FMAL_BUILTIN 0
+#define USE_FMAF128_BUILTIN 0
diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
index 092c274f36..7eb35fb133 100644
--- a/sysdeps/nptl/dl-tls_init_tp.c
+++ b/sysdeps/nptl/dl-tls_init_tp.c
@@ -45,8 +45,10 @@ rtld_mutex_dummy (pthread_mutex_t *lock)
 #endif
 
 const unsigned int __rseq_flags;
-const unsigned int __rseq_size attribute_relro;
-const ptrdiff_t __rseq_offset attribute_relro;
+
+/* The variables are in .data.relro but are not yet write-protected.  */
+extern unsigned int _rseq_size attribute_hidden;
+extern ptrdiff_t _rseq_offset attribute_hidden;
 
 void
 __tls_pre_init_tp (void)
@@ -105,10 +107,7 @@ __tls_init_tp (void)
     do_rseq = TUNABLE_GET (rseq, int, NULL);
     if (rseq_register_current_thread (pd, do_rseq))
       {
-        /* We need a writable view of the variables.  They are in
-           .data.relro and are not yet write-protected.  */
-        extern unsigned int size __asm__ ("__rseq_size");
-        size = sizeof (pd->rseq_area);
+        _rseq_size = sizeof (pd->rseq_area);
       }
 
 #ifdef RSEQ_SIG
@@ -117,8 +116,7 @@ __tls_init_tp (void)
        all targets support __thread_pointer, so set __rseq_offset only
        if the rseq registration may have happened because RSEQ_SIG is
        defined.  */
-    extern ptrdiff_t offset __asm__ ("__rseq_offset");
-    offset = (char *) &pd->rseq_area - (char *) __thread_pointer ();
+    _rseq_offset = (char *) &pd->rseq_area - (char *) __thread_pointer ();
 #endif
   }
 
diff --git a/sysdeps/riscv/nofpu/libm-test-ulps b/sysdeps/riscv/nofpu/libm-test-ulps
index d8cff3e077..9ad64d1d85 100644
--- a/sysdeps/riscv/nofpu/libm-test-ulps
+++ b/sysdeps/riscv/nofpu/libm-test-ulps
@@ -972,6 +972,11 @@ double: 2
 float: 1
 ldouble: 3
 
+Function: "exp10m1":
+double: 2
+float: 1
+ldouble: 1
+
 Function: "exp2":
 double: 1
 ldouble: 1
@@ -989,6 +994,11 @@ double: 1
 float: 1
 ldouble: 2
 
+Function: "exp2m1":
+double: 1
+float: 1
+ldouble: 1
+
 Function: "exp_downward":
 double: 1
 float: 1
@@ -1161,6 +1171,11 @@ double: 2
 float: 2
 ldouble: 1
 
+Function: "log10p1":
+double: 1
+float: 1
+ldouble: 3
+
 Function: "log1p":
 double: 1
 float: 1
@@ -1198,6 +1213,11 @@ Function: "log2_upward":
 double: 3
 ldouble: 1
 
+Function: "log2p1":
+double: 1
+float: 1
+ldouble: 3
+
 Function: "log_downward":
 ldouble: 1
 
diff --git a/sysdeps/unix/sysv/linux/hppa/sysdep.h b/sysdeps/unix/sysv/linux/hppa/sysdep.h
index af62f7501e..e47975e5cf 100644
--- a/sysdeps/unix/sysv/linux/hppa/sysdep.h
+++ b/sysdeps/unix/sysv/linux/hppa/sysdep.h
@@ -473,11 +473,8 @@ L(pre_end):					ASM_LINE_SEP	\
 
 #ifdef __LP64__
 # define HAVE_CLOCK_GETTIME_VSYSCALL    "__vdso_clock_gettime"
-# define HAVE_GETTIMEOFDAY_VSYSCALL     "__vdso_gettimeofday"
 #else
-# define HAVE_CLOCK_GETTIME_VSYSCALL    "__vdso_clock_gettime"
 # define HAVE_CLOCK_GETTIME64_VSYSCALL  "__vdso_clock_gettime64"
-# define HAVE_GETTIMEOFDAY_VSYSCALL     "__vdso_gettimeofday"
 #endif /* __LP64__ */
 
 #endif	/* __ASSEMBLER__ */
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 3d7c2819d7..e501e084ef 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -1023,39 +1023,59 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
 
       model += extended_model;
       if (family == 0x6)
-        {
-          if (model == 0xf || model == 0x19)
-            {
+	{
+	  /* Tuning for older Zhaoxin processors.  */
+	  if (model == 0xf || model == 0x19)
+	    {
 	      CPU_FEATURE_UNSET (cpu_features, AVX);
 	      CPU_FEATURE_UNSET (cpu_features, AVX2);
 
-              cpu_features->preferred[index_arch_Slow_SSE4_2]
-                |= bit_arch_Slow_SSE4_2;
+	      cpu_features->preferred[index_arch_Slow_SSE4_2]
+		  |= bit_arch_Slow_SSE4_2;
 
+	      /* Unaligned AVX loads are slower.  */
 	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-		&= ~bit_arch_AVX_Fast_Unaligned_Load;
-            }
-        }
+		  &= ~bit_arch_AVX_Fast_Unaligned_Load;
+	    }
+	}
       else if (family == 0x7)
-        {
-	  if (model == 0x1b)
+	{
+	  switch (model)
 	    {
+	      /* Wudaokou microarch tuning.  */
+	    case 0x1b:
 	      CPU_FEATURE_UNSET (cpu_features, AVX);
 	      CPU_FEATURE_UNSET (cpu_features, AVX2);
 
 	      cpu_features->preferred[index_arch_Slow_SSE4_2]
-		|= bit_arch_Slow_SSE4_2;
+		  |= bit_arch_Slow_SSE4_2;
 
 	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-		&= ~bit_arch_AVX_Fast_Unaligned_Load;
-	    }
-	  else if (model == 0x3b)
-	    {
+		  &= ~bit_arch_AVX_Fast_Unaligned_Load;
+	      break;
+
+	      /* Lujiazui microarch tuning.  */
+	    case 0x3b:
 	      CPU_FEATURE_UNSET (cpu_features, AVX);
 	      CPU_FEATURE_UNSET (cpu_features, AVX2);
 
 	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-		&= ~bit_arch_AVX_Fast_Unaligned_Load;
+		  &= ~bit_arch_AVX_Fast_Unaligned_Load;
+	      break;
+
+	      /* Yongfeng and Shijidadao microarch tuning.  */
+	    case 0x5b:
+	      cpu_features->cachesize_non_temporal_divisor = 2;
+	    case 0x6b:
+	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+		  &= ~bit_arch_AVX_Fast_Unaligned_Load;
+
+	      /* To use sse2_unaligned versions of memset, strcpy and strcat.
+	       */
+	      cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
+		  |= (bit_arch_Prefer_No_VZEROUPPER
+		      | bit_arch_Fast_Unaligned_Load);
+	      break;
 	    }
 	}
     }
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index 3a6ec4ef9f..5e77345a6e 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -934,8 +934,10 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
   /* If no ERMS, we use the per-thread L3 chunking. Normal cacheable stores run
      a higher risk of actually thrashing the cache as they don't have a HW LRU
      hint. As well, their performance in highly parallel situations is
-     noticeably worse.  */
-  if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+     noticeably worse.  Zhaoxin processors are an exception: the lowbound is
+     not suitable for them based on actual test data.  */
+  if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS)
+      && cpu_features->basic.kind != arch_kind_zhaoxin)
     non_temporal_threshold = non_temporal_threshold_lowbound;
   /* SIZE_MAX >> 4 because memmove-vec-unaligned-erms right-shifts the value of
      'x86_non_temporal_threshold' by `LOG_4X_MEMCPY_THRESH` (4) and it is best
diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c
index 869023bbba..b3c1e4fcd7 100644
--- a/sysdeps/x86_64/dl-tls.c
+++ b/sysdeps/x86_64/dl-tls.c
@@ -41,7 +41,10 @@ __tls_get_addr_slow (GET_ADDR_ARGS)
   dtv_t *dtv = THREAD_DTV ();
 
   size_t gen = atomic_load_acquire (&GL(dl_tls_generation));
-  if (__glibc_unlikely (dtv[0].counter != gen))
+  if (__glibc_unlikely (dtv[0].counter != gen)
+      /* See comment in __tls_get_addr in elf/dl-tls.c.  */
+      && !(_dl_tls_allocate_active ()
+           && GET_ADDR_MODULE < _dl_tls_initial_modid_limit))
     return update_get_addr (GET_ADDR_PARAM, gen);
 
   return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3.S b/sysdeps/x86_64/multiarch/memmove-ssse3.S
index 048d015712..01008fd981 100644
--- a/sysdeps/x86_64/multiarch/memmove-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3.S
@@ -151,13 +151,10 @@ L(more_2x_vec):
 	   loop.  */
 	movups	%xmm0, (%rdi)
 
-# ifdef SHARED_CACHE_SIZE_HALF
-	cmp	$SHARED_CACHE_SIZE_HALF, %RDX_LP
-# else
-	cmp	__x86_shared_cache_size_half(%rip), %rdx
-# endif
+	cmp	__x86_shared_non_temporal_threshold(%rip), %rdx
 	ja	L(large_memcpy)
 
+L(loop_fwd):
 	leaq	-64(%rdi, %rdx), %r8
 	andq	$-16, %rdi
 	movl	$48, %edx
@@ -199,6 +196,13 @@ L(large_memcpy):
 	movups	-64(%r9, %rdx), %xmm10
 	movups	-80(%r9, %rdx), %xmm11
 
+	/* Check if src and dst overlap.  If they do, use cacheable
+	   writes to potentially gain positive interference between
+	   the loads during the memmove.  */
+	subq	%rdi, %r9
+	cmpq	%rdx, %r9
+	jb	L(loop_fwd)
+
 	sall	$5, %ecx
 	leal	(%rcx, %rcx, 2), %r8d
 	leaq	-96(%rdi, %rdx), %rcx