about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- csu/libc-tls.c 7
-rw-r--r-- elf/Makefile 6
-rw-r--r-- elf/dl-support.c 9
-rw-r--r-- elf/dl-thread_gscope_wait.c 2
-rw-r--r-- elf/rtld.c 12
-rw-r--r-- nptl/allocatestack.c 147
-rw-r--r-- nptl/descr.h 3
-rw-r--r-- nptl/nptl-init.c 11
-rw-r--r-- nptl/pthreadP.h 4
-rw-r--r-- nptl/pthread_create.c 8
-rw-r--r-- nptl_db/structs.def 4
-rw-r--r-- nptl_db/td_init.c 14
-rw-r--r-- nptl_db/td_ta_map_lwp2thr.c 2
-rw-r--r-- nptl_db/td_ta_thr_iter.c 4
-rw-r--r-- nptl_db/td_thr_tlsbase.c 7
-rw-r--r-- nptl_db/td_thr_validate.c 39
-rw-r--r-- nptl_db/thread_dbP.h 13
-rw-r--r-- sysdeps/aarch64/nptl/tls.h 2
-rw-r--r-- sysdeps/alpha/nptl/tls.h 2
-rw-r--r-- sysdeps/arc/nptl/tls.h 2
-rw-r--r-- sysdeps/arm/nptl/tls.h 2
-rw-r--r-- sysdeps/csky/nptl/tls.h 2
-rw-r--r-- sysdeps/generic/ldsodefs.h 19
-rw-r--r-- sysdeps/hppa/nptl/tls.h 2
-rw-r--r-- sysdeps/i386/nptl/tls.h 2
-rw-r--r-- sysdeps/ia64/nptl/tls.h 2
-rw-r--r-- sysdeps/m68k/nptl/tls.h 2
-rw-r--r-- sysdeps/microblaze/nptl/tls.h 2
-rw-r--r-- sysdeps/mips/nptl/tls.h 2
-rw-r--r-- sysdeps/nios2/nptl/tls.h 2
-rw-r--r-- sysdeps/nptl/dl-thread_gscope_wait.c 80
-rw-r--r-- sysdeps/powerpc/nptl/tls.h 2
-rw-r--r-- sysdeps/riscv/nptl/tls.h 2
-rw-r--r-- sysdeps/s390/nptl/tls.h 2
-rw-r--r-- sysdeps/sh/nptl/tls.h 2
-rw-r--r-- sysdeps/sparc/nptl/tls.h 2
-rw-r--r-- sysdeps/x86_64/nptl/tls.h 2
37 files changed, 243 insertions, 184 deletions
diff --git a/csu/libc-tls.c b/csu/libc-tls.c
index 06e76bd395..c3589f0a7d 100644
--- a/csu/libc-tls.c
+++ b/csu/libc-tls.c
@@ -24,6 +24,7 @@
 #include <stdio.h>
 #include <sys/param.h>
 #include <array_length.h>
+#include <list.h>
 
 #ifdef SHARED
  #error makefile bug, this file is for static only
@@ -193,6 +194,12 @@ __libc_setup_tls (void)
   if (__builtin_expect (lossage != NULL, 0))
     _startup_fatal (lossage);
 
+#if THREAD_GSCOPE_IN_TCB
+  INIT_LIST_HEAD (&_dl_stack_used);
+  INIT_LIST_HEAD (&_dl_stack_user);
+  list_add (&THREAD_SELF->list, &_dl_stack_user);
+#endif
+
   /* Update the executable's link map with enough information to make
      the TLS routines happy.  */
   main_map->l_tls_align = align;
diff --git a/elf/Makefile b/elf/Makefile
index f10cc59e7c..2015383eb2 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -34,7 +34,8 @@ dl-routines	= $(addprefix dl-,load lookup object reloc deps \
 				  version profile tls origin scope \
 				  execstack open close trampoline \
 				  exception sort-maps lookup-direct \
-				  call-libc-early-init write)
+				  call-libc-early-init write \
+				  thread_gscope_wait)
 ifeq (yes,$(use-ldconfig))
 dl-routines += dl-cache
 endif
@@ -54,7 +55,8 @@ endif
 all-dl-routines = $(dl-routines) $(sysdep-dl-routines)
 # But they are absent from the shared libc, because that code is in ld.so.
 elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \
-		    dl-sysdep dl-exception dl-reloc-static-pie
+		    dl-sysdep dl-exception dl-reloc-static-pie \
+		    thread_gscope_wait
 
 # ld.so uses those routines, plus some special stuff for being the program
 # interpreter and operating independent of libc.
diff --git a/elf/dl-support.c b/elf/dl-support.c
index afbc94df54..15e6d787c9 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -183,10 +183,11 @@ ElfW(Word) _dl_stack_flags = DEFAULT_STACK_PERMS;
 int (*_dl_make_stack_executable_hook) (void **) = _dl_make_stack_executable;
 
 
-/* Function in libpthread to wait for termination of lookups.  */
-void (*_dl_wait_lookup_done) (void);
-
-#if !THREAD_GSCOPE_IN_TCB
+#if THREAD_GSCOPE_IN_TCB
+list_t _dl_stack_used;
+list_t _dl_stack_user;
+int _dl_stack_cache_lock;
+#else
 int _dl_thread_gscope_count;
 #endif
 struct dl_scope_free_list *_dl_scope_free_list;
diff --git a/elf/dl-thread_gscope_wait.c b/elf/dl-thread_gscope_wait.c
new file mode 100644
index 0000000000..28e1494730
--- /dev/null
+++ b/elf/dl-thread_gscope_wait.c
@@ -0,0 +1,2 @@
+/* By default, the dynamic linker does not use an out-of-line
+   __thread_gscope_wait function.  */
diff --git a/elf/rtld.c b/elf/rtld.c
index 5d117d0d2c..c4ffc8d4b7 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -48,6 +48,7 @@
 #include <array_length.h>
 #include <libc-early-init.h>
 #include <dl-main.h>
+#include <list.h>
 
 #include <assert.h>
 
@@ -799,6 +800,9 @@ cannot allocate TLS data structures for initial thread\n");
   const char *lossage = TLS_INIT_TP (tcbp);
   if (__glibc_unlikely (lossage != NULL))
     _dl_fatal_printf ("cannot set up thread-local storage: %s\n", lossage);
+#if THREAD_GSCOPE_IN_TCB
+  list_add (&THREAD_SELF->list, &GL (dl_stack_user));
+#endif
   tls_init_tp_called = true;
 
   return tcbp;
@@ -1139,6 +1143,11 @@ dl_main (const ElfW(Phdr) *phdr,
   GL(dl_rtld_unlock_recursive) = rtld_lock_default_unlock_recursive;
 #endif
 
+#if THREAD_GSCOPE_IN_TCB
+  INIT_LIST_HEAD (&GL (dl_stack_used));
+  INIT_LIST_HEAD (&GL (dl_stack_user));
+#endif
+
   /* The explicit initialization here is cheaper than processing the reloc
      in the _rtld_local definition's initializer.  */
   GL(dl_make_stack_executable_hook) = &_dl_make_stack_executable;
@@ -2383,6 +2392,9 @@ dl_main (const ElfW(Phdr) *phdr,
       if (__glibc_unlikely (lossage != NULL))
 	_dl_fatal_printf ("cannot set up thread-local storage: %s\n",
 			  lossage);
+#if THREAD_GSCOPE_IN_TCB
+      list_add (&THREAD_SELF->list, &GL (dl_stack_user));
+#endif
     }
 
   /* Make sure no new search directories have been added.  */
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 4b45f8c884..b7f9eeebf6 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -106,26 +106,14 @@
 static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40MiBi by default.  */
 static size_t stack_cache_actsize;
 
-/* Mutex protecting this variable.  */
-static int stack_cache_lock = LLL_LOCK_INITIALIZER;
-
 /* List of queued stack frames.  */
 static LIST_HEAD (stack_cache);
 
-/* List of the stacks in use.  */
-static LIST_HEAD (stack_used);
-
 /* We need to record what list operations we are going to do so that,
    in case of an asynchronous interruption due to a fork() call, we
    can correct for the work.  */
 static uintptr_t in_flight_stack;
 
-/* List of the threads with user provided stacks in use.  No need to
-   initialize this, since it's done in __pthread_initialize_minimal.  */
-list_t __stack_user __attribute__ ((nocommon));
-hidden_data_def (__stack_user)
-
-
 /* Check whether the stack is still used or not.  */
 #define FREE_P(descr) ((descr)->tid <= 0)
 
@@ -173,7 +161,7 @@ get_cached_stack (size_t *sizep, void **memp)
   struct pthread *result = NULL;
   list_t *entry;
 
-  lll_lock (stack_cache_lock, LLL_PRIVATE);
+  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
   /* Search the cache for a matching entry.  We search for the
      smallest stack which has at least the required size.  Note that
@@ -206,7 +194,7 @@ get_cached_stack (size_t *sizep, void **memp)
       || __builtin_expect (result->stackblock_size > 4 * size, 0))
     {
       /* Release the lock.  */
-      lll_unlock (stack_cache_lock, LLL_PRIVATE);
+      lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
       return NULL;
     }
@@ -218,13 +206,13 @@ get_cached_stack (size_t *sizep, void **memp)
   stack_list_del (&result->list);
 
   /* And add to the list of stacks in use.  */
-  stack_list_add (&result->list, &stack_used);
+  stack_list_add (&result->list, &GL (dl_stack_used));
 
   /* And decrease the cache size.  */
   stack_cache_actsize -= result->stackblock_size;
 
   /* Release the lock early.  */
-  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
   /* Report size and location of the stack to the caller.  */
   *sizep = result->stackblock_size;
@@ -510,12 +498,12 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 
 
       /* Prepare to modify global data.  */
-      lll_lock (stack_cache_lock, LLL_PRIVATE);
+      lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
       /* And add to the list of stacks in use.  */
-      list_add (&pd->list, &__stack_user);
+      list_add (&pd->list, &GL (dl_stack_user));
 
-      lll_unlock (stack_cache_lock, LLL_PRIVATE);
+      lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
     }
   else
     {
@@ -644,12 +632,12 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 
 
 	  /* Prepare to modify global data.  */
-	  lll_lock (stack_cache_lock, LLL_PRIVATE);
+	  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
 	  /* And add to the list of stacks in use.  */
-	  stack_list_add (&pd->list, &stack_used);
+	  stack_list_add (&pd->list, &GL (dl_stack_used));
 
-	  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+	  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
 
 	  /* There might have been a race.  Another thread might have
@@ -690,12 +678,12 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 	  if (__mprotect (guard, guardsize, PROT_NONE) != 0)
 	    {
 	    mprot_error:
-	      lll_lock (stack_cache_lock, LLL_PRIVATE);
+	      lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
 	      /* Remove the thread from the list.  */
 	      stack_list_del (&pd->list);
 
-	      lll_unlock (stack_cache_lock, LLL_PRIVATE);
+	      lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
 	      /* Get rid of the TLS block we allocated.  */
 	      _dl_deallocate_tls (TLS_TPADJ (pd), false);
@@ -799,7 +787,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 void
 __deallocate_stack (struct pthread *pd)
 {
-  lll_lock (stack_cache_lock, LLL_PRIVATE);
+  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
   /* Remove the thread from the list of threads with user defined
      stacks.  */
@@ -815,7 +803,7 @@ __deallocate_stack (struct pthread *pd)
     /* Free the memory associated with the ELF TLS.  */
     _dl_deallocate_tls (TLS_TPADJ (pd), false);
 
-  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 }
 
 
@@ -831,10 +819,10 @@ __make_stacks_executable (void **stack_endp)
   const size_t pagemask = ~(__getpagesize () - 1);
 #endif
 
-  lll_lock (stack_cache_lock, LLL_PRIVATE);
+  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
   list_t *runp;
-  list_for_each (runp, &stack_used)
+  list_for_each (runp, &GL (dl_stack_used))
     {
       err = change_stack_perm (list_entry (runp, struct pthread, list)
 #ifdef NEED_SEPARATE_REGISTER_STACK
@@ -860,7 +848,7 @@ __make_stacks_executable (void **stack_endp)
 	  break;
       }
 
-  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
   return err;
 }
@@ -891,8 +879,8 @@ __reclaim_stacks (void)
 	     pointers at the head of the list are inconsistent.  */
 	  list_t *l = NULL;
 
-	  if (stack_used.next->prev != &stack_used)
-	    l = &stack_used;
+	  if (GL (dl_stack_used).next->prev != &GL (dl_stack_used))
+	    l = &GL (dl_stack_used);
 	  else if (stack_cache.next->prev != &stack_cache)
 	    l = &stack_cache;
 
@@ -914,7 +902,7 @@ __reclaim_stacks (void)
 
   /* Mark all stacks except the still running one as free.  */
   list_t *runp;
-  list_for_each (runp, &stack_used)
+  list_for_each (runp, &GL (dl_stack_used))
     {
       struct pthread *curp = list_entry (runp, struct pthread, list);
       if (curp != self)
@@ -948,7 +936,7 @@ __reclaim_stacks (void)
     }
 
   /* Add the stack of all running threads to the cache.  */
-  list_splice (&stack_used, &stack_cache);
+  list_splice (&GL (dl_stack_used), &stack_cache);
 
   /* Remove the entry for the current thread to from the cache list
      and add it to the list of running threads.  Which of the two
@@ -956,13 +944,13 @@ __reclaim_stacks (void)
   stack_list_del (&self->list);
 
   /* Re-initialize the lists for all the threads.  */
-  INIT_LIST_HEAD (&stack_used);
-  INIT_LIST_HEAD (&__stack_user);
+  INIT_LIST_HEAD (&GL (dl_stack_used));
+  INIT_LIST_HEAD (&GL (dl_stack_user));
 
   if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
-    list_add (&self->list, &__stack_user);
+    list_add (&self->list, &GL (dl_stack_user));
   else
-    list_add (&self->list, &stack_used);
+    list_add (&self->list, &GL (dl_stack_used));
 
   /* There is one thread running.  */
   __nptl_nthreads = 1;
@@ -970,7 +958,7 @@ __reclaim_stacks (void)
   in_flight_stack = 0;
 
   /* Initialize locks.  */
-  stack_cache_lock = LLL_LOCK_INITIALIZER;
+  GL (dl_stack_cache_lock) = LLL_LOCK_INITIALIZER;
   __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
 }
 
@@ -1083,7 +1071,7 @@ __nptl_setxid (struct xid_command *cmdp)
 {
   int signalled;
   int result;
-  lll_lock (stack_cache_lock, LLL_PRIVATE);
+  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
   __xidcmd = cmdp;
   cmdp->cntr = 0;
@@ -1093,7 +1081,7 @@ __nptl_setxid (struct xid_command *cmdp)
 
   /* Iterate over the list with system-allocated threads first.  */
   list_t *runp;
-  list_for_each (runp, &stack_used)
+  list_for_each (runp, &GL (dl_stack_used))
     {
       struct pthread *t = list_entry (runp, struct pthread, list);
       if (t == self)
@@ -1103,7 +1091,7 @@ __nptl_setxid (struct xid_command *cmdp)
     }
 
   /* Now the list with threads using user-allocated stacks.  */
-  list_for_each (runp, &__stack_user)
+  list_for_each (runp, &GL (dl_stack_user))
     {
       struct pthread *t = list_entry (runp, struct pthread, list);
       if (t == self)
@@ -1119,7 +1107,7 @@ __nptl_setxid (struct xid_command *cmdp)
     {
       signalled = 0;
 
-      list_for_each (runp, &stack_used)
+      list_for_each (runp, &GL (dl_stack_used))
 	{
 	  struct pthread *t = list_entry (runp, struct pthread, list);
 	  if (t == self)
@@ -1128,7 +1116,7 @@ __nptl_setxid (struct xid_command *cmdp)
 	  signalled += setxid_signal_thread (cmdp, t);
 	}
 
-      list_for_each (runp, &__stack_user)
+      list_for_each (runp, &GL (dl_stack_user))
 	{
 	  struct pthread *t = list_entry (runp, struct pthread, list);
 	  if (t == self)
@@ -1149,7 +1137,7 @@ __nptl_setxid (struct xid_command *cmdp)
 
   /* Clean up flags, so that no thread blocks during exit waiting
      for a signal which will never come.  */
-  list_for_each (runp, &stack_used)
+  list_for_each (runp, &GL (dl_stack_used))
     {
       struct pthread *t = list_entry (runp, struct pthread, list);
       if (t == self)
@@ -1158,7 +1146,7 @@ __nptl_setxid (struct xid_command *cmdp)
       setxid_unmark_thread (cmdp, t);
     }
 
-  list_for_each (runp, &__stack_user)
+  list_for_each (runp, &GL (dl_stack_user))
     {
       struct pthread *t = list_entry (runp, struct pthread, list);
       if (t == self)
@@ -1180,7 +1168,7 @@ __nptl_setxid (struct xid_command *cmdp)
     }
   __nptl_setxid_error (cmdp, error);
 
-  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
   return result;
 }
 
@@ -1204,75 +1192,16 @@ void
 attribute_hidden
 __pthread_init_static_tls (struct link_map *map)
 {
-  lll_lock (stack_cache_lock, LLL_PRIVATE);
+  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
   /* Iterate over the list with system-allocated threads first.  */
   list_t *runp;
-  list_for_each (runp, &stack_used)
+  list_for_each (runp, &GL (dl_stack_used))
     init_one_static_tls (list_entry (runp, struct pthread, list), map);
 
   /* Now the list with threads using user-allocated stacks.  */
-  list_for_each (runp, &__stack_user)
+  list_for_each (runp, &GL (dl_stack_user))
     init_one_static_tls (list_entry (runp, struct pthread, list), map);
 
-  lll_unlock (stack_cache_lock, LLL_PRIVATE);
-}
-
-
-void
-attribute_hidden
-__wait_lookup_done (void)
-{
-  lll_lock (stack_cache_lock, LLL_PRIVATE);
-
-  struct pthread *self = THREAD_SELF;
-
-  /* Iterate over the list with system-allocated threads first.  */
-  list_t *runp;
-  list_for_each (runp, &stack_used)
-    {
-      struct pthread *t = list_entry (runp, struct pthread, list);
-      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
-	continue;
-
-      int *const gscope_flagp = &t->header.gscope_flag;
-
-      /* We have to wait until this thread is done with the global
-	 scope.  First tell the thread that we are waiting and
-	 possibly have to be woken.  */
-      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
-						THREAD_GSCOPE_FLAG_WAIT,
-						THREAD_GSCOPE_FLAG_USED))
-	continue;
-
-      do
-	futex_wait_simple ((unsigned int *) gscope_flagp,
-			   THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
-      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
-    }
-
-  /* Now the list with threads using user-allocated stacks.  */
-  list_for_each (runp, &__stack_user)
-    {
-      struct pthread *t = list_entry (runp, struct pthread, list);
-      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
-	continue;
-
-      int *const gscope_flagp = &t->header.gscope_flag;
-
-      /* We have to wait until this thread is done with the global
-	 scope.  First tell the thread that we are waiting and
-	 possibly have to be woken.  */
-      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
-						THREAD_GSCOPE_FLAG_WAIT,
-						THREAD_GSCOPE_FLAG_USED))
-	continue;
-
-      do
-	futex_wait_simple ((unsigned int *) gscope_flagp,
-			   THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
-      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
-    }
-
-  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 }
diff --git a/nptl/descr.h b/nptl/descr.h
index d8343ff9a1..b172ee408b 100644
--- a/nptl/descr.h
+++ b/nptl/descr.h
@@ -162,7 +162,8 @@ struct pthread
     void *__padding[24];
   };
 
-  /* This descriptor's link on the `stack_used' or `__stack_user' list.  */
+  /* This descriptor's link on the GL (dl_stack_used) or
+     GL (dl_stack_user) list.  */
   list_t list;
 
   /* Thread ID - which is also a 'is this thread descriptor (and
diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
index 4aa1231bec..53b817715d 100644
--- a/nptl/nptl-init.c
+++ b/nptl/nptl-init.c
@@ -251,12 +251,9 @@ __pthread_initialize_minimal_internal (void)
      purposes this is good enough.  */
   THREAD_SETMEM (pd, stackblock_size, (size_t) __libc_stack_end);
 
-  /* Initialize the list of all running threads with the main thread.  */
-  INIT_LIST_HEAD (&__stack_user);
-  list_add (&pd->list, &__stack_user);
-
-  /* Before initializing __stack_user, the debugger could not find us and
-     had to set __nptl_initial_report_events.  Propagate its setting.  */
+  /* Before initializing GL (dl_stack_user), the debugger could not
+     find us and had to set __nptl_initial_report_events.  Propagate
+     its setting.  */
   THREAD_SETMEM (pd, report_events, __nptl_initial_report_events);
 
   struct sigaction sa;
@@ -336,8 +333,6 @@ __pthread_initialize_minimal_internal (void)
 
   GL(dl_init_static_tls) = &__pthread_init_static_tls;
 
-  GL(dl_wait_lookup_done) = &__wait_lookup_done;
-
   /* Register the fork generation counter with the libc.  */
 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
   __libc_multiple_threads_ptr =
diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
index 686f54aa93..a7510f9f63 100644
--- a/nptl/pthreadP.h
+++ b/nptl/pthreadP.h
@@ -208,10 +208,6 @@ extern void __default_pthread_attr_freeres (void) attribute_hidden;
 extern size_t __static_tls_size attribute_hidden;
 extern size_t __static_tls_align_m1 attribute_hidden;
 
-/* Thread descriptor handling.  */
-extern list_t __stack_user;
-hidden_proto (__stack_user)
-
 /* Attribute handling.  */
 extern struct pthread_attr *__attr_list attribute_hidden;
 extern int __attr_list_lock attribute_hidden;
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index 447f005ece..bad4e57a84 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -213,9 +213,9 @@ __find_in_stack_list (struct pthread *pd)
   list_t *entry;
   struct pthread *result = NULL;
 
-  lll_lock (stack_cache_lock, LLL_PRIVATE);
+  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
-  list_for_each (entry, &stack_used)
+  list_for_each (entry, &GL (dl_stack_used))
     {
       struct pthread *curp;
 
@@ -228,7 +228,7 @@ __find_in_stack_list (struct pthread *pd)
     }
 
   if (result == NULL)
-    list_for_each (entry, &__stack_user)
+    list_for_each (entry, &GL (dl_stack_user))
       {
 	struct pthread *curp;
 
@@ -240,7 +240,7 @@ __find_in_stack_list (struct pthread *pd)
 	  }
       }
 
-  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
   return result;
 }
diff --git a/nptl_db/structs.def b/nptl_db/structs.def
index 472e9008fa..2ca33598b1 100644
--- a/nptl_db/structs.def
+++ b/nptl_db/structs.def
@@ -69,8 +69,6 @@ DB_STRUCT (td_eventbuf_t)
 DB_STRUCT_FIELD (td_eventbuf_t, eventnum)
 DB_STRUCT_FIELD (td_eventbuf_t, eventdata)
 
-DB_SYMBOL (stack_used)
-DB_SYMBOL (__stack_user)
 DB_SYMBOL (nptl_version)
 DB_FUNCTION (__nptl_create_event)
 DB_FUNCTION (__nptl_death_event)
@@ -106,6 +104,8 @@ DB_STRUCT (rtld_global)
 DB_RTLD_VARIABLE (_rtld_global)
 #endif
 DB_RTLD_GLOBAL_FIELD (dl_tls_dtv_slotinfo_list)
+DB_RTLD_GLOBAL_FIELD (dl_stack_user)
+DB_RTLD_GLOBAL_FIELD (dl_stack_used)
 
 DB_STRUCT (dtv_slotinfo_list)
 DB_STRUCT_FIELD (dtv_slotinfo_list, len)
diff --git a/nptl_db/td_init.c b/nptl_db/td_init.c
index 86773dcb06..79a1f90312 100644
--- a/nptl_db/td_init.c
+++ b/nptl_db/td_init.c
@@ -29,3 +29,17 @@ td_init (void)
   LOG ("td_init");
   return TD_OK;
 }
+
+bool
+__td_ta_rtld_global (td_thragent_t *ta)
+{
+  if (ta->ta_addr__rtld_global == 0
+      && td_mod_lookup (ta->ph, LD_SO, SYM__rtld_global,
+                        &ta->ta_addr__rtld_global) != PS_OK)
+    {
+      ta->ta_addr__rtld_global = (void*)-1;
+      return false;
+    }
+  else
+    return ta->ta_addr__rtld_global != (void*)-1;
+}
diff --git a/nptl_db/td_ta_map_lwp2thr.c b/nptl_db/td_ta_map_lwp2thr.c
index d04d4bec23..81ff48808c 100644
--- a/nptl_db/td_ta_map_lwp2thr.c
+++ b/nptl_db/td_ta_map_lwp2thr.c
@@ -187,7 +187,7 @@ td_ta_map_lwp2thr (const td_thragent_t *ta_arg,
      fake a special descriptor for the initial thread.  */
 
   psaddr_t list;
-  td_err_e err = DB_GET_SYMBOL (list, ta, __stack_user);
+  td_err_e err = __td_ta_stack_user (ta, &list);
   if (err != TD_OK)
     return err;
 
diff --git a/nptl_db/td_ta_thr_iter.c b/nptl_db/td_ta_thr_iter.c
index d59782bd6e..e406bcbd39 100644
--- a/nptl_db/td_ta_thr_iter.c
+++ b/nptl_db/td_ta_thr_iter.c
@@ -133,14 +133,14 @@ td_ta_thr_iter (const td_thragent_t *ta_arg, td_thr_iter_f *callback,
      have to iterate over both lists separately.  We start with the
      list of threads with user-defined stacks.  */
 
-  err = DB_GET_SYMBOL (list, ta, __stack_user);
+  err = __td_ta_stack_user (ta, &list);
   if (err == TD_OK)
     err = iterate_thread_list (ta, callback, cbdata_p, state, ti_pri,
 			       list, true);
 
   /* And the threads with stacks allocated by the implementation.  */
   if (err == TD_OK)
-    err = DB_GET_SYMBOL (list, ta, stack_used);
+    err = __td_ta_stack_used (ta, &list);
   if (err == TD_OK)
     err = iterate_thread_list (ta, callback, cbdata_p, state, ti_pri,
 			       list, false);
diff --git a/nptl_db/td_thr_tlsbase.c b/nptl_db/td_thr_tlsbase.c
index 8198934112..eeba0a04d8 100644
--- a/nptl_db/td_thr_tlsbase.c
+++ b/nptl_db/td_thr_tlsbase.c
@@ -28,12 +28,7 @@ dtv_slotinfo_list (td_thragent_t *ta,
   td_err_e err;
   psaddr_t head;
 
-  if (ta->ta_addr__rtld_global == 0
-      && td_mod_lookup (ta->ph, LD_SO, SYM__rtld_global,
-			&ta->ta_addr__rtld_global) != PS_OK)
-    ta->ta_addr__rtld_global = (void*)-1;
-
-  if (ta->ta_addr__rtld_global != (void*)-1)
+  if (__td_ta_rtld_global (ta))
     {
       err = DB_GET_FIELD (head, ta, ta->ta_addr__rtld_global,
 			  rtld_global, _dl_tls_dtv_slotinfo_list, 0);
diff --git a/nptl_db/td_thr_validate.c b/nptl_db/td_thr_validate.c
index c709c5e9be..d2f4107cf2 100644
--- a/nptl_db/td_thr_validate.c
+++ b/nptl_db/td_thr_validate.c
@@ -20,6 +20,41 @@
 #include "thread_dbP.h"
 #include <stdbool.h>
 
+td_err_e
+__td_ta_stack_user (td_thragent_t *ta, psaddr_t *plist)
+{
+  if (__td_ta_rtld_global (ta))
+    return DB_GET_FIELD_ADDRESS (*plist, ta, ta->ta_addr__rtld_global,
+				 rtld_global, _dl_stack_user, 0);
+  else
+    {
+      if (ta->ta_addr__dl_stack_user == 0
+	  && td_mod_lookup (ta->ph, NULL, SYM__dl_stack_user,
+			    &ta->ta_addr__dl_stack_user) != PS_OK)
+	return TD_ERR;
+      *plist = ta->ta_addr__dl_stack_user;
+      return TD_OK;
+    }
+}
+
+td_err_e
+__td_ta_stack_used (td_thragent_t *ta, psaddr_t *plist)
+{
+
+  if (__td_ta_rtld_global (ta))
+    return DB_GET_FIELD_ADDRESS (*plist, ta, ta->ta_addr__rtld_global,
+				 rtld_global, _dl_stack_used, 0);
+  else
+    {
+      if (ta->ta_addr__dl_stack_used == 0
+	  && td_mod_lookup (ta->ph, NULL, SYM__dl_stack_used,
+			    &ta->ta_addr__dl_stack_used) != PS_OK)
+	return TD_ERR;
+      *plist = ta->ta_addr__dl_stack_used;
+      return TD_OK;
+    }
+}
+
 static td_err_e
 check_thread_list (const td_thrhandle_t *th, psaddr_t head, bool *uninit)
 {
@@ -62,7 +97,7 @@ td_thr_validate (const td_thrhandle_t *th)
 
   /* First check the list with threads using user allocated stacks.  */
   bool uninit = false;
-  err = DB_GET_SYMBOL (list, th->th_ta_p, __stack_user);
+  err = __td_ta_stack_user (th->th_ta_p, &list);
   if (err == TD_OK)
     err = check_thread_list (th, list, &uninit);
 
@@ -70,7 +105,7 @@ td_thr_validate (const td_thrhandle_t *th)
      using implementation allocated stacks.  */
   if (err == TD_NOTHR)
     {
-      err = DB_GET_SYMBOL (list, th->th_ta_p, stack_used);
+      err = __td_ta_stack_used (th->th_ta_p, &list);
       if (err == TD_OK)
 	err = check_thread_list (th, list, &uninit);
 
diff --git a/nptl_db/thread_dbP.h b/nptl_db/thread_dbP.h
index 8a666b8d4f..9574e68d70 100644
--- a/nptl_db/thread_dbP.h
+++ b/nptl_db/thread_dbP.h
@@ -269,4 +269,17 @@ extern td_err_e _td_check_sizeof (td_thragent_t *ta, uint32_t *sizep,
 extern td_err_e __td_ta_lookup_th_unique (const td_thragent_t *ta,
 					  lwpid_t lwpid, td_thrhandle_t *th);
 
+/* Try to initialize TA->ta_addr__rtld_global.  Return true on
+   success, false on failure (which may be cached).  */
+bool __td_ta_rtld_global (td_thragent_t *ta) attribute_hidden;
+
+/* Obtain the address of the list_t fields _dl_stack_user and
+   _dl_stack_used in _rtld_global, or fall back to the global
+   variables of the same name (to support statically linked
+   programs).  */
+td_err_e __td_ta_stack_user (td_thragent_t *ta, psaddr_t *plist)
+  attribute_hidden;
+td_err_e __td_ta_stack_used (td_thragent_t *ta, psaddr_t *plist)
+  attribute_hidden;
+
 #endif /* thread_dbP.h */
diff --git a/sysdeps/aarch64/nptl/tls.h b/sysdeps/aarch64/nptl/tls.h
index e5627f777e..dbef4704f1 100644
--- a/sysdeps/aarch64/nptl/tls.h
+++ b/sysdeps/aarch64/nptl/tls.h
@@ -129,8 +129,6 @@ typedef struct
       atomic_write_barrier ();						     \
     }									     \
   while (0)
-# define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 # endif /* __ASSEMBLER__ */
 
diff --git a/sysdeps/alpha/nptl/tls.h b/sysdeps/alpha/nptl/tls.h
index 82549607fd..f3101f57c9 100644
--- a/sysdeps/alpha/nptl/tls.h
+++ b/sysdeps/alpha/nptl/tls.h
@@ -123,8 +123,6 @@ typedef struct
       atomic_write_barrier ();						     \
     }									     \
   while (0)
-#define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #else /* __ASSEMBLER__ */
 # include <tcb-offsets.h>
diff --git a/sysdeps/arc/nptl/tls.h b/sysdeps/arc/nptl/tls.h
index 184b550ab5..cc5abb1931 100644
--- a/sysdeps/arc/nptl/tls.h
+++ b/sysdeps/arc/nptl/tls.h
@@ -131,8 +131,6 @@ typedef struct
       atomic_write_barrier ();						     \
     }									     \
   while (0)
-#define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* !__ASSEMBLER__ */
 
diff --git a/sysdeps/arm/nptl/tls.h b/sysdeps/arm/nptl/tls.h
index 24a488ff37..91dd7498e4 100644
--- a/sysdeps/arm/nptl/tls.h
+++ b/sysdeps/arm/nptl/tls.h
@@ -120,8 +120,6 @@ typedef struct
       atomic_write_barrier ();						     \
     }									     \
   while (0)
-#define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* __ASSEMBLER__ */
 
diff --git a/sysdeps/csky/nptl/tls.h b/sysdeps/csky/nptl/tls.h
index bcca9674a1..f308773d40 100644
--- a/sysdeps/csky/nptl/tls.h
+++ b/sysdeps/csky/nptl/tls.h
@@ -147,8 +147,6 @@ typedef struct
       atomic_write_barrier ();						      \
     }									      \
   while (0)
-# define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* __ASSEMBLER__ */
 
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 382eeb9be0..b1da03cafe 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -38,6 +38,7 @@
 #include <libc-lock.h>
 #include <hp-timing.h>
 #include <tls.h>
+#include <list_t.h>
 
 __BEGIN_DECLS
 
@@ -461,15 +462,22 @@ struct rtld_global
 
   EXTERN void (*_dl_init_static_tls) (struct link_map *);
 
-  EXTERN void (*_dl_wait_lookup_done) (void);
-
   /* Scopes to free after next THREAD_GSCOPE_WAIT ().  */
   EXTERN struct dl_scope_free_list
   {
     size_t count;
     void *list[50];
   } *_dl_scope_free_list;
-#if !THREAD_GSCOPE_IN_TCB
+#if THREAD_GSCOPE_IN_TCB
+  /* List of active thread stacks, with memory managed by glibc.  */
+  EXTERN list_t _dl_stack_used;
+
+  /* List of thread stacks that were allocated by the application.  */
+  EXTERN list_t _dl_stack_user;
+
+  /* Mutex protecting the stack lists.  */
+  EXTERN int _dl_stack_cache_lock;
+#else
   EXTERN int _dl_thread_gscope_count;
 #endif
 #ifdef SHARED
@@ -1252,6 +1260,11 @@ link_map_audit_state (struct link_map *l, size_t index)
 }
 #endif /* SHARED */
 
+#if THREAD_GSCOPE_IN_TCB
+void __thread_gscope_wait (void) attribute_hidden;
+# define THREAD_GSCOPE_WAIT() __thread_gscope_wait ()
+#endif
+
 __END_DECLS
 
 #endif /* ldsodefs.h */
diff --git a/sysdeps/hppa/nptl/tls.h b/sysdeps/hppa/nptl/tls.h
index 2315340735..f319cb42e2 100644
--- a/sysdeps/hppa/nptl/tls.h
+++ b/sysdeps/hppa/nptl/tls.h
@@ -154,8 +154,6 @@ static inline void __set_cr27(struct pthread *cr27)
       atomic_write_barrier ();						     \
     }									     \
   while (0)
-#define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* !__ASSEMBLER__ */
 
diff --git a/sysdeps/i386/nptl/tls.h b/sysdeps/i386/nptl/tls.h
index 5042d52b98..b74347bacd 100644
--- a/sysdeps/i386/nptl/tls.h
+++ b/sysdeps/i386/nptl/tls.h
@@ -387,8 +387,6 @@ tls_fill_user_desc (union user_desc_init *desc,
   while (0)
 #define THREAD_GSCOPE_SET_FLAG() \
   THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED)
-#define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* __ASSEMBLER__ */
 
diff --git a/sysdeps/ia64/nptl/tls.h b/sysdeps/ia64/nptl/tls.h
index f5b1684ef3..42b082dad6 100644
--- a/sysdeps/ia64/nptl/tls.h
+++ b/sysdeps/ia64/nptl/tls.h
@@ -175,8 +175,6 @@ register struct pthread *__thread_self __asm__("r13");
       atomic_write_barrier ();						     \
     }									     \
   while (0)
-#define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* __ASSEMBLER__ */
 
diff --git a/sysdeps/m68k/nptl/tls.h b/sysdeps/m68k/nptl/tls.h
index 68ea952e79..69e174484e 100644
--- a/sysdeps/m68k/nptl/tls.h
+++ b/sysdeps/m68k/nptl/tls.h
@@ -153,8 +153,6 @@ extern void * __m68k_read_tp (void);
       atomic_write_barrier ();						\
     }									\
   while (0)
-#define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* __ASSEMBLER__ */
 
diff --git a/sysdeps/microblaze/nptl/tls.h b/sysdeps/microblaze/nptl/tls.h
index a094fdb798..78f63019dc 100644
--- a/sysdeps/microblaze/nptl/tls.h
+++ b/sysdeps/microblaze/nptl/tls.h
@@ -136,8 +136,6 @@ typedef struct
       atomic_write_barrier ();                                              \
     }                                                                       \
   while (0)
-# define THREAD_GSCOPE_WAIT() \
-  GL (dl_wait_lookup_done) ()
 
 #endif /* __ASSEMBLER__ */
 
diff --git a/sysdeps/mips/nptl/tls.h b/sysdeps/mips/nptl/tls.h
index 8b55f19c37..7e7ac43d6c 100644
--- a/sysdeps/mips/nptl/tls.h
+++ b/sysdeps/mips/nptl/tls.h
@@ -178,8 +178,6 @@ typedef struct
       atomic_write_barrier ();						     \
     }									     \
   while (0)
-#define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* __ASSEMBLER__ */
 
diff --git a/sysdeps/nios2/nptl/tls.h b/sysdeps/nios2/nptl/tls.h
index facb27c761..776fe9bb2d 100644
--- a/sysdeps/nios2/nptl/tls.h
+++ b/sysdeps/nios2/nptl/tls.h
@@ -157,8 +157,6 @@ register struct pthread *__thread_self __asm__("r23");
       atomic_write_barrier ();						     \
     }									     \
   while (0)
-#define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* __ASSEMBLER__ */
 
diff --git a/sysdeps/nptl/dl-thread_gscope_wait.c b/sysdeps/nptl/dl-thread_gscope_wait.c
new file mode 100644
index 0000000000..a9bfbee24c
--- /dev/null
+++ b/sysdeps/nptl/dl-thread_gscope_wait.c
@@ -0,0 +1,80 @@
+/* Out-of-line notification function for the GSCOPE locking mechanism.
+   Copyright (C) 2007-2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <nptl/descr.h>
+#include <futex-internal.h>
+#include <ldsodefs.h>
+#include <list.h>
+#include <lowlevellock.h>
+
+void
+__thread_gscope_wait (void)
+{
+  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
+
+  struct pthread *self = THREAD_SELF;
+
+  /* Threads on system-allocated stacks first; SELF is never waited on.  */
+  list_t *runp;
+  list_for_each (runp, &GL (dl_stack_used))
+    {
+      struct pthread *t = list_entry (runp, struct pthread, list);
+      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
+        continue;
+
+      int *const gscope_flagp = &t->header.gscope_flag;
+
+      /* Announce a waiter by switching the flag from USED to WAIT.  If
+         it was no longer USED, skip the thread; otherwise sleep on the
+         futex below until the flag is no longer WAIT.  */
+      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
+                                                THREAD_GSCOPE_FLAG_WAIT,
+                                                THREAD_GSCOPE_FLAG_USED))
+        continue;
+
+      do
+        futex_wait_simple ((unsigned int *) gscope_flagp,
+                           THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
+      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
+    }
+
+  /* Same procedure for the threads using user-allocated stacks.  */
+  list_for_each (runp, &GL (dl_stack_user))
+    {
+      struct pthread *t = list_entry (runp, struct pthread, list);
+      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
+        continue;
+
+      int *const gscope_flagp = &t->header.gscope_flag;
+
+      /* Tell the thread that we are waiting (USED -> WAIT) and may have
+         to be woken.  A failed exchange means the flag was no longer
+         USED, so there is nothing to wait for on this thread.  */
+      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
+                                                THREAD_GSCOPE_FLAG_WAIT,
+                                                THREAD_GSCOPE_FLAG_USED))
+        continue;
+
+      do
+        futex_wait_simple ((unsigned int *) gscope_flagp,
+                           THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
+      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
+    }
+
+  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
+}
diff --git a/sysdeps/powerpc/nptl/tls.h b/sysdeps/powerpc/nptl/tls.h
index b1f0b30fbb..261eecfd18 100644
--- a/sysdeps/powerpc/nptl/tls.h
+++ b/sysdeps/powerpc/nptl/tls.h
@@ -245,8 +245,6 @@ register void *__thread_register __asm__ ("r13");
       atomic_write_barrier ();						     \
     }									     \
   while (0)
-#define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* __ASSEMBLER__ */
 
diff --git a/sysdeps/riscv/nptl/tls.h b/sysdeps/riscv/nptl/tls.h
index a9167bc143..41d9db10cf 100644
--- a/sysdeps/riscv/nptl/tls.h
+++ b/sysdeps/riscv/nptl/tls.h
@@ -139,8 +139,6 @@ typedef struct
       atomic_write_barrier ();						     \
     }									     \
   while (0)
-# define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* __ASSEMBLER__ */
 
diff --git a/sysdeps/s390/nptl/tls.h b/sysdeps/s390/nptl/tls.h
index 4a9b3570eb..7653109617 100644
--- a/sysdeps/s390/nptl/tls.h
+++ b/sysdeps/s390/nptl/tls.h
@@ -185,8 +185,6 @@ typedef struct
       atomic_write_barrier ();						     \
     }									     \
   while (0)
-#define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* __ASSEMBLER__ */
 
diff --git a/sysdeps/sh/nptl/tls.h b/sysdeps/sh/nptl/tls.h
index 6ccad251f9..2d5e3731b2 100644
--- a/sysdeps/sh/nptl/tls.h
+++ b/sysdeps/sh/nptl/tls.h
@@ -161,8 +161,6 @@ typedef struct
       atomic_write_barrier ();						     \
     }									     \
   while (0)
-#define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* __ASSEMBLER__ */
 
diff --git a/sysdeps/sparc/nptl/tls.h b/sysdeps/sparc/nptl/tls.h
index 18a548e86b..b08a156131 100644
--- a/sysdeps/sparc/nptl/tls.h
+++ b/sysdeps/sparc/nptl/tls.h
@@ -158,8 +158,6 @@ register struct pthread *__thread_self __asm__("%g7");
       atomic_write_barrier ();						     \
     }									     \
   while (0)
-#define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* !ASSEMBLER */
 
diff --git a/sysdeps/x86_64/nptl/tls.h b/sysdeps/x86_64/nptl/tls.h
index fbd7f9cb89..a08bf972de 100644
--- a/sysdeps/x86_64/nptl/tls.h
+++ b/sysdeps/x86_64/nptl/tls.h
@@ -332,8 +332,6 @@ _Static_assert (offsetof (tcbhead_t, __glibc_unused2) == 0x80,
   while (0)
 # define THREAD_GSCOPE_SET_FLAG() \
   THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED)
-# define THREAD_GSCOPE_WAIT() \
-  GL(dl_wait_lookup_done) ()
 
 #endif /* __ASSEMBLER__ */