about summary refs log tree commit diff
path: root/elf/dl-tls.c
diff options
context:
space:
mode:
authorSzabolcs Nagy <szabolcs.nagy@arm.com>2021-02-16 12:55:13 +0000
committerSzabolcs Nagy <szabolcs.nagy@arm.com>2021-04-13 08:43:40 +0100
commitb116855de71098ef7dd2875dd3237f8f3ecc12c2 (patch)
tree6da94b9ca548a5042d743626fdbe1990fa5221a2 /elf/dl-tls.c
parentf8ea2b9982e39fd950d157f5dba31121ceb51df3 (diff)
downloadglibc-nsz/bug19329-v2.tar.gz
glibc-nsz/bug19329-v2.tar.xz
glibc-nsz/bug19329-v2.zip
RFC elf: Fix slow tls access after dlopen [BZ #19924] nsz/bug19329-v2
In short: __tls_get_addr checks the global generation counter, but
_dl_update_slotinfo only updates the dtv up to the generation of the
accessed module. If the global generation is newer than the generation
of the module, then __tls_get_addr keeps hitting the slow path that
updates the dtv.

Possible approaches I can see:

1. update to global generation instead of module,
2. check the module generation in the fast path.

This patch implements approach 1: it needs additional synchronization
(a load acquire) so that the slotinfo list is up to date with the
observed global generation.

Approach 2. would require walking the slotinfo list at all times.
I don't know how to make that fast with many modules.

Note: in the x86_64 version of dl-tls.c the generation is only loaded
once, since a relaxed MO load is not faster than an acquire MO load.

I have not benchmarked this yet.
Diffstat (limited to 'elf/dl-tls.c')
-rw-r--r--elf/dl-tls.c28
1 files changed, 12 insertions, 16 deletions
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index b0257185e9..b51a4f3a19 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -701,7 +701,7 @@ allocate_and_init (struct link_map *map)
 
 
 struct link_map *
-_dl_update_slotinfo (unsigned long int req_modid)
+_dl_update_slotinfo (unsigned long int req_modid, size_t new_gen)
 {
   struct link_map *the_map = NULL;
   dtv_t *dtv = THREAD_DTV ();
@@ -718,19 +718,12 @@ _dl_update_slotinfo (unsigned long int req_modid)
      code and therefore add to the slotinfo list.  This is a problem
      since we must not pick up any information about incomplete work.
      The solution to this is to ignore all dtv slots which were
-     created after the one we are currently interested.  We know that
-     dynamic loading for this module is completed and this is the last
-     load operation we know finished.  */
-  unsigned long int idx = req_modid;
+     created after the generation we are interested in.  We know that
+     dynamic loading for this generation is completed and this is the
+     last load operation we know finished.  */
   struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
 
-  while (idx >= listp->len)
-    {
-      idx -= listp->len;
-      listp = listp->next;
-    }
-
-  if (dtv[0].counter < listp->slotinfo[idx].gen)
+  if (dtv[0].counter < new_gen)
     {
       /* CONCURRENCY NOTES:
 
@@ -751,7 +744,6 @@ _dl_update_slotinfo (unsigned long int req_modid)
 	 other entries are racy.  However updating a non-relevant dtv
 	 entry does not affect correctness.  For a relevant module m,
 	 max_modid >= modid of m.  */
-      size_t new_gen = listp->slotinfo[idx].gen;
       size_t total = 0;
       size_t max_modid  = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
       assert (max_modid >= req_modid);
@@ -894,9 +886,9 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
 
 static struct link_map *
 __attribute_noinline__
-update_get_addr (GET_ADDR_ARGS)
+update_get_addr (GET_ADDR_ARGS, size_t gen)
 {
-  struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE);
+  struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE, gen);
   dtv_t *dtv = THREAD_DTV ();
 
   void *p = dtv[GET_ADDR_MODULE].pointer.val;
@@ -931,7 +923,11 @@ __tls_get_addr (GET_ADDR_ARGS)
      by user code, see CONCURRENCY NOTES in _dl_update_slotinfo.  */
   size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
   if (__glibc_unlikely (dtv[0].counter != gen))
-    return update_get_addr (GET_ADDR_PARAM);
+    {
+// TODO: needs comment update if we rely on consistent generation with slotinfo
+      gen = atomic_load_acquire (&GL(dl_tls_generation));
+      return update_get_addr (GET_ADDR_PARAM, gen);
+    }
 
   void *p = dtv[GET_ADDR_MODULE].pointer.val;