summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Weimer <fweimer@redhat.com>2022-01-07 13:21:57 +0100
committerFlorian Weimer <fweimer@redhat.com>2022-01-07 13:21:57 +0100
commitacbaad31e8ea10fce8b9c0aef58afb388bf7489d (patch)
treeeb8a5902a2080aa094a823ef02a64f78c2da4d71
parentd5b0046e3ddf8ea82a3eff74068b8fd2665b98db (diff)
downloadglibc-acbaad31e8ea10fce8b9c0aef58afb388bf7489d.tar.gz
glibc-acbaad31e8ea10fce8b9c0aef58afb388bf7489d.tar.xz
glibc-acbaad31e8ea10fce8b9c0aef58afb388bf7489d.zip
elf: Fix fences in _dl_find_object_update (bug 28745)
As explained in Hans Boehm, Can Seqlocks Get Along with Programming
Language Memory Models?, an acquire fence is needed in
_dlfo_read_success.  The lack of a fence resulted in an observable
bug on powerpc64le compile-time load reordering.

The fence in _dlfo_mappings_begin_update has been reordered, turning
the fence/store sequence into a release MO store equivalent.

Relaxed MO loads are used on the reader side, and relaxed MO stores
on the writer side for the shared data, to avoid formal data races.
This is just to be conservative; it should not actually be necessary
given how the data is used.

This commit also fixes the test run time.  The intent was to run it
for 3 seconds, but 0.3 seconds was enough to uncover the bug very
occasionally (while 3 seconds did not reliably show the bug on every
test run).

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
-rw-r--r--elf/dl-find_object.c162
-rw-r--r--elf/dl-find_object.h44
-rw-r--r--elf/tst-dl_find_object-threads.c4
3 files changed, 133 insertions, 77 deletions
diff --git a/elf/dl-find_object.c b/elf/dl-find_object.c
index 721fed50d6..0eb8607edd 100644
--- a/elf/dl-find_object.c
+++ b/elf/dl-find_object.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <atomic.h>
 #include <atomic_wide_counter.h>
 #include <dl-find_object.h>
 #include <dlfcn.h>
@@ -80,13 +81,18 @@ static struct dl_find_object_internal *_dlfo_nodelete_mappings
    over all segments, even though the data is not stored in one
    contiguous array.
 
-   During updates, the segments are overwritten in place, and a
-   software transactional memory construct (involving the
+   During updates, the segments are overwritten in place.  A software
+   transactional memory construct (involving the
    _dlfo_loaded_mappings_version variable) is used to detect
-   concurrent modification, and retry as necessary.  The memory
-   allocations are never deallocated, but slots used for objects that
-   have been dlclose'd can be reused by dlopen.  The memory can live
-   in the regular C malloc heap.
+   concurrent modification, and retry as necessary.  (This approach is
+   similar to seqlocks, except that two copies are used, and there is
+   only one writer, ever, due to the loader lock.)  Technically,
+   relaxed MO loads and stores need to be used for the shared TM data,
+   to avoid data races.
+
+   The memory allocations are never deallocated, but slots used for
+   objects that have been dlclose'd can be reused by dlopen.  The
+   memory can live in the regular C malloc heap.
 
    The segments are populated from the start of the list, with the
    mappings with the highest address.  Only if this segment is full,
@@ -101,17 +107,18 @@ static struct dl_find_object_internal *_dlfo_nodelete_mappings
    needed.  */
 struct dlfo_mappings_segment
 {
-  /* The previous segment has lower base addresses.  */
+  /* The previous segment has lower base addresses.  Constant after
+     initialization; read in the TM region.  */
   struct dlfo_mappings_segment *previous;
 
   /* Used by __libc_freeres to deallocate malloc'ed memory.  */
   void *to_free;
 
   /* Count of array elements in use and allocated.  */
-  size_t size;
+  size_t size;                  /* Read in the TM region.  */
   size_t allocated;
 
-  struct dl_find_object_internal objects[];
+  struct dl_find_object_internal objects[]; /* Read in the TM region.  */
 };
 
 /* To achieve async-signal-safety, two copies of the data structure
@@ -240,7 +247,8 @@ static inline uint64_t
 _dlfo_read_start_version (void)
 {
   /* Acquire MO load synchronizes with the fences at the beginning and
-     end of the TM update region.  */
+     end of the TM update region in _dlfo_mappings_begin_update,
+     _dlfo_mappings_end_update, _dlfo_mappings_end_update_no_switch.  */
   return __atomic_wide_counter_load_acquire (&_dlfo_loaded_mappings_version);
 }
 
@@ -258,34 +266,30 @@ _dlfo_read_version_locked (void)
 static inline unsigned int
 _dlfo_mappings_begin_update (void)
 {
-  unsigned int v
-    = __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version,
-                                               2);
-  /* Subsequent stores to the TM data must not be reordered before the
-     store above with the version update.  */
+  /* The store synchronizes with loads in _dlfo_read_start_version
+     (also called from _dlfo_read_success).  */
   atomic_thread_fence_release ();
-  return v & 1;
+  return __atomic_wide_counter_fetch_add_relaxed
+    (&_dlfo_loaded_mappings_version, 2);
 }
 
 /* Installs the just-updated version as the active version.  */
 static inline void
 _dlfo_mappings_end_update (void)
 {
-  /* The previous writes to the TM data must not be reordered after
-     the version update below.  */
+  /* The store synchronizes with loads in _dlfo_read_start_version
+     (also called from _dlfo_read_success).  */
   atomic_thread_fence_release ();
-  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version,
-                                           1);
+  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version, 1);
 }
 /* Completes an in-place update without switching versions.  */
 static inline void
 _dlfo_mappings_end_update_no_switch (void)
 {
-  /* The previous writes to the TM data must not be reordered after
-     the version update below.  */
+  /* The store synchronizes with loads in _dlfo_read_start_version
+     (also called from _dlfo_read_success).  */
   atomic_thread_fence_release ();
-  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version,
-                                           2);
+  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version, 2);
 }
 
 /* Return true if the read was successful, given the start
@@ -293,6 +297,19 @@ _dlfo_mappings_end_update_no_switch (void)
 static inline bool
 _dlfo_read_success (uint64_t start_version)
 {
+  /* See Hans Boehm, Can Seqlocks Get Along with Programming Language
+     Memory Models?, Section 4.  This is necessary so that loads in
+     the TM region are not ordered past the version check below.  */
+  atomic_thread_fence_acquire ();
+
+  /* Synchronizes with stores in _dlfo_mappings_begin_update,
+     _dlfo_mappings_end_update, _dlfo_mappings_end_update_no_switch.
+     It is important that all stores from the last update have been
+     visible, and stores from the next updates are not.
+
+     Unlike with seqlocks, there is no check for odd versions here
+     because we have read the unmodified copy (confirmed to be
+     unmodified by the unchanged version).  */
   return _dlfo_read_start_version () == start_version;
 }
 
@@ -318,7 +335,7 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size)
     {
       size_t half = size >> 1;
       struct dl_find_object_internal *middle = first + half;
-      if (middle->map_start < pc)
+      if (atomic_load_relaxed (&middle->map_start) < pc)
         {
           first = middle + 1;
           size -= half + 1;
@@ -327,9 +344,9 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size)
         size = half;
     }
 
-  if (first != end && pc == first->map_start)
+  if (first != end && pc == atomic_load_relaxed (&first->map_start))
     {
-      if (pc < first->map_end)
+      if (pc < atomic_load_relaxed (&first->map_end))
         return first;
       else
         /* Zero-length mapping after dlclose.  */
@@ -339,7 +356,7 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size)
     {
       /* Check to see if PC is in the previous mapping.  */
       --first;
-      if (pc < first->map_end)
+      if (pc < atomic_load_relaxed (&first->map_end))
         /* pc >= first->map_start implied by the search above.  */
         return first;
       else
@@ -408,39 +425,47 @@ _dl_find_object (void *pc1, struct dl_find_object *result)
          size on earlier unused segments.  */
       for (struct dlfo_mappings_segment *seg
              = _dlfo_mappings_active_segment (start_version);
-           seg != NULL && seg->size > 0; seg = seg->previous)
-        if (pc >= seg->objects[0].map_start)
-          {
-            /* PC may lie within this segment.  If it is less than the
-               segment start address, it can only lie in a previous
-               segment, due to the base address sorting.  */
-            struct dl_find_object_internal *obj
-              = _dlfo_lookup (pc, seg->objects, seg->size);
+           seg != NULL;
+           seg = atomic_load_acquire (&seg->previous))
+        {
+          size_t seg_size = atomic_load_relaxed (&seg->size);
+          if (seg_size == 0)
+            break;
 
-            if (obj != NULL)
-              {
-                /* Found the right mapping.  Copy out the data prior to
-                   checking if the read transaction was successful.  */
-                struct dl_find_object_internal copy = *obj;
-                if (_dlfo_read_success (start_version))
-                  {
-                    _dl_find_object_to_external (&copy, result);
-                    return 0;
-                  }
-                else
-                  /* Read transaction failure.  */
-                  goto retry;
-              }
-            else
-              {
-                /* PC is not covered by this mapping.  */
-                if (_dlfo_read_success (start_version))
-                  return -1;
-                else
-                  /* Read transaction failure.  */
-                  goto retry;
-              }
-          } /* if: PC might lie within the current seg.  */
+          if (pc >= atomic_load_relaxed (&seg->objects[0].map_start))
+            {
+              /* PC may lie within this segment.  If it is less than the
+                 segment start address, it can only lie in a previous
+                 segment, due to the base address sorting.  */
+              struct dl_find_object_internal *obj
+                = _dlfo_lookup (pc, seg->objects, seg_size);
+
+              if (obj != NULL)
+                {
+                  /* Found the right mapping.  Copy out the data prior to
+                     checking if the read transaction was successful.  */
+                  struct dl_find_object_internal copy;
+                  _dl_find_object_internal_copy (obj, &copy);
+                  if (_dlfo_read_success (start_version))
+                    {
+                      _dl_find_object_to_external (&copy, result);
+                      return 0;
+                    }
+                  else
+                    /* Read transaction failure.  */
+                    goto retry;
+                }
+              else
+                {
+                  /* PC is not covered by this mapping.  */
+                  if (_dlfo_read_success (start_version))
+                    return -1;
+                  else
+                    /* Read transaction failure.  */
+                    goto retry;
+                }
+            } /* if: PC might lie within the current seg.  */
+        }
 
       /* PC is not covered by any segment.  */
       if (_dlfo_read_success (start_version))
@@ -619,15 +644,19 @@ static inline size_t
 _dlfo_update_init_seg (struct dlfo_mappings_segment *seg,
                        size_t remaining_to_add)
 {
+  size_t new_seg_size;
   if (remaining_to_add < seg->allocated)
     /* Partially filled segment.  */
-    seg->size = remaining_to_add;
+    new_seg_size = remaining_to_add;
   else
-    seg->size = seg->allocated;
-  return seg->size;
+    new_seg_size = seg->allocated;
+  atomic_store_relaxed (&seg->size, new_seg_size);
+  return new_seg_size;
 }
 
-/* Invoked from _dl_find_object_update after sorting.  */
+/* Invoked from _dl_find_object_update after sorting.  Stores to the
+   shared data need to use relaxed MO.  But plain loads can be used
+   because the loader lock prevents concurrent stores.  */
 static bool
 _dl_find_object_update_1 (struct link_map **loaded, size_t count)
 {
@@ -727,7 +756,8 @@ _dl_find_object_update_1 (struct link_map **loaded, size_t count)
         {
           /* Prefer mapping in current_seg.  */
           assert (current_seg_index1 > 0);
-          *dlfo = current_seg->objects[current_seg_index1 - 1];
+          _dl_find_object_internal_copy
+            (&current_seg->objects[current_seg_index1 - 1], dlfo);
           --current_seg_index1;
         }
       else
@@ -753,7 +783,7 @@ _dl_find_object_update_1 (struct link_map **loaded, size_t count)
 
   /* Prevent searching further into unused segments.  */
   if (target_seg->previous != NULL)
-    target_seg->previous->size = 0;
+    atomic_store_relaxed (&target_seg->previous->size, 0);
 
   _dlfo_mappings_end_update ();
   return true;
diff --git a/elf/dl-find_object.h b/elf/dl-find_object.h
index 937d443581..3b49877e0e 100644
--- a/elf/dl-find_object.h
+++ b/elf/dl-find_object.h
@@ -20,6 +20,7 @@
 #define _DL_FIND_EH_FRAME_H
 
 #include <assert.h>
+#include <atomic.h>
 #include <dlfcn.h>
 #include <ldsodefs.h>
 #include <stdbool.h>
@@ -44,6 +45,30 @@ struct dl_find_object_internal
 #endif
 };
 
+/* Create a copy of *SOURCE in *COPY using relaxed MO loads and
+   stores.  */
+static inline void
+_dl_find_object_internal_copy (const struct dl_find_object_internal *source,
+                               struct dl_find_object_internal *copy)
+{
+  atomic_store_relaxed (&copy->map_start,
+                        atomic_load_relaxed (&source->map_start));
+  atomic_store_relaxed (&copy->map_end,
+                        atomic_load_relaxed (&source->map_end));
+  atomic_store_relaxed (&copy->map,
+                        atomic_load_relaxed (&source->map));
+  atomic_store_relaxed (&copy->eh_frame,
+                        atomic_load_relaxed (&source->eh_frame));
+#if DLFO_STRUCT_HAS_EH_DBASE
+  atomic_store_relaxed (&copy->eh_dbase,
+                        atomic_load_relaxed (&source->eh_dbase));
+#endif
+#if DLFO_STRUCT_HAS_EH_COUNT
+  atomic_store_relaxed (&copy->eh_count,
+                        atomic_load_relaxed (&source->eh_count));
+#endif
+}
+
 static inline void
 _dl_find_object_to_external (struct dl_find_object_internal *internal,
                              struct dl_find_object *external)
@@ -62,34 +87,35 @@ _dl_find_object_to_external (struct dl_find_object_internal *internal,
 }
 
 /* Extract the object location data from a link map and writes it to
-   *RESULT.  */
+   *RESULT using relaxed MO stores.  */
 static void __attribute__ ((unused))
 _dl_find_object_from_map (struct link_map *l,
                           struct dl_find_object_internal *result)
 {
-  result->map_start = (uintptr_t) l->l_map_start;
-  result->map_end = (uintptr_t) l->l_map_end;
-  result->map = l;
+  atomic_store_relaxed (&result->map_start, (uintptr_t) l->l_map_start);
+  atomic_store_relaxed (&result->map_end, (uintptr_t) l->l_map_end);
+  atomic_store_relaxed (&result->map, l);
 
 #if DLFO_STRUCT_HAS_EH_DBASE
-  result->eh_dbase = (void *) l->l_info[DT_PLTGOT];
+  atomic_store_relaxed (&result->eh_dbase, (void *) l->l_info[DT_PLTGOT]);
 #endif
 
   for (const ElfW(Phdr) *ph = l->l_phdr, *ph_end = l->l_phdr + l->l_phnum;
        ph < ph_end; ++ph)
     if (ph->p_type == DLFO_EH_SEGMENT_TYPE)
       {
-        result->eh_frame = (void *) (ph->p_vaddr + l->l_addr);
+        atomic_store_relaxed (&result->eh_frame,
+                              (void *) (ph->p_vaddr + l->l_addr));
 #if DLFO_STRUCT_HAS_EH_COUNT
-        result->eh_count = ph->p_memsz / 8;
+        atomic_store_relaxed (&result->eh_count, ph->p_memsz / 8);
 #endif
         return;
       }
 
   /* Object has no exception handling segment.  */
-  result->eh_frame = NULL;
+  atomic_store_relaxed (&result->eh_frame, NULL);
 #if DLFO_STRUCT_HAS_EH_COUNT
-  result->eh_count = 0;
+  atomic_store_relaxed (&result->eh_count, 0);
 #endif
 }
 
diff --git a/elf/tst-dl_find_object-threads.c b/elf/tst-dl_find_object-threads.c
index de3b468fb8..331b90f6ec 100644
--- a/elf/tst-dl_find_object-threads.c
+++ b/elf/tst-dl_find_object-threads.c
@@ -138,12 +138,12 @@ check (void *address, struct dl_find_object *expected, int line)
 #endif
 }
 
-/* Request process termination after 3 seconds.  */
+/* Request process termination after 0.3 seconds.  */
 static bool exit_requested;
 static void *
 exit_thread (void *ignored)
 {
-  usleep (3 * 100 * 1000);
+  usleep (300 * 1000);
   __atomic_store_n (&exit_requested, true,  __ATOMIC_RELAXED);
   return NULL;
 }