-rw-r--r--  elf/dl-find_object.c              162
-rw-r--r--  elf/dl-find_object.h               44
-rw-r--r--  elf/tst-dl_find_object-threads.c    4
3 files changed, 133 insertions, 77 deletions
diff --git a/elf/dl-find_object.c b/elf/dl-find_object.c
index 721fed50d6..0eb8607edd 100644
--- a/elf/dl-find_object.c
+++ b/elf/dl-find_object.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <atomic.h>
 #include <atomic_wide_counter.h>
 #include <dl-find_object.h>
 #include <dlfcn.h>
@@ -80,13 +81,18 @@ static struct dl_find_object_internal *_dlfo_nodelete_mappings
    over all segments, even though the data is not stored in one
    contiguous array.
 
-   During updates, the segments are overwritten in place, and a
-   software transactional memory construct (involving the
+   During updates, the segments are overwritten in place.  A software
+   transactional memory construct (involving the
    _dlfo_loaded_mappings_version variable) is used to detect
-   concurrent modification, and retry as necessary.  The memory
-   allocations are never deallocated, but slots used for objects that
-   have been dlclose'd can be reused by dlopen.  The memory can live
-   in the regular C malloc heap.
+   concurrent modification, and retry as necessary.  (This approach is
+   similar to seqlocks, except that two copies are used, and there is
+   only one writer, ever, due to the loader lock.)  Technically,
+   relaxed MO loads and stores need to be used for the shared TM data,
+   to avoid data races.
+
+   The memory allocations are never deallocated, but slots used for
+   objects that have been dlclose'd can be reused by dlopen.  The
+   memory can live in the regular C malloc heap.
 
    The segments are populated from the start of the list, with the
    mappings with the highest address.  Only if this segment is full,
@@ -101,17 +107,18 @@ static struct dl_find_object_internal *_dlfo_nodelete_mappings
    needed.  */
 struct dlfo_mappings_segment
 {
-  /* The previous segment has lower base addresses.  */
+  /* The previous segment has lower base addresses.  Constant after
+     initialization; read in the TM region.  */
   struct dlfo_mappings_segment *previous;
 
   /* Used by __libc_freeres to deallocate malloc'ed memory.  */
   void *to_free;
 
   /* Count of array elements in use and allocated.  */
-  size_t size;
+  size_t size;                  /* Read in the TM region.  */
   size_t allocated;
 
-  struct dl_find_object_internal objects[];
+  struct dl_find_object_internal objects[]; /* Read in the TM region.  */
 };
 
 /* To achieve async-signal-safety, two copies of the data structure
@@ -240,7 +247,8 @@ static inline uint64_t
 _dlfo_read_start_version (void)
 {
   /* Acquire MO load synchronizes with the fences at the beginning and
-     end of the TM update region.  */
+     end of the TM update region in _dlfo_mappings_begin_update,
+     _dlfo_mappings_end_update, _dlfo_mappings_end_update_no_switch.  */
   return __atomic_wide_counter_load_acquire (&_dlfo_loaded_mappings_version);
 }
 
@@ -258,34 +266,30 @@ _dlfo_read_version_locked (void)
 static inline unsigned int
 _dlfo_mappings_begin_update (void)
 {
-  unsigned int v
-    = __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version,
-                                               2);
-  /* Subsequent stores to the TM data must not be reordered before the
-     store above with the version update.  */
+  /* The store synchronizes with loads in _dlfo_read_start_version
+     (also called from _dlfo_read_success).  */
   atomic_thread_fence_release ();
-  return v & 1;
+  return __atomic_wide_counter_fetch_add_relaxed
+    (&_dlfo_loaded_mappings_version, 2);
 }
 
 /* Installs the just-updated version as the active version.  */
 static inline void
 _dlfo_mappings_end_update (void)
 {
-  /* The previous writes to the TM data must not be reordered after
-     the version update below.  */
+  /* The store synchronizes with loads in _dlfo_read_start_version
+     (also called from _dlfo_read_success).  */
   atomic_thread_fence_release ();
-  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version,
-                                           1);
+  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version, 1);
 }
 /* Completes an in-place update without switching versions.  */
 static inline void
 _dlfo_mappings_end_update_no_switch (void)
 {
-  /* The previous writes to the TM data must not be reordered after
-     the version update below.  */
+  /* The store synchronizes with loads in _dlfo_read_start_version
+     (also called from _dlfo_read_success).  */
   atomic_thread_fence_release ();
-  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version,
-                                           2);
+  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version, 2);
 }
 
 /* Return true if the read was successful, given the start
@@ -293,6 +297,19 @@ _dlfo_mappings_end_update_no_switch (void)
 static inline bool
 _dlfo_read_success (uint64_t start_version)
 {
+  /* See Hans Boehm, Can Seqlocks Get Along with Programming Language
+     Memory Models?, Section 4.  This is necessary so that loads in
+     the TM region are not ordered past the version check below.  */
+  atomic_thread_fence_acquire ();
+
+  /* Synchronizes with stores in _dlfo_mappings_begin_update,
+     _dlfo_mappings_end_update, _dlfo_mappings_end_update_no_switch.
+     It is important that all stores from the last update are
+     visible, and that stores from subsequent updates are not.
+
+     Unlike with seqlocks, there is no check for odd versions here
+     because we have read the unmodified copy (confirmed to be
+     unmodified by the unchanged version).  */
   return _dlfo_read_start_version () == start_version;
 }
 
@@ -318,7 +335,7 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size)
     {
       size_t half = size >> 1;
       struct dl_find_object_internal *middle = first + half;
-      if (middle->map_start < pc)
+      if (atomic_load_relaxed (&middle->map_start) < pc)
         {
           first = middle + 1;
           size -= half + 1;
@@ -327,9 +344,9 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size)
         size = half;
     }
 
-  if (first != end && pc == first->map_start)
+  if (first != end && pc == atomic_load_relaxed (&first->map_start))
     {
-      if (pc < first->map_end)
+      if (pc < atomic_load_relaxed (&first->map_end))
         return first;
       else
         /* Zero-length mapping after dlclose.  */
@@ -339,7 +356,7 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size)
     {
       /* Check to see if PC is in the previous mapping.  */
       --first;
-      if (pc < first->map_end)
+      if (pc < atomic_load_relaxed (&first->map_end))
         /* pc >= first->map_start implied by the search above.  */
         return first;
       else
@@ -408,39 +425,47 @@ _dl_find_object (void *pc1, struct dl_find_object *result)
          size on earlier unused segments.  */
       for (struct dlfo_mappings_segment *seg
              = _dlfo_mappings_active_segment (start_version);
-           seg != NULL && seg->size > 0; seg = seg->previous)
-        if (pc >= seg->objects[0].map_start)
-          {
-            /* PC may lie within this segment.  If it is less than the
-               segment start address, it can only lie in a previous
-               segment, due to the base address sorting.  */
-            struct dl_find_object_internal *obj
-              = _dlfo_lookup (pc, seg->objects, seg->size);
+           seg != NULL;
+           seg = atomic_load_acquire (&seg->previous))
+        {
+          size_t seg_size = atomic_load_relaxed (&seg->size);
+          if (seg_size == 0)
+            break;
 
-            if (obj != NULL)
-              {
-                /* Found the right mapping.  Copy out the data prior to
-                   checking if the read transaction was successful.  */
-                struct dl_find_object_internal copy = *obj;
-                if (_dlfo_read_success (start_version))
-                  {
-                    _dl_find_object_to_external (&copy, result);
-                    return 0;
-                  }
-                else
-                  /* Read transaction failure.  */
-                  goto retry;
-              }
-            else
-              {
-                /* PC is not covered by this mapping.  */
-                if (_dlfo_read_success (start_version))
-                  return -1;
-                else
-                  /* Read transaction failure.  */
-                  goto retry;
-              }
-          } /* if: PC might lie within the current seg.  */
+          if (pc >= atomic_load_relaxed (&seg->objects[0].map_start))
+            {
+              /* PC may lie within this segment.  If it is less than the
+                 segment start address, it can only lie in a previous
+                 segment, due to the base address sorting.  */
+              struct dl_find_object_internal *obj
+                = _dlfo_lookup (pc, seg->objects, seg_size);
+
+              if (obj != NULL)
+                {
+                  /* Found the right mapping.  Copy out the data prior to
+                     checking if the read transaction was successful.  */
+                  struct dl_find_object_internal copy;
+                  _dl_find_object_internal_copy (obj, &copy);
+                  if (_dlfo_read_success (start_version))
+                    {
+                      _dl_find_object_to_external (&copy, result);
+                      return 0;
+                    }
+                  else
+                    /* Read transaction failure.  */
+                    goto retry;
+                }
+              else
+                {
+                  /* PC is not covered by this mapping.  */
+                  if (_dlfo_read_success (start_version))
+                    return -1;
+                  else
+                    /* Read transaction failure.  */
+                    goto retry;
+                }
+            } /* if: PC might lie within the current seg.  */
+        }
 
       /* PC is not covered by any segment.  */
       if (_dlfo_read_success (start_version))
@@ -619,15 +644,19 @@ static inline size_t
 _dlfo_update_init_seg (struct dlfo_mappings_segment *seg,
                        size_t remaining_to_add)
 {
+  size_t new_seg_size;
   if (remaining_to_add < seg->allocated)
     /* Partially filled segment.  */
-    seg->size = remaining_to_add;
+    new_seg_size = remaining_to_add;
   else
-    seg->size = seg->allocated;
-  return seg->size;
+    new_seg_size = seg->allocated;
+  atomic_store_relaxed (&seg->size, new_seg_size);
+  return new_seg_size;
 }
 
-/* Invoked from _dl_find_object_update after sorting.  */
+/* Invoked from _dl_find_object_update after sorting.  Stores to the
+   shared data need to use relaxed MO.  But plain loads can be used
+   because the loader lock prevents concurrent stores.  */
 static bool
 _dl_find_object_update_1 (struct link_map **loaded, size_t count)
 {
@@ -727,7 +756,8 @@ _dl_find_object_update_1 (struct link_map **loaded, size_t count)
         {
           /* Prefer mapping in current_seg.  */
           assert (current_seg_index1 > 0);
-          *dlfo = current_seg->objects[current_seg_index1 - 1];
+          _dl_find_object_internal_copy
+            (&current_seg->objects[current_seg_index1 - 1], dlfo);
           --current_seg_index1;
         }
       else
@@ -753,7 +783,7 @@ _dl_find_object_update_1 (struct link_map **loaded, size_t count)
 
   /* Prevent searching further into unused segments.  */
   if (target_seg->previous != NULL)
-    target_seg->previous->size = 0;
+    atomic_store_relaxed (&target_seg->previous->size, 0);
 
   _dlfo_mappings_end_update ();
   return true;
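
The comments in this file compare the update scheme to seqlocks while pointing out the differences (two copies, a single writer under the loader lock, no odd-version check).  For comparison only, the sketch below shows a classic single-writer seqlock in ISO C11, with the fence placement discussed in the Boehm paper cited in _dlfo_read_success.  All names here are placeholders; this is not the dl-find_object code, which avoids the odd-version check by directing readers at the unmodified copy.

  /* Minimal single-writer seqlock sketch in ISO C11 (illustration only).  */
  #include <stdatomic.h>
  #include <stdint.h>

  static _Atomic uint64_t payload_version; /* Even: stable.  Odd: update in progress.  */
  static _Atomic uintptr_t payload_a;
  static _Atomic uintptr_t payload_b;

  /* Reader: retry until a consistent snapshot is observed.  */
  static void
  payload_read (uintptr_t *a, uintptr_t *b)
  {
    uint64_t v0, v1;
    do
      {
        /* Acquire load pairs with the writer's release operations.  */
        v0 = atomic_load_explicit (&payload_version, memory_order_acquire);
        /* Relaxed loads avoid data races; the snapshot is validated below.  */
        *a = atomic_load_explicit (&payload_a, memory_order_relaxed);
        *b = atomic_load_explicit (&payload_b, memory_order_relaxed);
        /* Keep the loads above from being reordered past the version
           re-check (Boehm, "Can Seqlocks Get Along with Programming
           Language Memory Models?", Section 4).  */
        atomic_thread_fence (memory_order_acquire);
        v1 = atomic_load_explicit (&payload_version, memory_order_relaxed);
      }
    while (v0 != v1 || (v0 & 1) != 0);
  }

  /* Writer: assumed to be serialized externally (cf. the loader lock).  */
  static void
  payload_write (uintptr_t a, uintptr_t b)
  {
    uint64_t v = atomic_load_explicit (&payload_version, memory_order_relaxed);
    /* Announce the update (odd version) before touching the payload.  */
    atomic_store_explicit (&payload_version, v + 1, memory_order_relaxed);
    atomic_thread_fence (memory_order_release);
    atomic_store_explicit (&payload_a, a, memory_order_relaxed);
    atomic_store_explicit (&payload_b, b, memory_order_relaxed);
    /* Publish the new data under an even version.  */
    atomic_store_explicit (&payload_version, v + 2, memory_order_release);
  }
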
diff --git a/elf/dl-find_object.h b/elf/dl-find_object.h
index 937d443581..3b49877e0e 100644
--- a/elf/dl-find_object.h
+++ b/elf/dl-find_object.h
@@ -20,6 +20,7 @@
 #define _DL_FIND_EH_FRAME_H
 
 #include <assert.h>
+#include <atomic.h>
 #include <dlfcn.h>
 #include <ldsodefs.h>
 #include <stdbool.h>
@@ -44,6 +45,30 @@ struct dl_find_object_internal
 #endif
 };
 
+/* Create a copy of *SOURCE in *COPY using relaxed MO loads and
+   stores.  */
+static inline void
+_dl_find_object_internal_copy (const struct dl_find_object_internal *source,
+                               struct dl_find_object_internal *copy)
+{
+  atomic_store_relaxed (&copy->map_start,
+                        atomic_load_relaxed (&source->map_start));
+  atomic_store_relaxed (&copy->map_end,
+                        atomic_load_relaxed (&source->map_end));
+  atomic_store_relaxed (&copy->map,
+                        atomic_load_relaxed (&source->map));
+  atomic_store_relaxed (&copy->eh_frame,
+                        atomic_load_relaxed (&source->eh_frame));
+#if DLFO_STRUCT_HAS_EH_DBASE
+  atomic_store_relaxed (&copy->eh_dbase,
+                        atomic_load_relaxed (&source->eh_dbase));
+#endif
+#if DLFO_STRUCT_HAS_EH_COUNT
+  atomic_store_relaxed (&copy->eh_count,
+                        atomic_load_relaxed (&source->eh_count));
+#endif
+}
+
 static inline void
 _dl_find_object_to_external (struct dl_find_object_internal *internal,
                              struct dl_find_object *external)
@@ -62,34 +87,35 @@ _dl_find_object_to_external (struct dl_find_object_internal *internal,
 }
 
 /* Extract the object location data from a link map and writes it to
-   *RESULT.  */
+   *RESULT using relaxed MO stores.  */
 static void __attribute__ ((unused))
 _dl_find_object_from_map (struct link_map *l,
                           struct dl_find_object_internal *result)
 {
-  result->map_start = (uintptr_t) l->l_map_start;
-  result->map_end = (uintptr_t) l->l_map_end;
-  result->map = l;
+  atomic_store_relaxed (&result->map_start, (uintptr_t) l->l_map_start);
+  atomic_store_relaxed (&result->map_end, (uintptr_t) l->l_map_end);
+  atomic_store_relaxed (&result->map, l);
 
 #if DLFO_STRUCT_HAS_EH_DBASE
-  result->eh_dbase = (void *) l->l_info[DT_PLTGOT];
+  atomic_store_relaxed (&result->eh_dbase, (void *) l->l_info[DT_PLTGOT]);
 #endif
 
   for (const ElfW(Phdr) *ph = l->l_phdr, *ph_end = l->l_phdr + l->l_phnum;
        ph < ph_end; ++ph)
     if (ph->p_type == DLFO_EH_SEGMENT_TYPE)
       {
-        result->eh_frame = (void *) (ph->p_vaddr + l->l_addr);
+        atomic_store_relaxed (&result->eh_frame,
+                              (void *) (ph->p_vaddr + l->l_addr));
 #if DLFO_STRUCT_HAS_EH_COUNT
-        result->eh_count = ph->p_memsz / 8;
+        atomic_store_relaxed (&result->eh_count, ph->p_memsz / 8);
 #endif
         return;
       }
 
   /* Object has no exception handling segment.  */
-  result->eh_frame = NULL;
+  atomic_store_relaxed (&result->eh_frame, NULL);
 #if DLFO_STRUCT_HAS_EH_COUNT
-  result->eh_count = 0;
+  atomic_store_relaxed (&result->eh_count, 0);
 #endif
 }
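
The copy helper added above uses the glibc-internal atomic_load_relaxed/atomic_store_relaxed macros.  As an illustration only, the corresponding pattern in ISO C11 atomics for a hypothetical record could look like the sketch below: copying field by field with relaxed accesses avoids a data race with the in-place writer, and stale or mixed values are acceptable because the caller validates the snapshot against the version counter before using it.  The record layout and names are assumptions, not glibc code.

  #include <stdatomic.h>
  #include <stdint.h>

  struct shared_record
  {
    _Atomic uintptr_t start;
    _Atomic uintptr_t end;
    _Atomic (void *) data;
  };

  /* Copy *SOURCE to *COPY field by field; a plain struct assignment
     would be a data race if the single writer updates in place.  */
  static inline void
  shared_record_copy (const struct shared_record *source,
                      struct shared_record *copy)
  {
    atomic_store_explicit (&copy->start,
                           atomic_load_explicit (&source->start,
                                                 memory_order_relaxed),
                           memory_order_relaxed);
    atomic_store_explicit (&copy->end,
                           atomic_load_explicit (&source->end,
                                                 memory_order_relaxed),
                           memory_order_relaxed);
    atomic_store_explicit (&copy->data,
                           atomic_load_explicit (&source->data,
                                                 memory_order_relaxed),
                           memory_order_relaxed);
  }
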
 
diff --git a/elf/tst-dl_find_object-threads.c b/elf/tst-dl_find_object-threads.c
index de3b468fb8..331b90f6ec 100644
--- a/elf/tst-dl_find_object-threads.c
+++ b/elf/tst-dl_find_object-threads.c
@@ -138,12 +138,12 @@ check (void *address, struct dl_find_object *expected, int line)
 #endif
 }
 
-/* Request process termination after 3 seconds.  */
+/* Request process termination after 0.3 seconds.  */
 static bool exit_requested;
 static void *
 exit_thread (void *ignored)
 {
-  usleep (3 * 100 * 1000);
+  usleep (300 * 1000);
   __atomic_store_n (&exit_requested, true,  __ATOMIC_RELAXED);
   return NULL;
 }
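
The test change above only shortens the run time: a timer thread raises a relaxed flag after 0.3 seconds, and the worker threads stop polling once they observe it.  A standalone sketch of that bounded-duration stress pattern follows; the names and the loop body are placeholders, not the actual test code.

  #include <pthread.h>
  #include <stdatomic.h>
  #include <stdbool.h>
  #include <unistd.h>

  static atomic_bool stop_requested;

  static void *
  timer_thread (void *ignored)
  {
    usleep (300 * 1000);        /* Let the workers run for 0.3 seconds.  */
    atomic_store_explicit (&stop_requested, true, memory_order_relaxed);
    return NULL;
  }

  static void *
  worker_thread (void *ignored)
  {
    while (!atomic_load_explicit (&stop_requested, memory_order_relaxed))
      {
        /* Exercise the operation under test here, e.g. repeated
           lookups racing against dlopen/dlclose in another thread.  */
      }
    return NULL;
  }

  int
  main (void)
  {
    pthread_t timer, worker;
    pthread_create (&timer, NULL, timer_thread, NULL);
    pthread_create (&worker, NULL, worker_thread, NULL);
    pthread_join (worker, NULL);
    pthread_join (timer, NULL);
    return 0;
  }
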