author     Florian Weimer <fweimer@redhat.com>  2021-12-28 22:52:56 +0100
committer  Florian Weimer <fweimer@redhat.com>  2021-12-28 22:52:56 +0100
commit     5d28a8962dcb6ec056b81d730e3c6fb57185a210 (patch)
tree       3d714aaef575deba322fa5a1e29c76c6f96dc850 /elf/dl-find_object.c
parent     83b8d5027d2f80c4603cd706da95d6c9a09a4e16 (diff)
elf: Add _dl_find_object function
It can be used to speed up the libgcc unwinder, and the internal
_dl_find_dso_for_object function (which is used for caller
identification in dlopen and related functions, and in dladdr).
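
For illustration, a consumer such as an unwinder could query the new
interface along these lines.  This is a minimal sketch, assuming the
dlfo_map_start, dlfo_map_end and dlfo_link_map members of struct
dl_find_object exposed through <dlfcn.h> under _GNU_SOURCE; the
describe_pc helper is made up here:

    #define _GNU_SOURCE
    #include <dlfcn.h>
    #include <stdio.h>

    /* Print the mapping that contains PC, if any.  */
    static void
    describe_pc (void *pc)
    {
      struct dl_find_object dlfo;
      if (_dl_find_object (pc, &dlfo) == 0)   /* 0: found, -1: not found.  */
        printf ("%p lies in [%p, %p), link map %p\n",
                pc, dlfo.dlfo_map_start, dlfo.dlfo_map_end,
                (void *) dlfo.dlfo_link_map);
      else
        puts ("PC is not covered by any loaded object");
    }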

_dl_find_object is in the internal namespace due to bug 28503.
If libgcc switches to _dl_find_object, this namespace issue will
be fixed.  It is located in libc for two reasons: it is necessary
to forward the call to the static libc after static dlopen, and
there is a link ordering issue with -static-libgcc and libgcc_eh.a
because libc.so is not a linker script that includes ld.so in the
glibc build tree (so that GCC's internal -lc after libgcc_eh.a does
not pick up ld.so).

It is necessary to do the i386 customization in the
sysdeps/x86/bits/dl_find_object.h header shared with x86-64 because
otherwise, multilib installations are broken.

The implementation uses software transactional memory, as suggested
by Torvald Riegel.  Two copies of the supporting data structures are
used, also achieving full async-signal-safety.
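
As a rough model of that scheme (not the code in this patch, which
follows below; all names here are invented, and the segment chains and
in-place dlclose updates of the real implementation are omitted),
readers snapshot a version counter whose low bit selects the active
copy, search without taking a lock, and retry if the counter changed;
the serialized writer rewrites the inactive copy and then bumps the
counter:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    struct range { uintptr_t start, end; };

    enum { nranges = 16 };
    static struct range tables[2][nranges];  /* Two copies of the data.  */
    static _Atomic uint64_t version;         /* Bit 0 selects the active copy.  */

    /* Reader: lock-free, so it stays usable from a signal handler.  */
    static bool
    lookup (uintptr_t pc, struct range *out)
    {
      for (;;)
        {
          uint64_t v = atomic_load_explicit (&version, memory_order_acquire);
          const struct range *active = tables[v & 1];
          struct range copy = { 0, 0 };
          bool found = false;
          for (size_t i = 0; i < nranges; ++i)
            if (pc >= active[i].start && pc < active[i].end)
              {
                copy = active[i];   /* Copy out before validating.  */
                found = true;
                break;
              }
          /* If no writer bumped the version, the copy is consistent.  */
          if (atomic_load_explicit (&version, memory_order_acquire) == v)
            {
              if (found)
                *out = copy;
              return found;
            }
        }
    }

    /* Writer, assumed serialized (the loader holds a lock): rewrite the
       inactive copy, then publish it with a release increment, which
       flips the selector bit and makes in-flight readers retry.  */
    static void
    publish (const struct range *new_ranges, size_t n)
    {
      uint64_t v = atomic_load_explicit (&version, memory_order_relaxed);
      struct range *inactive = tables[(v & 1) ^ 1];
      for (size_t i = 0; i < nranges; ++i)
        inactive[i] = i < n ? new_ranges[i] : (struct range) { 0, 0 };
      atomic_fetch_add_explicit (&version, 1, memory_order_release);
    }

Compared with this model, the glibc code below also bumps the counter
before in-place modifications (_dlfo_mappings_begin_update) and bounds
what a stale reader can observe, so a scan that overlaps an update is
discarded on validation instead of reading out of bounds.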

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
Diffstat (limited to 'elf/dl-find_object.c')
-rw-r--r--  elf/dl-find_object.c  842
1 file changed, 842 insertions(+), 0 deletions(-)
diff --git a/elf/dl-find_object.c b/elf/dl-find_object.c
new file mode 100644
index 0000000000..324f40742d
--- /dev/null
+++ b/elf/dl-find_object.c
@@ -0,0 +1,842 @@
+/* Locating objects in the process image.  ld.so implementation.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <assert.h>
+#include <atomic_wide_counter.h>
+#include <dl-find_object.h>
+#include <dlfcn.h>
+#include <ldsodefs.h>
+#include <link.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* Fallback implementation of _dl_find_object.  It uses a linear
+   search, needs locking, and is not async-signal-safe.  It is used in
+   _dl_find_object prior to initialization, when called from audit
+   modules.  It also serves as the reference implementation for
+   _dl_find_object.  */
+static int
+_dl_find_object_slow (void *pc, struct dl_find_object *result)
+{
+  ElfW(Addr) addr = (ElfW(Addr)) pc;
+  for (Lmid_t ns = 0; ns < GL(dl_nns); ++ns)
+    for (struct link_map *l = GL(dl_ns)[ns]._ns_loaded; l != NULL;
+         l = l->l_next)
+      if (addr >= l->l_map_start && addr < l->l_map_end
+          && (l->l_contiguous || _dl_addr_inside_object (l, addr)))
+        {
+          assert (ns == l->l_ns);
+          struct dl_find_object_internal internal;
+          _dl_find_object_from_map (l, &internal);
+          _dl_find_object_to_external (&internal, result);
+          return 1;
+        }
+
+  /* Object not found.  */
+  return -1;
+}
+
+/* Data for the main executable.  There is usually a large gap between
+   the main executable and initially loaded shared objects.  Record
+   the main executable separately, to increase the chance that the
+   range for the non-closeable mappings below covers only the shared
+   objects (and not also the gap between main executable and shared
+   objects).  */
+static struct dl_find_object_internal _dlfo_main attribute_relro;
+
+/* Data for initially loaded shared objects that cannot be unloaded.
+   (This may also contain non-contiguous mappings from the main
+   executable.)  The mappings are stored in address order in the
+   _dlfo_nodelete_mappings array (containing
+   _dlfo_nodelete_mappings_size elements).  It is not modified after
+   initialization.  */
+static uintptr_t _dlfo_nodelete_mappings_end attribute_relro;
+static size_t _dlfo_nodelete_mappings_size attribute_relro;
+static struct dl_find_object_internal *_dlfo_nodelete_mappings
+  attribute_relro;
+
+/* Mappings created by dlopen can go away with dlclose, so a dynamic
+   data structure with some synchronization is needed.  Individual
+   segments are similar to the _dlfo_nodelete_mappings array above.
+   The previous segment contains lower addresses and is at most half
+   as long.  Checking the base address of the first element during a
+   lookup can therefore approximate a binary search
+   over all segments, even though the data is not stored in one
+   contiguous array.
+
+   During updates, the segments are overwritten in place, and a
+   software transactional memory construct (involving the
+   _dlfo_loaded_mappings_version variable) is used to detect
+   concurrent modification, and retry as necessary.  The memory
+   allocations are never deallocated, but slots used for objects that
+   have been dlclose'd can be reused by dlopen.  The memory can live
+   in the regular C malloc heap.
+
+   The segments are populated from the start of the list, with the
+   mappings at the highest addresses.  Only if this segment is full
+   are previous segments used for mappings at lower addresses.  The
+   remaining segments are populated as needed, but after allocating
+   further segments, some of the initial segments (at the end of the
+   linked list) can be empty (with size 0).
+
+   Adding new elements to this data structure is another source of
+   quadratic behavior for dlopen.  If the other causes of quadratic
+   behavior are eliminated, a more complicated data structure will be
+   needed.  */
+struct dlfo_mappings_segment
+{
+  /* The previous segment has lower base addresses.  */
+  struct dlfo_mappings_segment *previous;
+
+  /* Used by __libc_freeres to deallocate malloc'ed memory.  */
+  void *to_free;
+
+  /* Count of array elements in use and allocated.  */
+  size_t size;
+  size_t allocated;
+
+  struct dl_find_object_internal objects[];
+};
+
+/* To achieve async-signal-safety, two copies of the data structure
+   are used, so that a signal handler can still use this data even if
+   dlopen or dlclose modify the other copy.  The lowest bit in
+   _dlfo_loaded_mappings_version determines which array element is the
+   currently active region.  */
+static struct dlfo_mappings_segment *_dlfo_loaded_mappings[2];
+
+/* Returns the number of actually used elements in all segments
+   starting at SEG.  */
+static inline size_t
+_dlfo_mappings_segment_count_used (struct dlfo_mappings_segment *seg)
+{
+  size_t count = 0;
+  for (; seg != NULL && seg->size > 0; seg = seg->previous)
+    for (size_t i = 0; i < seg->size; ++i)
+      /* Exclude elements which have been dlclose'd.  */
+      count += seg->objects[i].map != NULL;
+  return count;
+}
+
+/* Compute the total number of allocated array elements in all
+   segments linked from SEG.  */
+static inline size_t
+_dlfo_mappings_segment_count_allocated (struct dlfo_mappings_segment *seg)
+{
+  size_t count = 0;
+  for (; seg != NULL; seg = seg->previous)
+    count += seg->allocated;
+  return count;
+}
+
+/* This is essentially an arbitrary value.  dlopen allocates plenty of
+   memory anyway, so over-allocating a bit does not hurt.  Not having
+   many small-ish segments helps to avoid many small binary searches.
+   Not using a power of 2 means that we do not waste an extra page
+   just for the malloc header if a mapped allocation is used in the
+   glibc allocator.  */
+enum { dlfo_mappings_initial_segment_size = 63 };
+
+/* Allocate an empty segment.  This is used for the first ever
+   allocation.  */
+static struct dlfo_mappings_segment *
+_dlfo_mappings_segment_allocate_unpadded (size_t size)
+{
+  if (size < dlfo_mappings_initial_segment_size)
+    size = dlfo_mappings_initial_segment_size;
+  /* No overflow checks here because the size is a mapping count, and
+     struct link_map is larger than what we allocate here.  */
+  enum
+    {
+      element_size = sizeof ((struct dlfo_mappings_segment) {}.objects[0])
+    };
+  size_t to_allocate = (sizeof (struct dlfo_mappings_segment)
+                        + size * element_size);
+  struct dlfo_mappings_segment *result = malloc (to_allocate);
+  if (result != NULL)
+    {
+      result->previous = NULL;
+      result->to_free = NULL; /* Minimal malloc memory cannot be freed.  */
+      result->size = 0;
+      result->allocated = size;
+    }
+  return result;
+}
+
+/* Allocate an empty segment that is at least SIZE large.  PREVIOUS
+   points to the chain of previously allocated segments and can be
+   NULL.  */
+static struct dlfo_mappings_segment *
+_dlfo_mappings_segment_allocate (size_t size,
+                                 struct dlfo_mappings_segment * previous)
+{
+  /* Exponential sizing policies, so that lookup approximates a binary
+     search.  */
+  {
+    size_t minimum_growth;
+    if (previous == NULL)
+      minimum_growth = dlfo_mappings_initial_segment_size;
+    else
+      minimum_growth = 2 * previous->allocated;
+    if (size < minimum_growth)
+      size = minimum_growth;
+  }
+  enum { cache_line_size_estimate = 128 };
+  /* No overflow checks here because the size is a mapping count, and
+     struct link_map is larger than what we allocate here.  */
+  enum
+    {
+      element_size = sizeof ((struct dlfo_mappings_segment) {}.objects[0])
+    };
+  size_t to_allocate = (sizeof (struct dlfo_mappings_segment)
+                        + size * element_size
+                        + 2 * cache_line_size_estimate);
+  char *ptr = malloc (to_allocate);
+  if (ptr == NULL)
+    return NULL;
+  char *original_ptr = ptr;
+  /* Start and end at a (conservative) 128-byte cache line boundary.
+     Do not use memalign for compatibility with partially interposing
+     malloc implementations.  */
+  char *end = PTR_ALIGN_DOWN (ptr + to_allocate, cache_line_size_estimate);
+  ptr = PTR_ALIGN_UP (ptr, cache_line_size_estimate);
+  struct dlfo_mappings_segment *result
+    = (struct dlfo_mappings_segment *) ptr;
+  result->previous = previous;
+  result->to_free = original_ptr;
+  result->size = 0;
+  /* We may have obtained slightly more space if malloc happened
+     to provide an over-aligned pointer.  */
+  result->allocated = (((uintptr_t) (end - ptr)
+                        - sizeof (struct dlfo_mappings_segment))
+                       / element_size);
+  assert (result->allocated >= size);
+  return result;
+}
+
+/* Monotonic counter for software transactional memory.  The lowest
+   bit indicates which element of _dlfo_loaded_mappings contains
+   up-to-date data.  */
+static __atomic_wide_counter _dlfo_loaded_mappings_version;
+
+/* TM version at the start of the read operation.  */
+static inline uint64_t
+_dlfo_read_start_version (void)
+{
+  /* Acquire MO load synchronizes with the fences at the beginning and
+     end of the TM update region.  */
+  return __atomic_wide_counter_load_acquire (&_dlfo_loaded_mappings_version);
+}
+
+/* Optimized variant of _dlfo_read_start_version which can be called
+   when the loader is write-locked.  */
+static inline uint64_t
+_dlfo_read_version_locked (void)
+{
+  return __atomic_wide_counter_load_relaxed (&_dlfo_loaded_mappings_version);
+}
+
+/* Update the version to reflect that an update is happening.  This
+   does not change the bit that controls the active segment chain.
+   Returns the index of the currently active segment chain.  */
+static inline unsigned int
+_dlfo_mappings_begin_update (void)
+{
+  unsigned int v
+    = __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version,
+                                               2);
+  /* Subsequent stores to the TM data must not be reordered before the
+     store above with the version update.  */
+  atomic_thread_fence_release ();
+  return v & 1;
+}
+
+/* Installs the just-updated version as the active version.  */
+static inline void
+_dlfo_mappings_end_update (void)
+{
+  /* The previous writes to the TM data must not be reordered after
+     the version update below.  */
+  atomic_thread_fence_release ();
+  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version,
+                                           1);
+}
+
+/* Completes an in-place update without switching versions.  */
+static inline void
+_dlfo_mappings_end_update_no_switch (void)
+{
+  /* The previous writes to the TM data must not be reordered after
+     the version update below.  */
+  atomic_thread_fence_release ();
+  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version,
+                                           2);
+}
+
+/* Return true if the read was successful, given the start
+   version.  */
+static inline bool
+_dlfo_read_success (uint64_t start_version)
+{
+  return _dlfo_read_start_version () == start_version;
+}
+
+/* Returns the active segment identified by the specified start
+   version.  */
+static struct dlfo_mappings_segment *
+_dlfo_mappings_active_segment (uint64_t start_version)
+{
+  return _dlfo_loaded_mappings[start_version & 1];
+}
+
+/* Searches for PC among the address-sorted array [FIRST1, FIRST1 +
+   SIZE).  Assumes PC >= FIRST1->map_start.  Returns a pointer to the
+   element that contains PC, or NULL if there is no such element.  */
+static inline struct dl_find_object_internal *
+_dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size)
+{
+  struct dl_find_object_internal *end = first1 + size;
+
+  /* Search for a lower bound in first.  */
+  struct dl_find_object_internal *first = first1;
+  while (size > 0)
+    {
+      size_t half = size >> 1;
+      struct dl_find_object_internal *middle = first + half;
+      if (middle->map_start < pc)
+        {
+          first = middle + 1;
+          size -= half + 1;
+        }
+      else
+        size = half;
+    }
+
+  if (first != end && pc == first->map_start)
+    {
+      if (pc < first->map_end)
+        return first;
+      else
+        /* Zero-length mapping after dlclose.  */
+        return NULL;
+    }
+  else
+    {
+      /* Check to see if PC is in the previous mapping.  */
+      --first;
+      if (pc < first->map_end)
+        /* pc >= first->map_start implied by the search above.  */
+        return first;
+      else
+        return NULL;
+    }
+}
+
+int
+_dl_find_object (void *pc1, struct dl_find_object *result)
+{
+  uintptr_t pc = (uintptr_t) pc1;
+
+  if (__glibc_unlikely (_dlfo_main.map_end == 0))
+    {
+      /* Not initialized.  No locking is needed here because this can
+         only be called from audit modules, which cannot create
+         threads.  */
+      return _dl_find_object_slow (pc1, result);
+    }
+
+  /* Main executable.  */
+  if (pc >= _dlfo_main.map_start && pc < _dlfo_main.map_end)
+    {
+      _dl_find_object_to_external (&_dlfo_main, result);
+      return 0;
+    }
+
+  /* Other initially loaded objects.  */
+  if (pc >= _dlfo_nodelete_mappings->map_start
+      && pc < _dlfo_nodelete_mappings_end)
+    {
+      struct dl_find_object_internal *obj
+        = _dlfo_lookup (pc, _dlfo_nodelete_mappings,
+                        _dlfo_nodelete_mappings_size);
+      if (obj != NULL)
+        {
+          _dl_find_object_to_external (obj, result);
+          return 0;
+        }
+      /* Fall through to the full search.  The kernel may have mapped
+         the initial mappings with gaps that are later filled by
+         dlopen with other mappings.  */
+    }
+
+  /* Handle audit modules and objects loaded by dlopen.  This uses software
+     transactional memory, with a retry loop in case the version
+     changes during execution.  */
+  while (true)
+    {
+    retry:
+      ;
+      uint64_t start_version = _dlfo_read_start_version ();
+
+      /* The read through seg->previous assumes that the CPU
+         recognizes the load dependency, so that no invalid size
+         value is read.  Furthermore, the code assumes that no
+         out-of-thin-air value for seg->size is observed.  Together,
+         this ensures that the observed seg->size value is never
+         larger than seg->allocated, so that _dlfo_lookup does not
+         read out-of-bounds.  (This avoids intermediate TM version
+         verification.  A concurrent version update will lead to
+         invalid lookup results, but not to out-of-bounds memory
+         access.)
+
+         Either seg == NULL or seg->size == 0 terminates the segment
+         list.  _dl_find_object_update does not bother to clear the
+         size on earlier unused segments.  */
+      for (struct dlfo_mappings_segment *seg
+             = _dlfo_mappings_active_segment (start_version);
+           seg != NULL && seg->size > 0; seg = seg->previous)
+        if (pc >= seg->objects[0].map_start)
+          {
+            /* PC may lie within this segment.  If it is less than the
+               segment start address, it can only lie in a previous
+               segment, due to the base address sorting.  */
+            struct dl_find_object_internal *obj
+              = _dlfo_lookup (pc, seg->objects, seg->size);
+
+            if (obj != NULL)
+              {
+                /* Found the right mapping.  Copy out the data prior to
+                   checking if the read transaction was successful.  */
+                struct dl_find_object_internal copy = *obj;
+                if (_dlfo_read_success (start_version))
+                  {
+                    _dl_find_object_to_external (&copy, result);
+                    return 0;
+                  }
+                else
+                  /* Read transaction failure.  */
+                  goto retry;
+              }
+            else
+              {
+                /* PC is not covered by this mapping.  */
+                if (_dlfo_read_success (start_version))
+                  return -1;
+                else
+                  /* Read transaction failure.  */
+                  goto retry;
+              }
+          } /* if: PC might lie within the current seg.  */
+
+      /* PC is not covered by any segment.  */
+      if (_dlfo_read_success (start_version))
+        return -1;
+    } /* Transaction retry loop.  */
+}
+rtld_hidden_def (_dl_find_object)
+
+/* _dlfo_process_initial is called twice.  First, to compute the array
+   sizes from the initially loaded mappings.  Second, to fill in the
+   _dlfo_nodelete_mappings and _dlfo_loaded_mappings arrays with the
+   (still unsorted) data.  Returns the number of loaded (non-nodelete)
+   mappings.  */
+static size_t
+_dlfo_process_initial (void)
+{
+  struct link_map *main_map = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
+
+  size_t nodelete = 0;
+  if (!main_map->l_contiguous)
+    {
+      struct dl_find_object_internal dlfo;
+      _dl_find_object_from_map (main_map, &dlfo);
+
+      /* PT_LOAD segments for a non-contiguous main map are added to the
+         non-closeable mappings.  */
+      for (const ElfW(Phdr) *ph = main_map->l_phdr,
+             *ph_end = main_map->l_phdr + main_map->l_phnum;
+           ph < ph_end; ++ph)
+        if (ph->p_type == PT_LOAD)
+          {
+            if (_dlfo_nodelete_mappings != NULL)
+              {
+                /* Second pass only.  */
+                _dlfo_nodelete_mappings[nodelete] = dlfo;
+                _dlfo_nodelete_mappings[nodelete].map_start
+                  = ph->p_vaddr + main_map->l_addr;
+                _dlfo_nodelete_mappings[nodelete].map_end
+                  = _dlfo_nodelete_mappings[nodelete].map_start + ph->p_memsz;
+              }
+            ++nodelete;
+          }
+    }
+
+  size_t loaded = 0;
+  for (Lmid_t ns = 0; ns < GL(dl_nns); ++ns)
+    for (struct link_map *l = GL(dl_ns)[ns]._ns_loaded; l != NULL;
+         l = l->l_next)
+      /* Skip the main map processed above, and proxy maps.  */
+      if (l != main_map && l == l->l_real)
+        {
+          /* lt_library link maps are implicitly NODELETE.  */
+          if (l->l_type == lt_library || l->l_nodelete_active)
+            {
+              if (_dlfo_nodelete_mappings != NULL)
+                /* Second pass only.  */
+                _dl_find_object_from_map
+                  (l, _dlfo_nodelete_mappings + nodelete);
+              ++nodelete;
+            }
+          else if (l->l_type == lt_loaded)
+            {
+              if (_dlfo_loaded_mappings[0] != NULL)
+                /* Second pass only.  */
+                _dl_find_object_from_map
+                  (l, &_dlfo_loaded_mappings[0]->objects[loaded]);
+              ++loaded;
+            }
+        }
+
+  _dlfo_nodelete_mappings_size = nodelete;
+  return loaded;
+}
+
+/* Selection sort based on mapping start address.  */
+void
+_dlfo_sort_mappings (struct dl_find_object_internal *objects, size_t size)
+{
+  if (size < 2)
+    return;
+
+  for (size_t i = 0; i < size - 1; ++i)
+    {
+      /* Find minimum.  */
+      size_t min_idx = i;
+      uintptr_t min_val = objects[i].map_start;
+      for (size_t j = i + 1; j < size; ++j)
+        if (objects[j].map_start < min_val)
+          {
+            min_idx = j;
+            min_val = objects[j].map_start;
+          }
+
+      /* Swap into place.  */
+      struct dl_find_object_internal tmp = objects[min_idx];
+      objects[min_idx] = objects[i];
+      objects[i] = tmp;
+    }
+}
+
+void
+_dl_find_object_init (void)
+{
+  /* Cover the main mapping.  */
+  {
+    struct link_map *main_map = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
+
+    if (main_map->l_contiguous)
+      _dl_find_object_from_map (main_map, &_dlfo_main);
+    else
+      {
+        /* Non-contiguous main maps are handled in
+           _dlfo_process_initial.  Mark as initialized, but not
+           covering any valid PC.  */
+        _dlfo_main.map_start = -1;
+        _dlfo_main.map_end = -1;
+      }
+  }
+
+  /* Allocate the data structures.  */
+  size_t loaded_size = _dlfo_process_initial ();
+  _dlfo_nodelete_mappings = malloc (_dlfo_nodelete_mappings_size
+                                    * sizeof (*_dlfo_nodelete_mappings));
+  if (loaded_size > 0)
+    _dlfo_loaded_mappings[0]
+      = _dlfo_mappings_segment_allocate_unpadded (loaded_size);
+  if (_dlfo_nodelete_mappings == NULL
+      || (loaded_size > 0 && _dlfo_loaded_mappings[0] == NULL))
+    _dl_fatal_printf ("\
+Fatal glibc error: cannot allocate memory for find-object data\n");
+  /* Fill in the data with the second call.  */
+  _dlfo_nodelete_mappings_size = 0;
+  _dlfo_process_initial ();
+
+  /* Sort both arrays.  */
+  if (_dlfo_nodelete_mappings_size > 0)
+    {
+      _dlfo_sort_mappings (_dlfo_nodelete_mappings,
+                           _dlfo_nodelete_mappings_size);
+      size_t last_idx = _dlfo_nodelete_mappings_size - 1;
+      _dlfo_nodelete_mappings_end = _dlfo_nodelete_mappings[last_idx].map_end;
+    }
+  if (loaded_size > 0)
+    _dlfo_sort_mappings (_dlfo_loaded_mappings[0]->objects,
+                         _dlfo_loaded_mappings[0]->size);
+}
+
+static void
+_dl_find_object_link_map_sort (struct link_map **loaded, size_t size)
+{
+  /* Selection sort based on map_start.  */
+  if (size < 2)
+    return;
+  for (size_t i = 0; i < size - 1; ++i)
+    {
+      /* Find minimum.  */
+      size_t min_idx = i;
+      ElfW(Addr) min_val = loaded[i]->l_map_start;
+      for (size_t j = i + 1; j < size; ++j)
+        if (loaded[j]->l_map_start < min_val)
+          {
+            min_idx = j;
+            min_val = loaded[j]->l_map_start;
+          }
+
+      /* Swap into place.  */
+      struct link_map *tmp = loaded[min_idx];
+      loaded[min_idx] = loaded[i];
+      loaded[i] = tmp;
+    }
+}
+
+/* Initializes the segment for writing.  Returns the target write
+   index (plus 1) in this segment.  The index is chosen so that a
+   partially filled segment still has data at index 0.  */
+static inline size_t
+_dlfo_update_init_seg (struct dlfo_mappings_segment *seg,
+                       size_t remaining_to_add)
+{
+  if (remaining_to_add < seg->allocated)
+    /* Partially filled segment.  */
+    seg->size = remaining_to_add;
+  else
+    seg->size = seg->allocated;
+  return seg->size;
+}
+
+/* Invoked from _dl_find_object_update after sorting.  */
+static bool
+_dl_find_object_update_1 (struct link_map **loaded, size_t count)
+{
+  int active_idx = _dlfo_read_version_locked () & 1;
+
+  struct dlfo_mappings_segment *current_seg
+    = _dlfo_loaded_mappings[active_idx];
+  size_t current_used = _dlfo_mappings_segment_count_used (current_seg);
+
+  struct dlfo_mappings_segment *target_seg
+    = _dlfo_loaded_mappings[!active_idx];
+  size_t remaining_to_add = current_used + count;
+
+  /* Ensure that the new segment chain has enough space.  */
+  {
+    size_t new_allocated
+      = _dlfo_mappings_segment_count_allocated (target_seg);
+    if (new_allocated < remaining_to_add)
+      {
+        size_t more = remaining_to_add - new_allocated;
+        target_seg = _dlfo_mappings_segment_allocate (more, target_seg);
+        if (target_seg == NULL)
+          /* Out of memory.  Do not end the update and keep the
+             current version unchanged.  */
+          return false;
+
+        /* Start update cycle. */
+        _dlfo_mappings_begin_update ();
+
+        /* The barrier ensures that a concurrent TM read or fork does
+           not see a partially initialized segment.  */
+        atomic_store_release (&_dlfo_loaded_mappings[!active_idx], target_seg);
+      }
+    else
+      /* Start update cycle without allocation.  */
+      _dlfo_mappings_begin_update ();
+  }
+
+  size_t target_seg_index1 = _dlfo_update_init_seg (target_seg,
+                                                    remaining_to_add);
+
+  /* Merge the current_seg segment list with the loaded array into the
+     target_seg list.  Merging occurs backwards, in decreasing l_map_start
+     order.  */
+  size_t loaded_index1 = count;
+  size_t current_seg_index1;
+  if (current_seg == NULL)
+    current_seg_index1 = 0;
+  else
+    current_seg_index1 = current_seg->size;
+  while (true)
+    {
+      if (current_seg_index1 == 0)
+        {
+          /* Switch to the previous segment.  */
+          if (current_seg != NULL)
+            current_seg = current_seg->previous;
+          if (current_seg != NULL)
+            {
+              current_seg_index1 = current_seg->size;
+              if (current_seg_index1 == 0)
+                /* No more data in previous segments.  */
+                current_seg = NULL;
+            }
+        }
+
+      if (current_seg != NULL
+          && (current_seg->objects[current_seg_index1 - 1].map == NULL))
+        {
+          /* This mapping has been dlclose'd.  Do not copy it.  */
+          --current_seg_index1;
+          continue;
+        }
+
+      if (loaded_index1 == 0 && current_seg == NULL)
+        /* No more data in either source.  */
+        break;
+
+      /* Make room for another mapping.  */
+      assert (remaining_to_add > 0);
+      if (target_seg_index1 == 0)
+        {
+          /* Switch segments and set the size of the segment.  */
+          target_seg = target_seg->previous;
+          target_seg_index1 = _dlfo_update_init_seg (target_seg,
+                                                     remaining_to_add);
+        }
+
+      /* Determine where to store the data.  */
+      struct dl_find_object_internal *dlfo
+        = &target_seg->objects[target_seg_index1 - 1];
+
+      if (loaded_index1 == 0
+          || (current_seg != NULL
+              && (loaded[loaded_index1 - 1]->l_map_start
+                  < current_seg->objects[current_seg_index1 - 1].map_start)))
+        {
+          /* Prefer mapping in current_seg.  */
+          assert (current_seg_index1 > 0);
+          *dlfo = current_seg->objects[current_seg_index1 - 1];
+          --current_seg_index1;
+        }
+      else
+        {
+          /* Prefer newly loaded link map.  */
+          assert (loaded_index1 > 0);
+          _dl_find_object_from_map (loaded[loaded_index1 - 1], dlfo);
+          loaded[loaded_index1 - 1]->l_find_object_processed = 1;
+          --loaded_index1;
+        }
+
+      /* Consume space in target segment.  */
+      --target_seg_index1;
+
+      --remaining_to_add;
+    }
+
+  /* Everything has been added.  */
+  assert (remaining_to_add == 0);
+
+  /* The segment must have been filled up to the beginning.  */
+  assert (target_seg_index1 == 0);
+
+  /* Prevent searching further into unused segments.  */
+  if (target_seg->previous != NULL)
+    target_seg->previous->size = 0;
+
+  _dlfo_mappings_end_update ();
+  return true;
+}
+
+bool
+_dl_find_object_update (struct link_map *new_map)
+{
+  /* Copy the newly-loaded link maps into an array for sorting.  */
+  size_t count = 0;
+  for (struct link_map *l = new_map; l != NULL; l = l->l_next)
+    /* Skip proxy maps and already-processed maps.  */
+    count += l == l->l_real && !l->l_find_object_processed;
+  struct link_map **map_array = malloc (count * sizeof (*map_array));
+  if (map_array == NULL)
+    return false;
+  {
+    size_t i = 0;
+    for (struct link_map *l = new_map; l != NULL; l = l->l_next)
+      if (l == l->l_real && !l->l_find_object_processed)
+        map_array[i++] = l;
+  }
+  if (count == 0)
+    return true;
+
+  _dl_find_object_link_map_sort (map_array, count);
+  bool ok = _dl_find_object_update_1 (map_array, count);
+  free (map_array);
+  return ok;
+}
+
+void
+_dl_find_object_dlclose (struct link_map *map)
+{
+  uint64_t start_version = _dlfo_read_version_locked ();
+  uintptr_t map_start = map->l_map_start;
+
+  /* Directly patch the size information in the mapping to mark it as
+     unused.  See the parallel lookup logic in _dl_find_object.  Do
+     not check for previous dlclose at the same mapping address
+     because that cannot happen (there would have to be an
+     intermediate dlopen, which drops size-zero mappings).  */
+  for (struct dlfo_mappings_segment *seg
+         = _dlfo_mappings_active_segment (start_version);
+       seg != NULL && seg->size > 0; seg = seg->previous)
+    if (map_start >= seg->objects[0].map_start)
+      {
+        struct dl_find_object_internal *obj
+          = _dlfo_lookup (map_start, seg->objects, seg->size);
+        if (obj == NULL)
+          /* Ignore missing link maps because of potential shutdown
+             issues around __libc_freeres.  */
+          return;
+
+        /* The update happens in-place, but given that we do not use
+           atomic accesses on the read side, update the version around
+           the update to trigger re-validation in concurrent
+           readers.  */
+        _dlfo_mappings_begin_update ();
+
+        /* Mark as closed.  */
+        obj->map_end = obj->map_start;
+        obj->map = NULL;
+
+        _dlfo_mappings_end_update_no_switch ();
+        return;
+      }
+}
+
+void
+_dl_find_object_freeres (void)
+{
+  for (int idx = 0; idx < 2; ++idx)
+    {
+      for (struct dlfo_mappings_segment *seg = _dlfo_loaded_mappings[idx];
+           seg != NULL; )
+        {
+          struct dlfo_mappings_segment *previous = seg->previous;
+          free (seg->to_free);
+          seg = previous;
+        }
+      /* Stop searching in shared objects.  */
+      _dlfo_loaded_mappings[idx] = 0;
+    }
+}
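
As a back-of-the-envelope illustration of the segment sizing policy in
the comments above (the 63-element starting size mirrors
dlfo_mappings_initial_segment_size, and each further segment is at
least twice as large as its predecessor; the real allocation also
rounds to cache-line boundaries, so actual capacities are slightly
larger), the total capacity grows geometrically, which keeps the number
of segments, and hence the number of per-segment binary searches,
logarithmic in the number of mappings:

    #include <stdio.h>

    int
    main (void)
    {
      /* Model: first segment holds 63 mappings, each later segment
         doubles the previous allocation.  */
      size_t total = 0, segment = 63;
      for (int nsegs = 1; nsegs <= 8; ++nsegs)
        {
          total += segment;
          printf ("%d segment(s) cover up to %zu mappings\n", nsegs, total);
          segment *= 2;
        }
      return 0;
    }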