about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAdhemerval Zanella <adhemerval.zanella@linaro.org>2024-09-26 14:14:35 -0300
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2024-09-27 14:22:58 -0300
commit93c7e10ca97ecc9b9370b1fb10f633fea34cb255 (patch)
treed8d92929f43fdceb1ad7a22ab44155650cf42fcf
parent24d2a0a474fa5d95ef29973b899bc47b733d13ef (diff)
downloadglibc-azanella/getrandom-vdso-v8.tar.gz
glibc-azanella/getrandom-vdso-v8.tar.xz
glibc-azanella/getrandom-vdso-v8.zip
[v8] vgetrandom Linux support azanella/getrandom-vdso-v8
* Added __getrandom_early_init to handle multiple libc.so instances.
  The second loaded libc.so will always fall back to the syscall.
* Move the vgetrandom parameters to libc.so.
* Remove the internal sys/random.h in favor of getrandom-internal.h.
* Refactor the reserve pointer macros into inline functions
  and add some comments.
* Remove the use of __get_nprocs(); instead start with one page
  only for the opaque states.
* Use atomic_thread_fence_seq_cst instead of atomic_store_relaxed
  for fork synchronization.
* Align opaque state to L1D cache line size to avoid false sharing.
-rw-r--r--elf/libc_early_init.c3
-rw-r--r--include/sys/random.h4
-rw-r--r--nptl/pthread_create.c2
-rw-r--r--stdlib/Makefile2
-rw-r--r--stdlib/tst-getrandom2.c47
-rw-r--r--sysdeps/generic/getrandom-internal.h (renamed from sysdeps/unix/sysv/linux/getrandom_vdso.h)20
-rw-r--r--sysdeps/nptl/_Fork.c2
-rw-r--r--sysdeps/nptl/fork.h2
-rw-r--r--sysdeps/unix/sysv/linux/dl-vdso-setup.c7
-rw-r--r--sysdeps/unix/sysv/linux/dl-vdso-setup.h14
-rw-r--r--sysdeps/unix/sysv/linux/getrandom-internal.h (renamed from sysdeps/unix/sysv/linux/include/sys/random.h)10
-rw-r--r--sysdeps/unix/sysv/linux/getrandom.c146
12 files changed, 168 insertions, 91 deletions
diff --git a/elf/libc_early_init.c b/elf/libc_early_init.c
index 575b837f8f..20c71fd48b 100644
--- a/elf/libc_early_init.c
+++ b/elf/libc_early_init.c
@@ -23,6 +23,7 @@
 #include <lowlevellock.h>
 #include <pthread_early_init.h>
 #include <sys/single_threaded.h>
+#include <getrandom-internal.h>
 
 #ifdef SHARED
 _Bool __libc_initial;
@@ -43,6 +44,8 @@ __libc_early_init (_Bool initial)
 
   __pthread_early_init ();
 
+  __getrandom_early_init (initial);
+
 #if ENABLE_ELISION_SUPPORT
   __lll_elision_init ();
 #endif
diff --git a/include/sys/random.h b/include/sys/random.h
index 35f64a0339..6aa313d35d 100644
--- a/include/sys/random.h
+++ b/include/sys/random.h
@@ -1,12 +1,8 @@
 #ifndef _SYS_RANDOM_H
 #include <stdlib/sys/random.h>
 
-#include_next <sys/random.h>
-
 # ifndef _ISOMAC
 
-# include <stdbool.h>
-
 extern ssize_t __getrandom (void *__buffer, size_t __length,
                             unsigned int __flags) __wur;
 libc_hidden_proto (__getrandom)
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index d1f5568b3b..ef3ec33290 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -38,7 +38,7 @@
 #include <version.h>
 #include <clone_internal.h>
 #include <futex-internal.h>
-#include <sys/random.h>
+#include <getrandom-internal.h>
 
 #include <shlib-compat.h>
 
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 347491de53..9c492051bf 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -278,6 +278,7 @@ tests := \
   tst-cxa_atexit \
   tst-environ \
   tst-getrandom \
+  tst-getrandom2 \
   tst-labs \
   tst-limits \
   tst-llabs \
@@ -627,3 +628,4 @@ $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3
 $(objpfx)tst-qsort5: $(libm)
 $(objpfx)tst-concurrent-exit: $(shared-thread-library)
 $(objpfx)tst-concurrent-quick_exit: $(shared-thread-library)
+$(objpfx)tst-getrandom2: $(shared-thread-library)
diff --git a/stdlib/tst-getrandom2.c b/stdlib/tst-getrandom2.c
new file mode 100644
index 0000000000..f085b4b74f
--- /dev/null
+++ b/stdlib/tst-getrandom2.c
@@ -0,0 +1,47 @@
+/* Tests for the getrandom functions.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <gnu/lib-names.h>
+#include <support/check.h>
+#include <support/xdlfcn.h>
+#include <support/xthread.h>
+#include <sys/random.h>
+
+static __typeof (getrandom) *getrandom_ptr;
+
+static void *
+threadfunc (void *ignored)
+{
+  char buffer;
+  TEST_COMPARE (getrandom_ptr (&buffer, 1, 0), 1);
+  return NULL;
+}
+
+static int
+do_test (void)
+{
+  /* Check if issuing getrandom in the secondary libc.so works when
+     the vDSO might be potentially used.  */
+  void *handle = xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW);
+  getrandom_ptr = xdlsym (handle, "getrandom");
+  for (int i = 0; i < 1000; ++i)
+    xpthread_join (xpthread_create (NULL, threadfunc, NULL));
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/getrandom_vdso.h b/sysdeps/generic/getrandom-internal.h
index d1ef690e50..3fe46532a0 100644
--- a/sysdeps/unix/sysv/linux/getrandom_vdso.h
+++ b/sysdeps/generic/getrandom-internal.h
@@ -1,4 +1,4 @@
-/* Linux getrandom vDSO support.
+/* Internal definitions for getrandom implementation.
    Copyright (C) 2024 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,21 +16,11 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#ifndef _GETRANDOM_VDSO_H
-#define _GETRANDOM_VDSO_H
+#ifndef _GETRANDOM_INTERNAL_H
+#define _GETRANDOM_INTERNAL_H
 
-#include <stddef.h>
-#include <stdint.h>
-#include <sys/types.h>
-
-/* Used to query the vDSO for the required mmap flags and the opaque
-   per-thread state size  Defined by linux/random.h.  */
-struct vgetrandom_opaque_params
+/* Generic fallback: there is nothing to set up, so INITIAL is ignored.  */
+static inline void __getrandom_early_init (_Bool initial)
 {
-  uint32_t size_of_opaque_state;
-  uint32_t mmap_prot;
-  uint32_t mmap_flags;
-  uint32_t reserved[13];
-};
+}
 
 #endif
diff --git a/sysdeps/nptl/_Fork.c b/sysdeps/nptl/_Fork.c
index adb7c18b29..c82fd50649 100644
--- a/sysdeps/nptl/_Fork.c
+++ b/sysdeps/nptl/_Fork.c
@@ -18,7 +18,7 @@
 
 #include <arch-fork.h>
 #include <pthreadP.h>
-#include <sys/random.h>
+#include <getrandom-internal.h>
 
 pid_t
 _Fork (void)
diff --git a/sysdeps/nptl/fork.h b/sysdeps/nptl/fork.h
index 106b2cf71d..eabf3c81b0 100644
--- a/sysdeps/nptl/fork.h
+++ b/sysdeps/nptl/fork.h
@@ -26,7 +26,7 @@
 #include <mqueue.h>
 #include <pthreadP.h>
 #include <sysdep.h>
-#include <sys/random.h>
+#include <getrandom-internal.h>
 
 static inline void
 fork_system_setup (void)
diff --git a/sysdeps/unix/sysv/linux/dl-vdso-setup.c b/sysdeps/unix/sysv/linux/dl-vdso-setup.c
index b117a25922..476c6db75a 100644
--- a/sysdeps/unix/sysv/linux/dl-vdso-setup.c
+++ b/sysdeps/unix/sysv/linux/dl-vdso-setup.c
@@ -70,13 +70,6 @@ PROCINFO_CLASS int (*_dl_vdso_clock_getres_time64) (clockid_t,
 PROCINFO_CLASS ssize_t (*_dl_vdso_getrandom) (void *buffer, size_t len,
                                               unsigned int flags, void *state,
                                               size_t state_len) RELRO;
-/* These values will be initialized at loading time by calling the
-   _dl_vdso_getrandom with a special value.  The 'state_size' is the opaque
-   state size per-thread allocated with a mmap using 'mmap_prot' and
-   'mmap_flags' argument.  */
-PROCINFO_CLASS uint32_t _dl_vdso_getrandom_state_size RELRO;
-PROCINFO_CLASS uint32_t _dl_vdso_getrandom_mmap_prot RELRO;
-PROCINFO_CLASS uint32_t _dl_vdso_getrandom_mmap_flags RELRO;
 # endif
 
 /* PowerPC specific ones.  */
diff --git a/sysdeps/unix/sysv/linux/dl-vdso-setup.h b/sysdeps/unix/sysv/linux/dl-vdso-setup.h
index c63b7689e5..cde99f608c 100644
--- a/sysdeps/unix/sysv/linux/dl-vdso-setup.h
+++ b/sysdeps/unix/sysv/linux/dl-vdso-setup.h
@@ -19,10 +19,6 @@
 #ifndef _DL_VDSO_INIT_H
 #define _DL_VDSO_INIT_H
 
-#ifdef HAVE_GETRANDOM_VSYSCALL
-# include <getrandom_vdso.h>
-#endif
-
 /* Initialize the VDSO functions pointers.  */
 static inline void __attribute__ ((always_inline))
 setup_vdso_pointers (void)
@@ -56,16 +52,6 @@ setup_vdso_pointers (void)
 #endif
 #ifdef HAVE_GETRANDOM_VSYSCALL
   GLRO(dl_vdso_getrandom) = dl_vdso_vsym (HAVE_GETRANDOM_VSYSCALL);
-  if (GLRO(dl_vdso_getrandom) != NULL)
-    {
-      struct vgetrandom_opaque_params params;
-      if (GLRO(dl_vdso_getrandom) (NULL, 0, 0, &params, ~0UL) == 0)
-	{
-	  GLRO(dl_vdso_getrandom_state_size) = params.size_of_opaque_state;
-	  GLRO(dl_vdso_getrandom_mmap_prot) = params.mmap_prot;
-	  GLRO(dl_vdso_getrandom_mmap_flags) = params.mmap_flags;
-	}
-    }
 #endif
 }
 
diff --git a/sysdeps/unix/sysv/linux/include/sys/random.h b/sysdeps/unix/sysv/linux/getrandom-internal.h
index 5a48de2d29..37e6c9bc15 100644
--- a/sysdeps/unix/sysv/linux/include/sys/random.h
+++ b/sysdeps/unix/sysv/linux/getrandom-internal.h
@@ -16,14 +16,14 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#ifndef _LINUX_SYS_RANDOM_H
-#define _LINUX_SYS_RANDOM_H
+#ifndef _GETRANDOM_INTERNAL_H
+#define _GETRANDOM_INTERNAL_H
 
-# ifndef _ISOMAC
-# include <pthreadP.h>
+#include <pthreadP.h>
+
+extern void __getrandom_early_init (_Bool) attribute_hidden;
 
 extern void __getrandom_fork_subprocess (void) attribute_hidden;
 extern void __getrandom_vdso_release (struct pthread *curp) attribute_hidden;
 extern void __getrandom_reset_state (struct pthread *curp) attribute_hidden;
-# endif
 #endif
diff --git a/sysdeps/unix/sysv/linux/getrandom.c b/sysdeps/unix/sysv/linux/getrandom.c
index d6025199dc..b846f23542 100644
--- a/sysdeps/unix/sysv/linux/getrandom.c
+++ b/sysdeps/unix/sysv/linux/getrandom.c
@@ -31,7 +31,7 @@ getrandom_syscall (void *buffer, size_t length, unsigned int flags,
 }
 
 #ifdef HAVE_GETRANDOM_VSYSCALL
-# include <getrandom_vdso.h>
+# include <assert.h>
 # include <ldsodefs.h>
 # include <libc-lock.h>
 # include <list.h>
@@ -40,12 +40,64 @@ getrandom_syscall (void *buffer, size_t length, unsigned int flags,
 # include <sys/sysinfo.h>
 # include <tls-internal.h>
 
-# define ALIGN_PAGE(p)		PTR_ALIGN_UP (p, GLRO (dl_pagesize))
-# define READ_ONCE(p)		(*((volatile typeof (p) *) (&(p))))
-# define WRITE_ONCE(p, v)	(*((volatile typeof (p) *) (&(p))) = (v))
-# define RESERVE_PTR(p)		((void *) ((uintptr_t) (p) | 1UL))
-# define RELEASE_PTR(p)		((void *) ((uintptr_t) (p) & ~1UL))
-# define IS_RESERVED_PTR(p)	(!!((uintptr_t) (p) & 1UL))
+/* These values will be initialized at loading time by calling the
+   _dl_vdso_getrandom with a special value.  The 'state_size' is the opaque
+   state size per-thread allocated with a mmap using 'mmap_prot' and
+   'mmap_flags' argument.  */
+static uint32_t state_size;
+static uint32_t stradle_size;
+static uint32_t mmap_prot;
+static uint32_t mmap_flags;
+
+/* Query the vDSO getrandom for its opaque state parameters and cache them
+   in the file-local variables.  Only done for the initial libc (INITIAL
+   is true) and when the vDSO symbol is available; otherwise state_size
+   keeps its zero value.  */
+void
+__getrandom_early_init (_Bool initial)
+{
+  if (initial && (GLRO (dl_vdso_getrandom) != NULL))
+    {
+      /* Used to query the vDSO for the required mmap flags and the opaque
+	 per-thread state size.  Defined by linux/random.h.  */
+      struct vgetrandom_opaque_params
+      {
+	uint32_t size_of_opaque_state;
+	uint32_t mmap_prot;
+	uint32_t mmap_flags;
+	uint32_t reserved[13];
+      } params;
+      if (GLRO(dl_vdso_getrandom) (NULL, 0, 0, &params, ~0UL) == 0)
+	{
+	  /* Align each opaque state to the L1 data cache line size to avoid
+	     false sharing.  If the line size can not be obtained (zero or
+	     negative result), use the kernel provided size unchanged.  */
+	  state_size = params.size_of_opaque_state;
+	  long int ld1sz = __sysconf (_SC_LEVEL1_DCACHE_LINESIZE);
+	  stradle_size = ALIGN_UP (state_size, ld1sz > 0 ? ld1sz : 1);
+	  mmap_prot = params.mmap_prot;
+	  mmap_flags = params.mmap_flags;
+	}
+    }
+}
+
+/* The functions below are used for reentrancy handling (i.e. SA_NODEFER).
+   Before allocating a new state or issuing the vDSO call, atomically read
+   the current thread buffer, and if it is already reserved (is_reserved_ptr)
+   fall back to the syscall.  Otherwise, reserve the buffer by atomically
+   setting the LSB of the opaque state pointer.  The bit is cleared after the
+   vDSO is called, or before issuing the fallback syscall.  */
+
+static inline void *reserve_ptr (void *p)
+{
+  return (void *) ((uintptr_t) (p) | 1UL);
+}
+
+static inline void *release_ptr (void *p)
+{
+  return (void *) ((uintptr_t) (p) & ~1UL);
+}
+
+static inline bool is_reserved_ptr (void *p)
+{
+  return (uintptr_t) (p) & 1UL;
+}
 
 static struct
 {
@@ -64,13 +116,10 @@ static struct
 static bool
 vgetrandom_get_state_alloc (void)
 {
-  size_t num = __get_nprocs (); /* Just a decent heuristic.  */
-
-  size_t block_size = ALIGN_PAGE (num * GLRO(dl_vdso_getrandom_state_size));
-  num = (GLRO (dl_pagesize) / GLRO(dl_vdso_getrandom_state_size)) *
-	(block_size / GLRO (dl_pagesize));
-  void *block = __mmap (NULL, block_size, GLRO(dl_vdso_getrandom_mmap_prot),
-			GLRO(dl_vdso_getrandom_mmap_flags), -1, 0);
+  /* Start by allocating one page for the opaque states.  */
+  size_t block_size = ALIGN_UP (stradle_size, GLRO(dl_pagesize));
+  size_t num = GLRO (dl_pagesize) / stradle_size;
+  void *block = __mmap (NULL, GLRO(dl_pagesize), mmap_prot, mmap_flags, -1, 0);
   if (block == MAP_FAILED)
     return false;
   __set_vma_name (block, block_size, " glibc: getrandom");
@@ -82,15 +131,20 @@ vgetrandom_get_state_alloc (void)
 	 mremap returns but before assigning to the grnd_alloc.states,
 	 thus making the its value invalid in the child.  */
       void *old_states = grnd_alloc.states;
-      size_t old_states_size = ALIGN_PAGE (sizeof (*grnd_alloc.states) *
-					   grnd_alloc.total + num);
+      size_t old_states_size = ALIGN_UP (sizeof (*grnd_alloc.states) *
+					 grnd_alloc.total + num,
+					 GLRO(dl_pagesize));
       size_t states_size;
-      if (grnd_alloc.states == NULL)
+      if (old_states == NULL)
 	states_size = old_states_size;
       else
-	states_size = ALIGN_PAGE (sizeof (*grnd_alloc.states)
-				  * grnd_alloc.cap);
+	states_size = ALIGN_UP (sizeof (*grnd_alloc.states) * grnd_alloc.cap,
+				GLRO(dl_pagesize));
 
+      /* There is no need to memcpy any opaque state information because
+	 all the allocated opaque states are assigned to running threads
+	 (meaning that if we iterate over them we can reconstruct the state
+	 list).  */
       void **states = __mmap (NULL, states_size, PROT_READ | PROT_WRITE,
 			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
       if (states == MAP_FAILED)
@@ -103,25 +157,29 @@ vgetrandom_get_state_alloc (void)
 	 process will see a consistent free state buffer.  The size might
 	 not be updated, but it does not really matter since the buffer is
 	 always increased.  */
-      atomic_store_relaxed (&grnd_alloc.states, states);
+      grnd_alloc.states = states;
+      atomic_thread_fence_seq_cst ();
       if (old_states != NULL)
 	__munmap (old_states, old_states_size);
 
       __set_vma_name (states, states_size, " glibc: getrandom states");
       grnd_alloc.cap = states_size / sizeof (*grnd_alloc.states);
+      atomic_thread_fence_seq_cst ();
     }
 
   for (size_t i = 0; i < num; ++i)
     {
       /* States should not straddle a page.  */
-      if (((uintptr_t) block & (GLRO (dl_pagesize) - 1)) +
-	  GLRO(dl_vdso_getrandom_state_size) > GLRO (dl_pagesize))
-	block = ALIGN_PAGE (block);
+      if (((uintptr_t) block & (GLRO (dl_pagesize) - 1)) + stradle_size
+	  > GLRO (dl_pagesize))
+	block = PTR_ALIGN_UP (block, GLRO(dl_pagesize));
       grnd_alloc.states[i] = block;
-      block += GLRO(dl_vdso_getrandom_state_size);
+      block += stradle_size;
     }
+  /* Concurrent fork should not observe the previous pointer value.  */
   grnd_alloc.len = num;
   grnd_alloc.total += num;
+  atomic_thread_fence_seq_cst ();
 
   return true;
 }
@@ -156,18 +214,15 @@ vgetrandom_get_state (void)
 static ssize_t
 getrandom_vdso (void *buffer, size_t length, unsigned int flags, bool cancel)
 {
-  if (GLRO (dl_vdso_getrandom_state_size) == 0)
+  if (__glibc_unlikely (state_size == 0))
     return getrandom_syscall (buffer, length, flags, cancel);
 
   struct pthread *self = THREAD_SELF;
 
-  /* If the LSB of getrandom_buf is set, then this function is already being
-     called, and we have a reentrant call from a signal handler.  In this case
-     fallback to the syscall.  */
-  void *state = READ_ONCE (self->getrandom_buf);
-  if (IS_RESERVED_PTR (state))
+  void *state = atomic_load_relaxed (&self->getrandom_buf);
+  if (is_reserved_ptr (state))
     return getrandom_syscall (buffer, length, flags, cancel);
-  WRITE_ONCE (self->getrandom_buf, RESERVE_PTR (state));
+  atomic_store_relaxed (&self->getrandom_buf, reserve_ptr (state));
 
   bool r = false;
   if (state == NULL)
@@ -177,15 +232,15 @@ getrandom_vdso (void *buffer, size_t length, unsigned int flags, bool cancel)
         goto out;
     }
 
-  /* Since the vDSO fallback does not issue the syscall with the cancellation
-     bridge (__syscall_cancel_arch), use GRND_NONBLOCK so there is no
-     potential unbounded blocking in the kernel.  It should be a rare
+  /* Since the vDSO implementation does not issue the syscall with the
+     cancellation bridge (__syscall_cancel_arch), use GRND_NONBLOCK so there
+     is no potential unbounded blocking in the kernel.  It should be a rare
      situation, only at system startup when RNG is not initialized.  */
-  ssize_t ret =  GLRO (dl_vdso_getrandom) (buffer,
-					   length,
-					   flags | GRND_NONBLOCK,
-					   state,
-					   GLRO(dl_vdso_getrandom_state_size));
+  ssize_t ret = GLRO (dl_vdso_getrandom) (buffer,
+					  length,
+					  flags | GRND_NONBLOCK,
+					  state,
+					  state_size);
   if (INTERNAL_SYSCALL_ERROR_P (ret))
     {
       /* Fallback to the syscall if the kernel would block.  */
@@ -199,25 +254,30 @@ getrandom_vdso (void *buffer, size_t length, unsigned int flags, bool cancel)
   r = true;
 
 out:
-  WRITE_ONCE (self->getrandom_buf, state);
+  atomic_store_relaxed (&self->getrandom_buf, state);
   return r ? ret : getrandom_syscall (buffer, length, flags, cancel);
 }
 #endif
 
-/* Re-add the state state from CURP on the free list.  */
+/* Re-add the state from CURP to the free list.  This function is
+   called after fork returns in the child, so no locking is required.  */
 void
 __getrandom_reset_state (struct pthread *curp)
 {
 #ifdef HAVE_GETRANDOM_VSYSCALL
   if (grnd_alloc.states == NULL || curp->getrandom_buf == NULL)
     return;
-  grnd_alloc.states[grnd_alloc.len++] = RELEASE_PTR (curp->getrandom_buf);
+  /* Store at the current top of the free list, then bump the length.  */
+  assert (grnd_alloc.len < grnd_alloc.cap);
+  grnd_alloc.states[grnd_alloc.len++] = release_ptr (curp->getrandom_buf);
   curp->getrandom_buf = NULL;
 #endif
 }
 
 /* Called when a thread terminates, and adds its random buffer back into the
-   allocator pool for use in a future thread.  */
+   allocator pool for use in a future thread.  This is called by
+   pthread_create during thread termination, and after signals have been
+   blocked.  */
 void
 __getrandom_vdso_release (struct pthread *curp)
 {