about summary refs log tree commit diff
path: root/nptl/pthread_create.c
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2021-02-13 11:47:46 -0800
committerH.J. Lu <hjl.tools@gmail.com>2021-07-14 06:33:58 -0700
commitd8ea0d0168b190bdf138a20358293c939509367f (patch)
tree09f489ad57652978fe9c8dbf8f649d6868bafbda /nptl/pthread_create.c
parent135425a1dd50cbe2b9db0628d6c2b36c7889f30b (diff)
downloadglibc-d8ea0d0168b190bdf138a20358293c939509367f.tar.gz
glibc-d8ea0d0168b190bdf138a20358293c939509367f.tar.xz
glibc-d8ea0d0168b190bdf138a20358293c939509367f.zip
Add an internal wrapper for clone, clone2 and clone3
The clone3 system call (since Linux 5.3) provides a superset of the
functionality of clone and clone2.  It also provides a number of API
improvements, including the ability to specify the size of the child's
stack area, which the kernel can use to compute the shadow stack size
when allocating the shadow stack.  Add:

extern int __clone_internal (struct clone_args *__cl_args,
			     int (*__func) (void *__arg), void *__arg);

to provide an abstract interface for clone, clone2 and clone3.

1. Simplify stack management for thread creation by passing both stack
base and size to create_thread.
2. Consolidate clone vs clone2 differences into a single file.
3. Call __clone3 if HAVE_CLONE3_WRAPPER is defined.  If __clone3 returns
-1 with errno set to ENOSYS, fall back to clone or clone2.
4. Use only __clone_internal to clone a thread.  Since the stack size
argument for create_thread is now unconditional, always pass stack size
to create_thread.
5. Enable the public clone3 wrapper in the future after it has been
added to all targets.

NB: The sandbox returns ENOSYS for clone3 in both Chromium:

The following revision refers to this bug:
  https://chromium.googlesource.com/chromium/src/+/218438259dd795456f0a48f67cbe5b4e520db88b

commit 218438259dd795456f0a48f67cbe5b4e520db88b
Author: Matthew Denton <mpdenton@chromium.org>
Date: Thu Jun 03 20:06:13 2021

Linux sandbox: return ENOSYS for clone3

Because clone3 uses a pointer argument rather than a flags argument, we
cannot examine the contents with seccomp, which is essential to
preventing sandboxed processes from starting other processes. So, we
won't be able to support clone3 in Chromium. This CL modifies the
BPF policy to return ENOSYS for clone3 so glibc always uses the fallback
to clone.

Bug: 1213452
Change-Id: I7c7c585a319e0264eac5b1ebee1a45be2d782303
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2936184
Reviewed-by: Robert Sesek <rsesek@chromium.org>
Commit-Queue: Matthew Denton <mpdenton@chromium.org>
Cr-Commit-Position: refs/heads/master@{#888980}

[modify] https://crrev.com/218438259dd795456f0a48f67cbe5b4e520db88b/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc

and Firefox:

https://hg.mozilla.org/integration/autoland/rev/ecb4011a0c76

Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Diffstat (limited to 'nptl/pthread_create.c')
-rw-r--r--nptl/pthread_create.c38
1 files changed, 22 insertions, 16 deletions
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index 440adc2a6f..d8ec299cb1 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -36,6 +36,7 @@
 #include "libioP.h"
 #include <sys/single_threaded.h>
 #include <version.h>
+#include <clone_internal.h>
 
 #include <shlib-compat.h>
 
@@ -227,8 +228,8 @@ late_init (void)
 static int _Noreturn start_thread (void *arg);
 
 static int create_thread (struct pthread *pd, const struct pthread_attr *attr,
-			  bool *stopped_start, STACK_VARIABLES_PARMS,
-			  bool *thread_ran)
+			  bool *stopped_start, void *stackaddr,
+			  size_t stacksize, bool *thread_ran)
 {
   /* Determine whether the newly created threads has to be started
      stopped since we have to set the scheduling parameters or set the
@@ -280,14 +281,18 @@ static int create_thread (struct pthread *pd, const struct pthread_attr *attr,
 
   TLS_DEFINE_INIT_TP (tp, pd);
 
-#ifdef __NR_clone2
-# define ARCH_CLONE __clone2
-#else
-# define ARCH_CLONE __clone
-#endif
-  if (__glibc_unlikely (ARCH_CLONE (&start_thread, STACK_VARIABLES_ARGS,
-				    clone_flags, pd, &pd->tid, tp, &pd->tid)
-			== -1))
+  struct clone_args args =
+    {
+      .flags = clone_flags,
+      .pidfd = (uintptr_t) &pd->tid,
+      .parent_tid = (uintptr_t) &pd->tid,
+      .child_tid = (uintptr_t) &pd->tid,
+      .stack = (uintptr_t) stackaddr,
+      .stack_size = stacksize,
+      .tls = (uintptr_t) tp,
+    };
+  int ret = __clone_internal (&args, &start_thread, pd);
+  if (__glibc_unlikely (ret == -1))
     return errno;
 
   /* It's started now, so if we fail below, we'll have to let it clean itself
@@ -576,7 +581,8 @@ int
 __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
 		      void *(*start_routine) (void *), void *arg)
 {
-  STACK_VARIABLES;
+  void *stackaddr = NULL;
+  size_t stacksize = 0;
 
   /* Avoid a data race in the multi-threaded case, and call the
      deferred initialization only once.  */
@@ -600,7 +606,7 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
     }
 
   struct pthread *pd = NULL;
-  int err = ALLOCATE_STACK (iattr, &pd);
+  int err = allocate_stack (iattr, &pd, &stackaddr, &stacksize);
   int retval = 0;
 
   if (__glibc_unlikely (err != 0))
@@ -744,8 +750,8 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
 
       /* We always create the thread stopped at startup so we can
 	 notify the debugger.  */
-      retval = create_thread (pd, iattr, &stopped_start,
-			      STACK_VARIABLES_ARGS, &thread_ran);
+      retval = create_thread (pd, iattr, &stopped_start, stackaddr,
+			      stacksize, &thread_ran);
       if (retval == 0)
 	{
 	  /* We retain ownership of PD until (a) (see CONCURRENCY NOTES
@@ -776,8 +782,8 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
 	}
     }
   else
-    retval = create_thread (pd, iattr, &stopped_start,
-			    STACK_VARIABLES_ARGS, &thread_ran);
+    retval = create_thread (pd, iattr, &stopped_start, stackaddr,
+			    stacksize, &thread_ran);
 
   /* Return to the previous signal mask, after creating the new
      thread.  */