about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- include/clone_internal.h 16
-rw-r--r-- nptl/allocatestack.c 59
-rw-r--r-- nptl/pthread_create.c 38
-rw-r--r-- sysdeps/unix/sysv/linux/Makefile 3
-rw-r--r-- sysdeps/unix/sysv/linux/clone-internal.c 91
-rw-r--r-- sysdeps/unix/sysv/linux/clone3.c 1
-rw-r--r-- sysdeps/unix/sysv/linux/clone3.h 67
-rw-r--r-- sysdeps/unix/sysv/linux/spawni.c 26
8 files changed, 213 insertions, 88 deletions
diff --git a/include/clone_internal.h b/include/clone_internal.h
new file mode 100644
index 0000000000..4b23ef33ce
--- /dev/null
+++ b/include/clone_internal.h
@@ -0,0 +1,16 @@
+#ifndef _CLONE_INTERNAL_H
+#define _CLONE_INTERNAL_H 1
+/* Own guard, so that an earlier <clone3.h> cannot hide what follows.  */
+#include <clone3.h>
+
+extern __typeof (clone3) __clone3;
+/* The internal wrapper of clone/clone2 and clone3.  If __clone3 returns
+   -1 with ENOSYS, fall back to clone or clone2.  */
+extern int __clone_internal (struct clone_args *__cl_args,
+			     int (*__func) (void *__arg), void *__arg);
+
+#ifndef _ISOMAC
+libc_hidden_proto (__clone3)
+libc_hidden_proto (__clone_internal)
+#endif
+#endif /* clone_internal.h */
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 9be6c42894..cfe37a3443 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -33,47 +33,6 @@
 #include <kernel-features.h>
 #include <nptl-stack.h>
 
-#ifndef NEED_SEPARATE_REGISTER_STACK
-
-/* Most architectures have exactly one stack pointer.  Some have more.  */
-# define STACK_VARIABLES void *stackaddr = NULL
-
-/* How to pass the values to the 'create_thread' function.  */
-# define STACK_VARIABLES_ARGS stackaddr
-
-/* How to declare function which gets there parameters.  */
-# define STACK_VARIABLES_PARMS void *stackaddr
-
-/* How to declare allocate_stack.  */
-# define ALLOCATE_STACK_PARMS void **stack
-
-/* This is how the function is called.  We do it this way to allow
-   other variants of the function to have more parameters.  */
-# define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
-
-#else
-
-/* We need two stacks.  The kernel will place them but we have to tell
-   the kernel about the size of the reserved address space.  */
-# define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
-
-/* How to pass the values to the 'create_thread' function.  */
-# define STACK_VARIABLES_ARGS stackaddr, stacksize
-
-/* How to declare function which gets there parameters.  */
-# define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
-
-/* How to declare allocate_stack.  */
-# define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
-
-/* This is how the function is called.  We do it this way to allow
-   other variants of the function to have more parameters.  */
-# define ALLOCATE_STACK(attr, pd) \
-  allocate_stack (attr, pd, &stackaddr, &stacksize)
-
-#endif
-
-
 /* Default alignment of stack.  */
 #ifndef STACK_ALIGN
 # define STACK_ALIGN __alignof__ (long double)
@@ -252,7 +211,7 @@ advise_stack_range (void *mem, size_t size, uintptr_t pd, size_t guardsize)
    PDP must be non-NULL.  */
 static int
 allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
-		ALLOCATE_STACK_PARMS)
+		void **stack, size_t *stacksize)
 {
   struct pthread *pd;
   size_t size;
@@ -603,25 +562,17 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
   /* We place the thread descriptor at the end of the stack.  */
   *pdp = pd;
 
-#if _STACK_GROWS_DOWN
   void *stacktop;
 
-# if TLS_TCB_AT_TP
+#if TLS_TCB_AT_TP
   /* The stack begins before the TCB and the static TLS block.  */
   stacktop = ((char *) (pd + 1) - tls_static_size_for_stack);
-# elif TLS_DTV_AT_TP
+#elif TLS_DTV_AT_TP
   stacktop = (char *) (pd - 1);
-# endif
+#endif
 
-# ifdef NEED_SEPARATE_REGISTER_STACK
+  *stacksize = stacktop - pd->stackblock;
   *stack = pd->stackblock;
-  *stacksize = stacktop - *stack;
-# else
-  *stack = stacktop;
-# endif
-#else
-  *stack = pd->stackblock;
-#endif
 
   return 0;
 }
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index 440adc2a6f..d8ec299cb1 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -36,6 +36,7 @@
 #include "libioP.h"
 #include <sys/single_threaded.h>
 #include <version.h>
+#include <clone_internal.h>
 
 #include <shlib-compat.h>
 
@@ -227,8 +228,8 @@ late_init (void)
 static int _Noreturn start_thread (void *arg);
 
 static int create_thread (struct pthread *pd, const struct pthread_attr *attr,
-			  bool *stopped_start, STACK_VARIABLES_PARMS,
-			  bool *thread_ran)
+			  bool *stopped_start, void *stackaddr,
+			  size_t stacksize, bool *thread_ran)
 {
   /* Determine whether the newly created threads has to be started
      stopped since we have to set the scheduling parameters or set the
@@ -280,14 +281,18 @@ static int create_thread (struct pthread *pd, const struct pthread_attr *attr,
 
   TLS_DEFINE_INIT_TP (tp, pd);
 
-#ifdef __NR_clone2
-# define ARCH_CLONE __clone2
-#else
-# define ARCH_CLONE __clone
-#endif
-  if (__glibc_unlikely (ARCH_CLONE (&start_thread, STACK_VARIABLES_ARGS,
-				    clone_flags, pd, &pd->tid, tp, &pd->tid)
-			== -1))
+  struct clone_args args =
+    {
+      .flags = clone_flags,
+      .pidfd = (uintptr_t) &pd->tid,
+      .parent_tid = (uintptr_t) &pd->tid,
+      .child_tid = (uintptr_t) &pd->tid,
+      .stack = (uintptr_t) stackaddr,
+      .stack_size = stacksize,
+      .tls = (uintptr_t) tp,
+    };
+  int ret = __clone_internal (&args, &start_thread, pd);
+  if (__glibc_unlikely (ret == -1))
     return errno;
 
   /* It's started now, so if we fail below, we'll have to let it clean itself
@@ -576,7 +581,8 @@ int
 __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
 		      void *(*start_routine) (void *), void *arg)
 {
-  STACK_VARIABLES;
+  void *stackaddr = NULL;
+  size_t stacksize = 0;
 
   /* Avoid a data race in the multi-threaded case, and call the
      deferred initialization only once.  */
@@ -600,7 +606,7 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
     }
 
   struct pthread *pd = NULL;
-  int err = ALLOCATE_STACK (iattr, &pd);
+  int err = allocate_stack (iattr, &pd, &stackaddr, &stacksize);
   int retval = 0;
 
   if (__glibc_unlikely (err != 0))
@@ -744,8 +750,8 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
 
       /* We always create the thread stopped at startup so we can
 	 notify the debugger.  */
-      retval = create_thread (pd, iattr, &stopped_start,
-			      STACK_VARIABLES_ARGS, &thread_ran);
+      retval = create_thread (pd, iattr, &stopped_start, stackaddr,
+			      stacksize, &thread_ran);
       if (retval == 0)
 	{
 	  /* We retain ownership of PD until (a) (see CONCURRENCY NOTES
@@ -776,8 +782,8 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
 	}
     }
   else
-    retval = create_thread (pd, iattr, &stopped_start,
-			    STACK_VARIABLES_ARGS, &thread_ran);
+    retval = create_thread (pd, iattr, &stopped_start, stackaddr,
+			    stacksize, &thread_ran);
 
   /* Return to the previous signal mask, after creating the new
      thread.  */
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index feb8fd4ce1..ed0c0d27f4 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -65,7 +65,8 @@ sysdep_routines += adjtimex clone umount umount2 readahead sysctl \
 		   xstat fxstat lxstat xstat64 fxstat64 lxstat64 \
 		   fxstatat fxstatat64 \
 		   xmknod xmknodat convert_scm_timestamps \
-		   closefrom_fallback
+		   closefrom_fallback \
+		   clone3 clone-internal
 
 CFLAGS-gethostid.c = -fexceptions
 CFLAGS-tee.c = -fexceptions -fasynchronous-unwind-tables
diff --git a/sysdeps/unix/sysv/linux/clone-internal.c b/sysdeps/unix/sysv/linux/clone-internal.c
new file mode 100644
index 0000000000..1e7a8f6b35
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/clone-internal.c
@@ -0,0 +1,91 @@
+/* The internal wrapper of clone and clone3.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <stddef.h>
+#include <errno.h>
+#include <sched.h>
+#include <clone_internal.h>
+#include <libc-pointer-arith.h>	/* For cast_to_pointer.  */
+#include <stackinfo.h>		/* For _STACK_GROWS_{UP,DOWN}.  */
+
+#define CLONE_ARGS_SIZE_VER0 64 /* First published struct: ends at tls.  */
+#define CLONE_ARGS_SIZE_VER1 80 /* Second published struct: ends at set_tid_size.  */
+#define CLONE_ARGS_SIZE_VER2 88 /* Third published struct: ends at cgroup.  */
+
+#define sizeof_field(TYPE, MEMBER) sizeof ((((TYPE *)0)->MEMBER))
+#define offsetofend(TYPE, MEMBER) \
+  (offsetof (TYPE, MEMBER) + sizeof_field (TYPE, MEMBER))
+
+_Static_assert (__alignof (struct clone_args) == 8,
+		"__alignof (struct clone_args) != 8");
+_Static_assert (offsetofend (struct clone_args, tls) == CLONE_ARGS_SIZE_VER0,
+		"offsetofend (struct clone_args, tls) != CLONE_ARGS_SIZE_VER0");
+_Static_assert (offsetofend (struct clone_args, set_tid_size) == CLONE_ARGS_SIZE_VER1,
+		"offsetofend (struct clone_args, set_tid_size) != CLONE_ARGS_SIZE_VER1");
+_Static_assert (offsetofend (struct clone_args, cgroup) == CLONE_ARGS_SIZE_VER2,
+		"offsetofend (struct clone_args, cgroup) != CLONE_ARGS_SIZE_VER2");
+_Static_assert (sizeof (struct clone_args) == CLONE_ARGS_SIZE_VER2,
+		"sizeof (struct clone_args) != CLONE_ARGS_SIZE_VER2");
+
+int
+__clone_internal (struct clone_args *cl_args,
+		  int (*func) (void *arg), void *arg)
+{
+  int ret;
+#ifdef HAVE_CLONE3_WAPPER
+  /* Try clone3 first.  NOTE(review): macro is spelled "WAPPER"; confirm.  */
+  int saved_errno = errno;
+  ret = __clone3 (cl_args, sizeof (*cl_args), func, arg);
+  if (ret != -1 || errno != ENOSYS)
+    return ret;
+
+  /* NB: clone3 failed with ENOSYS.  Restore errno before falling back so
+     that the stale ENOSYS is not visible to callers that inspect errno.  */
+  __set_errno (saved_errno);
+#endif
+
+  /* Map clone3 arguments to clone arguments: exit_signal is folded into
+     flags and, for __clone, the lowest stack address is advanced to the
+     top if the stack grows down.  NB: No validation; internal use only.  */
+  int flags = cl_args->flags | cl_args->exit_signal;
+  void *stack = cast_to_pointer (cl_args->stack);
+
+#ifdef __ia64__
+  ret = __clone2 (func, stack, cl_args->stack_size,
+		  flags, arg,
+		  cast_to_pointer (cl_args->parent_tid),
+		  cast_to_pointer (cl_args->tls),
+		  cast_to_pointer (cl_args->child_tid));
+#else
+# if !_STACK_GROWS_DOWN && !_STACK_GROWS_UP
+#  error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
+# endif
+
+# if _STACK_GROWS_DOWN
+  stack += cl_args->stack_size;
+# endif
+  ret = __clone (func, stack, flags, arg,
+		 cast_to_pointer (cl_args->parent_tid),
+		 cast_to_pointer (cl_args->tls),
+		 cast_to_pointer (cl_args->child_tid));
+#endif
+  return ret;
+}
+
+libc_hidden_def (__clone_internal)
diff --git a/sysdeps/unix/sysv/linux/clone3.c b/sysdeps/unix/sysv/linux/clone3.c
new file mode 100644
index 0000000000..de963ef89d
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/clone3.c
@@ -0,0 +1 @@
+/* An empty placeholder.  */
diff --git a/sysdeps/unix/sysv/linux/clone3.h b/sysdeps/unix/sysv/linux/clone3.h
new file mode 100644
index 0000000000..1e35ff6422
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/clone3.h
@@ -0,0 +1,67 @@
+/* The wrapper of clone3.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _CLONE3_H
+#define _CLONE3_H	1
+
+#include <features.h>
+#include <stddef.h>
+#include <bits/types.h>
+
+__BEGIN_DECLS
+
+/* The unsigned 64-bit and 8-byte aligned integer type.  */
+typedef __U64_TYPE __aligned_uint64_t __attribute__ ((__aligned__ (8)));
+
+/* This struct should only be used as an argument to the clone3 system
+   call (along with its size argument).  It may be extended with new
+   fields in the future.  */
+
+struct clone_args
+{
+  /* Flags bit mask.  */
+  __aligned_uint64_t flags;
+  /* Where to store PID file descriptor (pid_t *).  */
+  __aligned_uint64_t pidfd;
+  /* Where to store child TID, in child's memory (pid_t *).  */
+  __aligned_uint64_t child_tid;
+  /* Where to store child TID, in parent's memory (int *).  */
+  __aligned_uint64_t parent_tid;
+  /* Signal to deliver to parent on child termination.  */
+  __aligned_uint64_t exit_signal;
+  /* The lowest address of the stack.  */
+  __aligned_uint64_t stack;
+  /* Size of the stack.  */
+  __aligned_uint64_t stack_size;
+  /* Location of new TLS.  */
+  __aligned_uint64_t tls;
+  /* Pointer to a pid_t array (since Linux 5.5).  */
+  __aligned_uint64_t set_tid;
+  /* Number of elements in set_tid (since Linux 5.5).  */
+  __aligned_uint64_t set_tid_size;
+  /* File descriptor for target cgroup of child (since Linux 5.7).  */
+  __aligned_uint64_t cgroup;
+};
+
+/* The wrapper of clone3.  */
+extern int clone3 (struct clone_args *__cl_args, size_t __size,
+		   int (*__func) (void *__arg), void *__arg);
+
+__END_DECLS
+
+#endif /* clone3.h */
diff --git a/sysdeps/unix/sysv/linux/spawni.c b/sysdeps/unix/sysv/linux/spawni.c
index f7e7353a05..6b0bade4d4 100644
--- a/sysdeps/unix/sysv/linux/spawni.c
+++ b/sysdeps/unix/sysv/linux/spawni.c
@@ -26,6 +26,7 @@
 #include <spawn_int.h>
 #include <sysdep.h>
 #include <sys/resource.h>
+#include <clone_internal.h>
 
 /* The Linux implementation of posix_spawn{p} uses the clone syscall directly
    with CLONE_VM and CLONE_VFORK flags and an allocated stack.  The new stack
@@ -53,21 +54,6 @@
    normal program exit with the exit code 127.  */
 #define SPAWN_ERROR	127
 
-#ifdef __ia64__
-# define CLONE(__fn, __stackbase, __stacksize, __flags, __args) \
-  __clone2 (__fn, __stackbase, __stacksize, __flags, __args, 0, 0, 0)
-#else
-# define CLONE(__fn, __stack, __stacksize, __flags, __args) \
-  __clone (__fn, __stack, __flags, __args)
-#endif
-
-/* Since ia64 wants the stackbase w/clone2, re-use the grows-up macro.  */
-#if _STACK_GROWS_UP || defined (__ia64__)
-# define STACK(__stack, __stack_size) (__stack)
-#elif _STACK_GROWS_DOWN
-# define STACK(__stack, __stack_size) (__stack + __stack_size)
-#endif
-
 
 struct posix_spawn_args
 {
@@ -382,8 +368,14 @@ __spawnix (pid_t * pid, const char *file,
      need for CLONE_SETTLS.  Although parent and child share the same TLS
      namespace, there will be no concurrent access for TLS variables (errno
      for instance).  */
-  new_pid = CLONE (__spawni_child, STACK (stack, stack_size), stack_size,
-		   CLONE_VM | CLONE_VFORK | SIGCHLD, &args);
+  struct clone_args clone_args =
+    {
+      .flags = CLONE_VM | CLONE_VFORK,
+      .exit_signal = SIGCHLD,
+      .stack = (uintptr_t) stack,
+      .stack_size = stack_size,
+    };
+  new_pid = __clone_internal (&clone_args, __spawni_child, &args);
 
   /* It needs to collect the case where the auxiliary process was created
      but failed to execute the file (due either any preparation step or