about summary refs log tree commit diff
path: root/src/process/posix_spawn.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/process/posix_spawn.c')
-rw-r--r--src/process/posix_spawn.c174
1 files changed, 122 insertions, 52 deletions
diff --git a/src/process/posix_spawn.c b/src/process/posix_spawn.c
index 5eb516b0..c55907d3 100644
--- a/src/process/posix_spawn.c
+++ b/src/process/posix_spawn.c
@@ -1,51 +1,47 @@
+#define _GNU_SOURCE
 #include <spawn.h>
+#include <sched.h>
 #include <unistd.h>
 #include <signal.h>
-#include <stdint.h>
 #include <fcntl.h>
+#include <sys/wait.h>
 #include "syscall.h"
 #include "pthread_impl.h"
 #include "fdop.h"
 #include "libc.h"
 
-extern char **environ;
-
 static void dummy_0()
 {
 }
 weak_alias(dummy_0, __acquire_ptc);
 weak_alias(dummy_0, __release_ptc);
 
-pid_t __vfork(void);
-
-int __posix_spawnx(pid_t *restrict res, const char *restrict path,
-	int (*exec)(const char *, char *const *, char *const *),
-	const posix_spawn_file_actions_t *fa,
-	const posix_spawnattr_t *restrict attr,
-	char *const argv[restrict], char *const envp[restrict])
-{
-	pid_t pid;
+struct args {
+	int p[2];
 	sigset_t oldmask;
-	int i;
-	posix_spawnattr_t dummy_attr = { 0 };
-
-	if (!attr) attr = &dummy_attr;
-
-	sigprocmask(SIG_BLOCK, SIGALL_SET, &oldmask);
-
-	__acquire_ptc();
-	pid = __vfork();
-
-	if (pid) {
-		__release_ptc();
-		sigprocmask(SIG_SETMASK, &oldmask, 0);
-		if (pid < 0) return -pid;
-		*res = pid;
-		return 0;
-	}
-
-	for (i=1; i<=8*__SYSCALL_SSLEN; i++) {
-		struct sigaction sa;
+	const char *path;
+	int (*exec)(const char *, char *const *, char *const *);
+	const posix_spawn_file_actions_t *fa;
+	const posix_spawnattr_t *restrict attr;
+	char *const *argv, *const *envp;
+};
+
+static int child(void *args_vp)
+{
+	int i, ret;
+	struct sigaction sa;
+	struct args *args = args_vp;
+	int p = args->p[1];
+	const posix_spawn_file_actions_t *fa = args->fa;
+	const posix_spawnattr_t *restrict attr = args->attr;
+
+	close(args->p[0]);
+
+	/* All signal dispositions must be either SIG_DFL or SIG_IGN
+	 * before signals are unblocked. Otherwise a signal handler
+	 * from the parent might get run in the child while sharing
+	 * memory, with unpredictable and dangerous results. */
+	for (i=1; i<_NSIG; i++) {
 		__libc_sigaction(i, 0, &sa);
 		if (sa.sa_handler!=SIG_DFL && (sa.sa_handler!=SIG_IGN ||
 		    ((attr->__flags & POSIX_SPAWN_SETSIGDEF)
@@ -55,50 +51,124 @@ int __posix_spawnx(pid_t *restrict res, const char *restrict path,
 		}
 	}
 
-	if ((attr->__flags&POSIX_SPAWN_SETPGROUP) && setpgid(0, attr->__pgrp))
-		_exit(127);
+	if (attr->__flags & POSIX_SPAWN_SETPGROUP)
+		if ((ret=__syscall(SYS_setpgid, 0, attr->__pgrp)))
+			goto fail;
 
-	/* Use syscalls directly because pthread state is not consistent
-	 * for making calls to the library wrappers... */
-	if ((attr->__flags&POSIX_SPAWN_RESETIDS) && (
-		__syscall(SYS_setgid, __syscall(SYS_getgid)) ||
-		__syscall(SYS_setuid, __syscall(SYS_getuid)) ))
-		_exit(127);
+	/* Use syscalls directly because pthread state because the
+	 * library functions attempt to do a multi-threaded synchronized
+	 * id-change, which would trash the parent's state. */
+	if (attr->__flags & POSIX_SPAWN_RESETIDS)
+		if ((ret=__syscall(SYS_setgid, __syscall(SYS_getgid))) ||
+		    (ret=__syscall(SYS_setuid, __syscall(SYS_getuid))) )
+			goto fail;
 
 	if (fa && fa->__actions) {
 		struct fdop *op;
-		int ret, fd;
+		int fd;
 		for (op = fa->__actions; op->next; op = op->next);
 		for (; op; op = op->prev) {
+			/* It's possible that a file operation would clobber
+			 * the pipe fd used for synchronizing with the
+			 * parent. To avoid that, we dup the pipe onto
+			 * an unoccupied fd. */
+			if (op->fd == p) {
+				ret = __syscall(SYS_dup, p);
+				if (ret < 0) goto fail;
+				__syscall(SYS_close, p);
+				p = ret;
+			}
 			switch(op->cmd) {
 			case FDOP_CLOSE:
-				ret = __syscall(SYS_close, op->fd);
+				if ((ret=__syscall(SYS_close, op->fd)))
+					goto fail;
 				break;
 			case FDOP_DUP2:
-				ret = __syscall(SYS_dup2, op->fd, op->newfd)<0;
+				if ((ret=__syscall(SYS_dup2, op->srcfd, op->fd))<0)
+					goto fail;
 				break;
 			case FDOP_OPEN:
 				fd = __syscall(SYS_open, op->path,
 					op->oflag | O_LARGEFILE, op->mode);
-				if (fd == op->fd) {
-					ret = 0;
-				} else {
-					ret = __syscall(SYS_dup2, fd, op->fd)<0;
+				if ((ret=fd) < 0) goto fail;
+				if (fd != op->fd) {
+					if ((ret=__syscall(SYS_dup2, fd, op->fd))<0)
+						goto fail;
 					__syscall(SYS_close, fd);
 				}
 				break;
 			}
-			if (ret) _exit(127);
 		}
 	}
 
-	sigprocmask(SIG_SETMASK, (attr->__flags & POSIX_SPAWN_SETSIGMASK)
-		? &attr->__mask : &oldmask, 0);
+	/* Close-on-exec flag may have been lost if we moved the pipe
+	 * to a different fd. We don't use F_DUPFD_CLOEXEC above because
+	 * it would fail on older kernels and atomicity is not needed --
+	 * in this process there are no threads or signal handlers. */
+	__syscall(SYS_fcntl, p, F_SETFD, FD_CLOEXEC);
 
-	exec(path, argv, envp ? envp : environ);
+	pthread_sigmask(SIG_SETMASK, (attr->__flags & POSIX_SPAWN_SETSIGMASK)
+		? &attr->__mask : &args->oldmask, 0);
+
+	args->exec(args->path, args->argv, args->envp);
+
+fail:
+	/* Since sizeof errno < PIPE_BUF, the write is atomic. */
+	ret = -ret;
+	if (ret) while (write(p, &ret, sizeof ret) < 0);
 	_exit(127);
+}
+
+
+int __posix_spawnx(pid_t *restrict res, const char *restrict path,
+	int (*exec)(const char *, char *const *, char *const *),
+	const posix_spawn_file_actions_t *fa,
+	const posix_spawnattr_t *restrict attr,
+	char *const argv[restrict], char *const envp[restrict])
+{
+	pid_t pid;
+	char stack[1024];
+	int ec=0, cs;
+	struct args args;
+
+	if (pipe2(args.p, O_CLOEXEC))
+		return errno;
+
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
+
+	args.path = path;
+	args.exec = exec;
+	args.fa = fa;
+	args.attr = attr ? attr : &(const posix_spawnattr_t){0};
+	args.argv = argv;
+	args.envp = envp;
+	pthread_sigmask(SIG_BLOCK, SIGALL_SET, &args.oldmask);
+
+	/* This lock prevents setuid/setgid operations while the parent
+	 * is sharing memory with the child. Situations where processes
+	 * with different permissions share VM are fundamentally unsafe. */
+	__acquire_ptc();
+	pid = __clone(child, stack+sizeof stack, CLONE_VM|SIGCHLD, &args);
+	close(args.p[1]);
+
+	if (pid > 0) {
+		if (read(args.p[0], &ec, sizeof ec) < sizeof ec) ec = 0;
+		else waitpid(pid, &(int){0}, 0);
+	} else {
+		ec = -pid;
+	}
+
+	/* At this point, the child has either exited or successfully
+	 * performed exec, so the lock may be released. */
+	__release_ptc();
+	close(args.p[0]);
+
+	if (!ec) *res = pid;
+
+	pthread_sigmask(SIG_SETMASK, &args.oldmask, 0);
+	pthread_setcancelstate(cs, 0);
 
-	return 0;
+	return ec;
 }
 
 int posix_spawn(pid_t *restrict res, const char *restrict path,