/* spawn a new process running an executable. Hurd version.
Copyright (C) 2001-2014 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation; either version 2.1 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, see <http://www.gnu.org/licenses/>. */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "spawn_int.h"
/* Spawn a new process executing FILE with the attributes described in
   *ATTRP.  Before running the process, perform the actions described in
   FILE_ACTIONS.

   PID, if not null, receives the new process ID when the spawn succeeds.
   FILE is the executable; it is looked up along `PATH' when XFLAGS has
   SPAWN_XFLAGS_USE_PATH set and FILE contains no slash.
   FILE_ACTIONS and ATTRP may each be null.
   ARGV and ENVP are the argument and environment vectors for the new
   program; each may be null.

   Returns 0 on success, or an errno code on failure (in which case the
   freshly created task is terminated).  */
int
__spawni (pid_t *pid, const char *file,
          const posix_spawn_file_actions_t *file_actions,
          const posix_spawnattr_t *attrp,
          char *const argv[], char *const envp[],
          int xflags)
{
  pid_t new_pid;
  char *path, *p, *name;
  size_t len;
  size_t pathlen;
  short int flags;

  /* The generic POSIX.1 implementation of posix_spawn uses fork and exec.
     In traditional POSIX systems (Unix, Linux, etc), the only way to
     create a new process is by fork, which also copies all the things from
     the parent process that will be immediately wiped and replaced by the
     exec.

     This Hurd implementation works by doing an exec on a fresh task,
     without ever doing all the work of fork.  The only work done by fork
     that remains visible after an exec is registration with the proc
     server, and the inheritance of various values and ports.  All those
     inherited values and ports are what get collected up and passed in the
     file_exec RPC by an exec call.  So we do the proc server registration
     here, following the model of fork (see fork.c).  We then collect up
     the inherited values and ports from this (parent) process following
     the model of exec (see hurd/hurdexec.c), modify or replace each value
     that fork would (plus the specific changes demanded by ATTRP and
     FILE_ACTIONS), and make the file_exec RPC on the requested executable
     file with the child process's task port rather than our own.  This
     should be indistinguishable from the fork + exec implementation,
     except that all errors will be detected here (in the parent process)
     and return proper errno codes rather than the child dying with 127.

     XXX The one exception to this supposed indistinguishableness is that
     when posix_spawn_file_actions_addopen has been used, the parent
     process can do various filesystem RPCs on the child's behalf, rather
     than the child process doing it.  If these block due to a broken or
     malicious filesystem server or just a blocked network fs or a serial
     port waiting for carrier detect (!!), the parent's posix_spawn call
     can block arbitrarily rather than just the child blocking.  Possible
     solutions include:
     * punt to plain fork + exec implementation if addopen was used
     ** easy to do
     ** gives up all benefits of this implementation in that case
     * if addopen was used, don't do any file actions at all here;
       instead, exec an installed helper program e.g.:
        /libexec/spawn-helper close 3 dup2 1 2 open 0 /file 0x123 0666 exec /bin/foo foo a1 a2
     ** extra exec might be more or less overhead than fork
     * could do some weird half-fork thing where the child would inherit
       our vm and run some code here, but not do the full work of fork

     XXX Actually, the parent opens the executable file on behalf of
     the child, and that has all the same issues.

     I am favoring the half-fork solution.  That is, we do task_create with
     vm inheritance, and we setjmp/longjmp the child like fork does.  But
     rather than all the fork hair, the parent just packs up init/dtable
     ports and does a single IPC to a receive right inserted in the child.  */

  error_t err;
  task_t task;
  file_t execfile;
  process_t proc;
  auth_t auth;
  int ints[INIT_INT_MAX];
  file_t *dtable;
  unsigned int dtablesize, orig_dtablesize, i;
  struct hurd_port **dtable_cells;
  char *dtable_cloexec;
  struct hurd_userlink *ulink_dtable = NULL;
  struct hurd_sigstate *ss;

  /* For POSIX_SPAWN_RESETIDS, this reauthenticates our root/current
     directory ports with the new AUTH port.  The result is cached in
     *RESULT, so a second call for the same port is a no-op.  */
  file_t rcrdir = MACH_PORT_NULL, rcwdir = MACH_PORT_NULL;
  error_t reauthenticate (int which, file_t *result)
    {
      error_t err;
      mach_port_t ref;
      if (*result != MACH_PORT_NULL)
        return 0;
      ref = __mach_reply_port ();
      err = HURD_PORT_USE
        (&_hurd_ports[which],
         ({
           err = __io_reauthenticate (port, ref, MACH_MSG_TYPE_MAKE_SEND);
           if (!err)
             err = __auth_user_authenticate (auth,
                                             ref, MACH_MSG_TYPE_MAKE_SEND,
                                             result);
           err;
         }));
      __mach_port_destroy (__mach_task_self (), ref);
      return err;
    }

  /* Reauthenticate one of our file descriptors for the child.  A null
     element of DTABLE_CELLS indicates a descriptor that was already
     reauthenticated, or was newly opened on behalf of the child.  */
  error_t reauthenticate_fd (int fd)
    {
      if (dtable_cells[fd] != NULL)
        {
          file_t newfile;
          mach_port_t ref = __mach_reply_port ();
          error_t err = __io_reauthenticate (dtable[fd],
                                             ref, MACH_MSG_TYPE_MAKE_SEND);
          if (!err)
            err = __auth_user_authenticate (auth,
                                            ref, MACH_MSG_TYPE_MAKE_SEND,
                                            &newfile);
          __mach_port_destroy (__mach_task_self (), ref);
          if (err)
            return err;
          /* Swap the borrowed original port for the new right, which we
             now own outright (hence the null cell).  */
          _hurd_port_free (dtable_cells[fd], &ulink_dtable[fd], dtable[fd]);
          dtable_cells[fd] = NULL;
          dtable[fd] = newfile;
        }
      return 0;
    }

  /* These callbacks are for looking up file names on behalf of the child.  */
  error_t child_init_port (int which, error_t (*operate) (mach_port_t))
    {
      if (flags & POSIX_SPAWN_RESETIDS)
        switch (which)
          {
          case INIT_PORT_AUTH:
            return (*operate) (auth);
          case INIT_PORT_CRDIR:
            return (reauthenticate (INIT_PORT_CRDIR, &rcrdir)
                    ?: (*operate) (rcrdir));
          case INIT_PORT_CWDIR:
            return (reauthenticate (INIT_PORT_CWDIR, &rcwdir)
                    ?: (*operate) (rcwdir));
          }
      assert (which != INIT_PORT_PROC);
      return _hurd_ports_use (which, operate);
    }
  file_t child_fd (int fd)
    {
      if ((unsigned int) fd < dtablesize && dtable[fd] != MACH_PORT_NULL)
        {
          if (flags & POSIX_SPAWN_RESETIDS)
            {
              /* Reauthenticate this descriptor right now,
                 since it is going to be used on behalf of the child.  */
              errno = reauthenticate_fd (fd);
              if (errno)
                return MACH_PORT_NULL;
            }
          /* Hand the caller its own send right reference.  */
          __mach_port_mod_refs (__mach_task_self (), dtable[fd],
                                MACH_PORT_RIGHT_SEND, +1);
          return dtable[fd];
        }
      errno = EBADF;
      return MACH_PORT_NULL;
    }
  inline error_t child_lookup (const char *file, int oflag, mode_t mode,
                               file_t *result)
    {
      return __hurd_file_name_lookup (&child_init_port, &child_fd, 0,
                                      file, oflag, mode, result);
    }

  /* Do this once.  */
  flags = attrp == NULL ? 0 : attrp->__flags;

  /* Generate the new process.  We create a task that does not inherit our
     memory, and then register it as our child like fork does.  See fork.c
     for comments about the sequencing of these proc operations.  */
  err = __task_create (__mach_task_self (),
#ifdef KERN_INVALID_LEDGER
                       NULL, 0, /* OSF Mach */
#endif
                       0, &task);
  if (err)
    return __hurd_fail (err);

  // From here down we must deallocate TASK and PROC before returning.
  proc = MACH_PORT_NULL;
  auth = MACH_PORT_NULL;
  err = __USEPORT (PROC, __proc_task2pid (port, task, &new_pid));
  if (!err)
    err = __USEPORT (PROC, __proc_task2proc (port, task, &proc));
  if (!err)
    err = __USEPORT (PROC, __proc_child (port, task));
  if (err)
    goto out;

  /* Load up the ints to give the new program.  */
  memset (ints, 0, sizeof ints);
  ints[INIT_UMASK] = _hurd_umask;
  ints[INIT_TRACEMASK] = _hurdsig_traced;

  ss = _hurd_self_sigstate ();
  assert (! __spin_lock_locked (&ss->critical_section_lock));
  __spin_lock (&ss->critical_section_lock);
  __spin_lock (&ss->lock);
  ints[INIT_SIGMASK] = ss->blocked;
  ints[INIT_SIGPENDING] = ss->pending;
  ints[INIT_SIGIGN] = 0;
  /* Unless we were asked to reset all handlers to SIG_DFL,
     pass down the set of signals that were set to SIG_IGN.  */
  if ((flags & POSIX_SPAWN_SETSIGDEF) == 0)
    for (i = 1; i < NSIG; ++i)
      if (ss->actions[i].sa_handler == SIG_IGN)
        ints[INIT_SIGIGN] |= __sigmask (i);

  /* The sigstate lock itself is dropped here, but we stay inside the
     critical section until the descriptor table has been packed (see the
     _hurd_critical_section_unlock calls below).  Setting the critical
     section flag avoids anything we call trying to acquire the sigstate
     lock.  Every exit path taken before that point must leave the
     critical section itself.  */
  __spin_unlock (&ss->lock);

  /* Set signal mask.  */
  if ((flags & POSIX_SPAWN_SETSIGMASK) != 0)
    ints[INIT_SIGMASK] = attrp->__ss;

#ifdef _POSIX_PRIORITY_SCHEDULING
  /* Set the scheduling algorithm and parameters.  */
# error implement me
  if ((flags & (POSIX_SPAWN_SETSCHEDPARAM | POSIX_SPAWN_SETSCHEDULER))
      == POSIX_SPAWN_SETSCHEDPARAM)
    {
      if (__sched_setparam (0, &attrp->__sp) == -1)
        _exit (SPAWN_ERROR);
    }
  else if ((flags & POSIX_SPAWN_SETSCHEDULER) != 0)
    {
      if (__sched_setscheduler (0, attrp->__policy,
                                (flags & POSIX_SPAWN_SETSCHEDPARAM) != 0
                                ? &attrp->__sp : NULL) == -1)
        _exit (SPAWN_ERROR);
    }
#endif

  /* Set the process group ID.  */
  if (!err && (flags & POSIX_SPAWN_SETPGROUP) != 0)
    err = __proc_setpgrp (proc, new_pid, attrp->__pgrp);

  /* Set the effective user and group IDs.  */
  if (!err && (flags & POSIX_SPAWN_RESETIDS) != 0)
    {
      /* We need a different auth port for the child.  */
      __mutex_lock (&_hurd_id.lock);
      err = _hurd_check_ids (); /* Get _hurd_id up to date.  */
      if (!err && _hurd_id.rid_auth == MACH_PORT_NULL)
        {
          /* Set up _hurd_id.rid_auth.  This is a special auth server port
             which uses the real uid and gid (the first aux uid and gid) as
             the only effective uid and gid.  */
          if (_hurd_id.aux.nuids < 1 || _hurd_id.aux.ngids < 1)
            /* We do not have a real UID and GID.  Lose, lose, lose!  */
            err = EGRATUITOUS;

          /* Create a new auth port using our real UID and GID (the first
             auxiliary UID and GID) as the only effective IDs.  */
          if (!err)
            err = __USEPORT (AUTH,
                             __auth_makeauth (port,
                                              NULL, MACH_MSG_TYPE_COPY_SEND, 0,
                                              _hurd_id.aux.uids, 1,
                                              _hurd_id.aux.uids,
                                              _hurd_id.aux.nuids,
                                              _hurd_id.aux.gids, 1,
                                              _hurd_id.aux.gids,
                                              _hurd_id.aux.ngids,
                                              &_hurd_id.rid_auth));
        }
      if (!err)
        {
          /* Use the real-ID auth port in place of the normal one.  */
          assert (_hurd_id.rid_auth != MACH_PORT_NULL);
          auth = _hurd_id.rid_auth;
          __mach_port_mod_refs (__mach_task_self (), auth,
                                MACH_PORT_RIGHT_SEND, +1);
        }
      __mutex_unlock (&_hurd_id.lock);
    }
  else
    /* Copy our existing auth port.  */
    err = __USEPORT (AUTH, __mach_port_mod_refs (__mach_task_self (),
                                                 (auth = port),
                                                 MACH_PORT_RIGHT_SEND, +1));

  if (err)
    {
      /* We are still inside the critical section entered above; leave it
         before bailing out so the lock is not left held on return.  */
      _hurd_critical_section_unlock (ss);
      goto out;
    }

  /* Pack up the descriptor table to give the new program.
     These descriptors will need to be reauthenticated below
     if POSIX_SPAWN_RESETIDS is set.  */
  __mutex_lock (&_hurd_dtable_lock);
  dtablesize = _hurd_dtablesize;
  orig_dtablesize = _hurd_dtablesize;
  dtable = __alloca (dtablesize * sizeof (dtable[0]));
  ulink_dtable = __alloca (dtablesize * sizeof (ulink_dtable[0]));
  dtable_cells = __alloca (dtablesize * sizeof (dtable_cells[0]));
  dtable_cloexec = __alloca (dtablesize);
  for (i = 0; i < dtablesize; ++i)
    {
      struct hurd_fd *const d = _hurd_dtable[i];
      if (d == NULL)
        {
          dtable[i] = MACH_PORT_NULL;
          dtable_cells[i] = NULL;
          continue;
        }
      /* Note that this might return MACH_PORT_NULL.  */
      dtable[i] = _hurd_port_get (&d->port, &ulink_dtable[i]);
      dtable_cells[i] = &d->port;
      dtable_cloexec[i] = (d->flags & FD_CLOEXEC) != 0;
    }
  __mutex_unlock (&_hurd_dtable_lock);

  /* Safe to let signals happen now.  */
  _hurd_critical_section_unlock (ss);

  /* Execute the file actions.  */
  if (file_actions != NULL)
    for (i = 0; i < file_actions->__used; ++i)
      {
        /* Close a file descriptor in the child.  */
        error_t do_close (int fd)
          {
            if ((unsigned int)fd < dtablesize
                && dtable[fd] != MACH_PORT_NULL)
              {
                if (dtable_cells[fd] == NULL)
                  /* A right we own outright (opened or reauthenticated
                     for the child).  */
                  __mach_port_deallocate (__mach_task_self (), dtable[fd]);
                else
                  {
                    _hurd_port_free (dtable_cells[fd],
                                     &ulink_dtable[fd], dtable[fd]);
                  }
                dtable_cells[fd] = NULL;
                dtable[fd] = MACH_PORT_NULL;
                return 0;
              }
            return EBADF;
          }

        /* Make sure the dtable can hold NEWFD.  */
#define EXPAND_DTABLE(newfd)                                                  \
        ({                                                                    \
          if ((unsigned int)newfd >= dtablesize                               \
              && newfd < _hurd_rlimits[RLIMIT_OFILE].rlim_cur)                \
            {                                                                 \
              /* We need to expand the dtable for the child.  */              \
              NEW_TABLE (dtable, newfd);                                      \
              NEW_TABLE (ulink_dtable, newfd);                                \
              NEW_TABLE (dtable_cells, newfd);                                \
              dtablesize = newfd + 1;                                         \
            }                                                                 \
          ((unsigned int)newfd < dtablesize ? 0 : EMFILE);                    \
        })
#define NEW_TABLE(x, newfd) \
  do { __typeof (x) new_##x = __alloca ((newfd + 1) * sizeof (x[0]));         \
  memcpy (new_##x, x, dtablesize * sizeof (x[0]));                            \
  memset (&new_##x[dtablesize], 0, (newfd + 1 - dtablesize) * sizeof (x[0])); \
  x = new_##x; } while (0)

        struct __spawn_action *action = &file_actions->__actions[i];

        switch (action->tag)
          {
          case spawn_do_close:
            err = do_close (action->action.close_action.fd);
            break;

          case spawn_do_dup2:
            if ((unsigned int)action->action.dup2_action.fd < dtablesize
                && dtable[action->action.dup2_action.fd] != MACH_PORT_NULL)
              {
                const int fd = action->action.dup2_action.fd;
                const int newfd = action->action.dup2_action.newfd;
                // dup2 always clears any old FD_CLOEXEC flag on the new fd.
                if (newfd < orig_dtablesize)
                  dtable_cloexec[newfd] = 0;
                if (fd == newfd)
                  // Same is same as same was.
                  break;
                err = EXPAND_DTABLE (newfd);
                if (!err)
                  {
                    /* Close the old NEWFD and replace it with FD's
                       contents, which can be either an original
                       descriptor (DTABLE_CELLS[FD] != 0) or a new
                       right that we acquired in this function.  */
                    do_close (newfd);
                    dtable_cells[newfd] = dtable_cells[fd];
                    if (dtable_cells[newfd] != NULL)
                      dtable[newfd] = _hurd_port_get (dtable_cells[newfd],
                                                      &ulink_dtable[newfd]);
                    else
                      {
                        dtable[newfd] = dtable[fd];
                        err = __mach_port_mod_refs (__mach_task_self (),
                                                    dtable[fd],
                                                    MACH_PORT_RIGHT_SEND, +1);
                      }
                  }
              }
            else
              // The old FD specified was bogus.
              err = EBADF;
            break;

          case spawn_do_open:
            /* Open a file on behalf of the child.

               XXX note that this can subject the parent to arbitrary
               delays waiting for the files to open.  I don't know what the
               spec says about this.  If it's not permissible, then this
               whole forkless implementation is probably untenable.  */
            {
              const int fd = action->action.open_action.fd;
              /* Any previous occupant of FD goes away; EBADF from an
                 unused slot is deliberately ignored.  */
              do_close (fd);
              if (fd < orig_dtablesize)
                dtable_cloexec[fd] = 0;
              err = EXPAND_DTABLE (fd);
              if (err)
                break;
              err = child_lookup (action->action.open_action.path,
                                  action->action.open_action.oflag,
                                  action->action.open_action.mode,
                                  &dtable[fd]);
              dtable_cells[fd] = NULL;
              break;
            }
          }

        if (err)
          goto out;
      }

  /* Only now can we perform FD_CLOEXEC.  We had to leave the descriptors
     unmolested for the file actions to use.  Note that the DTABLE_CLOEXEC
     array is never expanded by file actions, so it might now have fewer
     than DTABLESIZE elements.  */
  for (i = 0; i < orig_dtablesize; ++i)
    if (dtable[i] != MACH_PORT_NULL && dtable_cloexec[i])
      {
        assert (dtable_cells[i] != NULL);
        _hurd_port_free (dtable_cells[i], &ulink_dtable[i], dtable[i]);
        dtable[i] = MACH_PORT_NULL;
      }

  /* Prune trailing null ports from the descriptor table.  */
  while (dtablesize > 0 && dtable[dtablesize - 1] == MACH_PORT_NULL)
    --dtablesize;

  if (flags & POSIX_SPAWN_RESETIDS)
    {
      /* Reauthenticate all the child's ports with its new auth handle.  */
      mach_port_t ref;
      process_t newproc;

      /* Reauthenticate with the proc server.  */
      ref = __mach_reply_port ();
      err = __proc_reauthenticate (proc, ref, MACH_MSG_TYPE_MAKE_SEND);
      if (!err)
        err = __auth_user_authenticate (auth,
                                        ref, MACH_MSG_TYPE_MAKE_SEND,
                                        &newproc);
      __mach_port_destroy (__mach_task_self (), ref);
      if (!err)
        {
          __mach_port_deallocate (__mach_task_self (), proc);
          proc = newproc;
        }

      if (!err)
        err = reauthenticate (INIT_PORT_CRDIR, &rcrdir);
      if (!err)
        err = reauthenticate (INIT_PORT_CWDIR, &rcwdir);

      /* We must reauthenticate all the fds except those that came from
         `spawn_do_open' file actions, which were opened using the child's
         auth port to begin with.  */
      for (i = 0; !err && i < dtablesize; ++i)
        err = reauthenticate_fd (i);
    }
  if (err)
    goto out;

  /* Now we are ready to open the executable file using the child's ports.
     We do this after performing all the file actions so the order of
     events is the same as for a fork, exec sequence.  This affects things
     like the meaning of a /dev/fd file name, as well as which error
     conditions are diagnosed first and what side effects (file creation,
     etc) can be observed before what errors.  */
  if ((xflags & SPAWN_XFLAGS_USE_PATH) == 0 || strchr (file, '/') != NULL)
    /* The FILE parameter is actually a path.  */
    err = child_lookup (file, O_EXEC, 0, &execfile);
  else
    {
      /* We have to search for FILE on the path.  */
      path = getenv ("PATH");
      if (path == NULL)
        {
          /* There is no `PATH' in the environment.
             The default search path is the current directory
             followed by the path `confstr' returns for `_CS_PATH'.  */
          len = confstr (_CS_PATH, (char *) NULL, 0);
          path = (char *) __alloca (1 + len);
          path[0] = ':';
          (void) confstr (_CS_PATH, path + 1, len);
        }

      len = strlen (file) + 1;
      pathlen = strlen (path);
      name = __alloca (pathlen + len + 1);
      /* Copy the file name at the top.  */
      name = (char *) memcpy (name + pathlen + 1, file, len);
      /* And add the slash.  */
      *--name = '/';

      p = path;
      do
        {
          char *startp;

          path = p;
          p = __strchrnul (path, ':');

          if (p == path)
            /* Two adjacent colons, or a colon at the beginning or the end
               of `PATH' means to search the current directory.  */
            startp = name + 1;
          else
            /* Prepend this directory right before the "/FILE" suffix.  */
            startp = (char *) memcpy (name - (p - path), path, p - path);

          /* Try to open this file name.  */
          err = child_lookup (startp, O_EXEC, 0, &execfile);
          switch (err)
            {
            case EACCES:
            case ENOENT:
            case ESTALE:
            case ENOTDIR:
              /* Those errors indicate the file is missing or not executable
                 by us, in which case we want to just try the next path
                 directory.  */
              continue;

            case 0: /* Success! */
            default:
              /* Some other error means we found an executable file, but
                 something went wrong executing it; return the error to our
                 caller.  */
              break;
            }

          // We only get here when we are done looking for the file.
          break;
        }
      while (*p++ != '\0');
    }
  if (err)
    goto out;

  /* Almost there!  */
  {
    mach_port_t ports[_hurd_nports];
    struct hurd_userlink ulink_ports[_hurd_nports];
    char *args = NULL, *env = NULL;
    size_t argslen = 0, envlen = 0;

    /* Make the file_exec RPC on FILE for the child TASK, using everything
       packed up so far.  */
    inline error_t exec (file_t file)
      {
        return __file_exec (file, task,
                            (__sigismember (&_hurdsig_traced, SIGKILL)
                             ? EXEC_SIGTRAP : 0),
                            args, argslen, env, envlen,
                            dtable, MACH_MSG_TYPE_COPY_SEND, dtablesize,
                            ports, MACH_MSG_TYPE_COPY_SEND, _hurd_nports,
                            ints, INIT_INT_MAX,
                            NULL, 0, NULL, 0);
      }

    /* Now we are out of things that can fail before the file_exec RPC,
       for which everything else must be prepared.  The only thing left
       to do is packing up the argument and environment strings,
       and the array of init ports.  */
    if (argv != NULL)
      err = __argz_create (argv, &args, &argslen);
    if (!err && envp != NULL)
      err = __argz_create (envp, &env, &envlen);

    /* Load up the ports to give to the new program.
       Note the loop/switch below must parallel exactly to release refs.  */
    for (i = 0; i < _hurd_nports; ++i)
      {
        switch (i)
          {
          case INIT_PORT_AUTH:
            ports[i] = auth;
            continue;
          case INIT_PORT_PROC:
            ports[i] = proc;
            continue;
          case INIT_PORT_CRDIR:
            if (flags & POSIX_SPAWN_RESETIDS)
              {
                ports[i] = rcrdir;
                continue;
              }
            break;
          case INIT_PORT_CWDIR:
            if (flags & POSIX_SPAWN_RESETIDS)
              {
                ports[i] = rcwdir;
                continue;
              }
            break;
          }
        ports[i] = _hurd_port_get (&_hurd_ports[i], &ulink_ports[i]);
      }

    /* Finally, try executing the file we opened.  */
    if (!err)
      err = exec (execfile);
    __mach_port_deallocate (__mach_task_self (), execfile);

    if (err == ENOEXEC)
      {
        /* The file is accessible but it is not an executable file.
           Invoke the shell to interpret it as a script.  */
        err = __argz_insert (&args, &argslen, args, _PATH_BSHELL);
        if (!err)
          err = child_lookup (_PATH_BSHELL, O_EXEC, 0, &execfile);
        if (!err)
          {
            err = exec (execfile);
            __mach_port_deallocate (__mach_task_self (), execfile);
          }
      }

    /* Release the references just packed up in PORTS.
       This switch must always parallel the one above that fills PORTS.  */
    for (i = 0; i < _hurd_nports; ++i)
      {
        switch (i)
          {
          case INIT_PORT_AUTH:
          case INIT_PORT_PROC:
            continue;
          case INIT_PORT_CRDIR:
            if (flags & POSIX_SPAWN_RESETIDS)
              continue;
            break;
          case INIT_PORT_CWDIR:
            if (flags & POSIX_SPAWN_RESETIDS)
              continue;
            break;
          }
        _hurd_port_free (&_hurd_ports[i], &ulink_ports[i], ports[i]);
      }

    free (args);
    free (env);
  }

  /* We did it!  We have a child!  Only report the PID when the exec
     actually succeeded; otherwise the task is killed just below and the
     PID would not name a running child.  */
  if (!err && pid != NULL)
    *pid = new_pid;

 out:
  /* Clean up all the references we are now holding.  */

  if (task != MACH_PORT_NULL)
    {
      if (err)
        /* We failed after creating the task, so kill it.  */
        __task_terminate (task);
      __mach_port_deallocate (__mach_task_self (), task);
    }
  __mach_port_deallocate (__mach_task_self (), auth);
  __mach_port_deallocate (__mach_task_self (), proc);
  if (rcrdir != MACH_PORT_NULL)
    __mach_port_deallocate (__mach_task_self (), rcrdir);
  if (rcwdir != MACH_PORT_NULL)
    __mach_port_deallocate (__mach_task_self (), rcwdir);

  /* ULINK_DTABLE stays null until the descriptor table has been packed,
     so an early failure skips this loop entirely.  */
  if (ulink_dtable)
    /* Release references to the file descriptor ports.  */
    for (i = 0; i < dtablesize; ++i)
      if (dtable[i] != MACH_PORT_NULL)
        {
          if (dtable_cells[i] == NULL)
            __mach_port_deallocate (__mach_task_self (), dtable[i]);
          else
            _hurd_port_free (dtable_cells[i], &ulink_dtable[i], dtable[i]);
        }

  if (err)
    /* This hack canonicalizes the error code that we return.  */
    err = (__hurd_fail (err), errno);
  return err;
}