/* accept4 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

/*
 * NOTE(review): the header names of the original #include lines were lost;
 * this list is reconstructed from the identifiers the file uses.
 */
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/wait.h>

#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#define MAX_SV 512

// ## not truly portable :/
/* Expands to a single expression (no trailing ';') so it is safe in if/else. */
#define LOG(fmt, ...) dprintf(selflogfd[1], fmt "\n", ## __VA_ARGS__)

/* Desired state of a service. */
enum {
    UNDEF,
    UP,
    DOWN,
};

/*
 * One service, i.e. one file in the "sv" directory.
 *
 * File names start with two digits (the level) followed by a type letter
 * that the code below dispatches on:
 *   'S' oneshot run while booting     'K' oneshot run while shutting down
 *   'D' daemon                        'L' logger for the daemon of same name
 *   'E' error handler run when a boot oneshot fails
 */
struct entry {
    char level;
    char name[62];
    char logged;        // set when stdout is hooked up to a logger service
    pid_t pid;          // 0 when not running
    pid_t display_pid;  // not reset to 0, needed for oneshots
    int logfd[2];       // pipe carrying the service's stdout
    time_t start;
    int status;         // raw wait status of the last exit
    char state;         // UP or DOWN
};

struct entry services[MAX_SV];
int level;

pthread_t main_thread;
pthread_t socket_thread;
pthread_t logger_thread;

int selflogfd[2];   // pipe for our own LOG() messages
int newlogfd[2];    // wake-up pipe: tells logger_loop to rebuild its poll set

/* Written from a signal handler and/or the socket thread, read by main. */
volatile sig_atomic_t want_shutdown;
volatile sig_atomic_t want_rescan;
volatile sig_atomic_t want_reboot = 0;

void on_sigint(int sig)
{
    (void)sig;
    want_shutdown = 1;
}

void on_sigcont(int sig)
{
    (void)sig;
    /* do nothing, but interrupt the system call */
}

/* Create a pipe with both ends non-blocking and close-on-exec. */
static int make_pipe(int fd[2])
{
    if (pipe(fd) < 0)
        return -1;
    for (int k = 0; k < 2; k++) {
        fcntl(fd[k], F_SETFL, O_NONBLOCK);
        fcntl(fd[k], F_SETFD, FD_CLOEXEC);
    }
    return 0;
}

/*
 * Start services[i] unless it is already running or not wanted UP.
 *
 * stdout of the child goes to services[i].logfd[1].  A logger ('L') gets
 * the read end of the pipe of the daemon ('D') with the same name suffix
 * as stdin.  If the service was last started in the current second, the
 * child sleeps 1s before exec.  Aborts the process if fork() fails.
 */
void restart(int i)
{
    if (services[i].pid > 0)
        return;
    if (services[i].state != UP)
        return;

    int delay = 0;
    time_t now = time(0);
    if (services[i].start == now)
        delay = 1;

    int loggerpipe[2] = { -1, -1 };

    if (services[i].logfd[0] == -1) {
        if (pipe(services[i].logfd) < 0) {
            /* keep the "no pipe" marker; the child keeps our stdout */
            services[i].logfd[0] = -1;
            services[i].logfd[1] = -1;
            LOG("pipe failed for %s", services[i].name);
        } else {
            fcntl(services[i].logfd[0], F_SETFL, O_NONBLOCK);
            /* best effort: wake logger_loop so it polls the new fd */
            write(newlogfd[1], "!", 1);
        }
    }

    if (services[i].name[2] == 'L') {
        for (int j = 0; j < MAX_SV; j++) {
            if (strcmp(services[i].name + 3, services[j].name + 3) != 0 ||
                services[j].name[2] != 'D')
                continue;

            if (services[j].logfd[0] == -1) {
                // hook up logger
                if (pipe(loggerpipe) < 0) {
                    loggerpipe[0] = -1;
                    loggerpipe[1] = -1;
                    LOG("pipe failed for %s", services[j].name);
                } else {
                    services[j].logfd[0] = loggerpipe[0];
                    services[j].logfd[1] = loggerpipe[1];
                    services[j].logged = 1;
                }
            } else {
                // recover loggerpipe
                loggerpipe[0] = services[j].logfd[0];
                loggerpipe[1] = services[j].logfd[1];
            }
            break;
        }
    }

    pid_t child = fork();
    if (child == 0) {
        dup2(services[i].logfd[1], 1);
        if (loggerpipe[0] != -1) {
            // loggers get read end of loggerpipe
            dup2(loggerpipe[0], 0);
            close(loggerpipe[0]);
            close(loggerpipe[1]);
        }
        close(services[i].logfd[0]);
        close(services[i].logfd[1]);

        sleep(delay);

        setsid();
        execl(services[i].name, services[i].name, (char *)0);
        /*
         * _exit, not exit: we are a fork of a multithreaded process and
         * must not flush the parent's stdio buffers or run its atexit
         * handlers.  The exit status is unchanged.
         */
        _exit(-1);
    } else if (child > 0) {
        fcntl(services[i].logfd[0], F_SETFD, FD_CLOEXEC);
        fcntl(services[i].logfd[1], F_SETFD, FD_CLOEXEC);
        if (loggerpipe[0] != -1) {
            fcntl(loggerpipe[0], F_SETFD, FD_CLOEXEC);
            fcntl(loggerpipe[1], F_SETFD, FD_CLOEXEC);
        }
        services[i].pid = services[i].display_pid = child;
        services[i].start = now;
    } else {
        abort();
    }
}

/* Map a one-letter control command to a signal number, 0 if unknown. */
int charsig(char c)
{
    switch (c) {
    case 'p': return SIGSTOP;
    case 'c': return SIGCONT;
    case 'h': return SIGHUP;
    case 'a': return SIGALRM;
    case 'i': return SIGINT;
    case 'q': return SIGQUIT;
    case '1': return SIGUSR1;
    case '2': return SIGUSR2;
    case 't': return SIGTERM;
    case 'k': return SIGKILL;
    default:  return 0;
    }
}

/*
 * Control socket thread.
 *
 * Accepts connections on /tmp/rvnit.sock, reads one request (first byte is
 * the command, the rest an optional service name) and acts on it:
 *   'l' print level, 's' print status, 'd'/'u' set a service down/up,
 *   a charsig() letter sends that signal, 'X' shutdown, 'R' reboot.
 * Exits the process with 111 if the socket cannot be set up.
 */
void *socket_loop(void *ignored)
{
    (void)ignored;

    const char *path = "/tmp/rvnit.sock";

    struct sockaddr_un addr = { 0 };
    addr.sun_family = AF_UNIX;
    /* addr is zeroed and we copy at most size-1 bytes: always terminated */
    strncpy(addr.sun_path, path, sizeof addr.sun_path - 1);

    int listenfd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
    if (listenfd < 0) {
        perror("socket");
        exit(111);
    }

    unlink(path);
    int r = bind(listenfd, (struct sockaddr *)&addr, sizeof addr);
    if (r < 0) {
        perror("bind");
        exit(111);
    }

    r = listen(listenfd, SOMAXCONN);
    if (r < 0) {
        perror("listen");
        exit(111);
    }

    while (1) {
        int connfd = accept4(listenfd, 0, 0, SOCK_CLOEXEC);
        if (connfd < 0)
            continue;

        /* give the client 7s to send its request */
        struct pollfd fds[1];
        fds[0].fd = connfd;
        fds[0].events = POLLIN;
        int n = poll(fds, 1, 7000);
        if (n != 1) {
            close(connfd);
            continue;
        }

        char buf[63];
        /* leave room for the terminating NUL written below */
        ssize_t rd = read(connfd, buf, sizeof buf - 1);
        if (rd <= 0) {
            close(connfd);
            continue;
        }

        /* best effort acknowledgement */
        write(connfd, "ok\n", 3);

        char cmd = buf[0];

        if (cmd == 'l') {
            dprintf(connfd, "level=%d\n", level);
        }

        if (cmd == 's') {
            time_t now = time(0);
            for (int i = 0; i < MAX_SV; i++) {
                if (services[i].name[2] == 'D' ||
                    services[i].name[2] == 'L')
                    dprintf(connfd,
                        "%-25s pid=%d state=%s uptime=%ld status=%d\n",
                        services[i].name,
                        services[i].pid,
                        services[i].state == UP ? "UP" : "DOWN",
                        services[i].pid > 0 ?
                            (long)(now - services[i].start) : 0,
                        services[i].status);
            }
        }

        if ((cmd == 'd' || cmd == 'u' || charsig(cmd)) && rd > 1) {
            buf[rd] = 0;
            printf("got %c|%s|\n", cmd, buf+1);
            for (int i = 0; i < MAX_SV; i++) {
                if (strcmp(services[i].name, buf+1) != 0)
                    continue;

                if (cmd == 'd') {
                    printf("setting %s down\n", services[i].name);
                    services[i].state = DOWN;
                    want_rescan = 1;
                    pthread_kill(main_thread, SIGCONT);
                } else if (cmd == 'u') {
                    printf("setting %s up\n", services[i].name);
                    services[i].state = UP;
                    want_rescan = 1;
                    pthread_kill(main_thread, SIGCONT);
                } else if (charsig(cmd) && services[i].pid > 0) {
                    printf("sending signal %d to %s\n",
                        charsig(cmd), services[i].name);
                    kill(services[i].pid, charsig(cmd));
                }
            }
        }

        if (cmd == 'X') {
            want_shutdown = 1;
            pthread_kill(main_thread, SIGCONT);
        }

        if (cmd == 'R') {
            want_shutdown = 1;
            want_reboot = 1;
            pthread_kill(main_thread, SIGCONT);
        }

        close(connfd);
    }

    return 0;
}

/*
 * Logger thread.
 *
 * Polls our own log pipe and the stdout pipe of every service that has no
 * logger service attached, and prints each line to stdout prefixed with a
 * UTC timestamp, the service name and its pid.  A byte on newlogfd makes
 * the loop rebuild its poll set.  Ends when selflogfd is closed.
 */
void *logger_loop(void *ignored)
{
    (void)ignored;

    while (1) {
        /* two control fds plus at most one fd per service */
        struct pollfd fds[MAX_SV + 2];
        nfds_t nfds = 0;

        fds[nfds].fd = newlogfd[0];
        fds[nfds].events = POLLIN;
        nfds++;

        fds[nfds].fd = selflogfd[0];
        fds[nfds].events = POLLIN;
        nfds++;

        for (int i = 0; i < MAX_SV; i++) {
            if (!services[i].logged && services[i].logfd[0] > 0) {
                fds[nfds].fd = services[i].logfd[0];
                fds[nfds].events = POLLIN;
                nfds++;
            }
        }

        int n = poll(fds, nfds, -1);
        if (n < 0) {
            perror("poll");
            sleep(1);
            continue;
        }

        if (fds[0].revents & POLLIN) {
            /* data on newlogfd, restart */
            char c;
            read(fds[0].fd, &c, 1);
            continue;
        }

        if (n == 1 &&
            (fds[1].revents & POLLHUP) && !(fds[1].revents & POLLIN)) {
            /* selflog was closed, end thread */
            break;
        }

        for (nfds_t j = 0; j < nfds; j++) {
            // XXX HUP, NVAL
            if (!(fds[j].revents & POLLIN))
                continue;

            struct timespec now;
            clock_gettime(CLOCK_REALTIME, &now);

            /* gmtime_r: do not share gmtime()'s static buffer across threads */
            struct tm tm = { 0 };
            gmtime_r(&now.tv_sec, &tm);
            char timestamp[32];
            strftime(timestamp, sizeof timestamp,
                "%Y-%m-%dT%H:%M:%S", &tm);

            char buf[4096];
            ssize_t rd = read(fds[j].fd, buf, sizeof buf);
            if (rd < 0) {
                perror("read");
                continue;
            }

            char *s = buf;
            char *e = buf + rd;
            while (s < e) {
                char *eol = memchr(s, '\n', e - s);
                if (!eol)
                    eol = e;

                const char *sv = "";
                long pid = -1;

                if (fds[j].fd == selflogfd[0]) {
                    sv = "rvnit";
                    pid = getpid();
                } else {
                    for (int i = 0; i < MAX_SV; i++) {
                        if (services[i].logfd[0] == fds[j].fd) {
                            sv = services[i].name;
                            pid = services[i].display_pid;
                            break;
                        }
                    }
                }

                printf("%s.%05ld %s[%ld]: %.*s\n",
                    timestamp, now.tv_nsec / 10000,
                    sv, pid,
                    (int)(eol - s), s);

                /* never step further than one past the end of buf */
                s = eol < e ? eol + 1 : e;
            }
        }
    }

    return 0;
}

/*
 * Record that the child `pid` exited with wait status `status`.
 * Returns the index of the matching service, or -1 if none matches.
 */
int reap(pid_t pid, int status)
{
    for (int i = 0; i < MAX_SV; i++) {
        if (services[i].pid != pid)
            continue;

        services[i].pid = 0;
        services[i].status = status;
        LOG("reaped %s[%d] with status %d during level=%d",
            services[i].name, pid, status, level);
        return i;
    }

    return -1;
}

/*
 * Wait up to `secs` seconds for a child to exit.
 * Returns its pid (> 0) with *wstatus set, 0 on timeout, or < 0 when
 * waitpid() fails with ECHILD.
 */
pid_t timedwait(int *wstatus, int secs)
{
    struct timespec timeout = {secs, 0};

    sigset_t childset;
    sigemptyset(&childset);
    sigaddset(&childset, SIGCHLD);

    /* we block SIGCHLD here, so that we do not lose the signal
       possibly sent between waitpid(-1) and sigtimedwait.
       pthread_sigmask because sigprocmask is unspecified with threads. */
    pthread_sigmask(SIG_BLOCK, &childset, 0);

    pid_t pid;
    while (1) {
        pid = waitpid(-1, wstatus, WNOHANG);
        if (pid == 0) {   // nothing to reap
            if (sigtimedwait(&childset, 0, &timeout) == SIGCHLD)
                continue;
            if (errno == EAGAIN)  // hit timeout, return pid = 0
                break;
        } else if (pid < 0) {
            if (errno == ECHILD)
                break;
            if (errno == EINTR)
                continue;
        } else {
            break;  // pid > 0
        }
    }

    pthread_sigmask(SIG_UNBLOCK, &childset, 0);

    return pid;
}

/* qsort() comparator: order entries by name. */
int svcmp(void const *a, void const *b)
{
    const struct entry *sv_a = a;
    const struct entry *sv_b = b;

    return strcmp(sv_a->name, sv_b->name);
}

/*
 * Run the error handler ('E' service) of the current level, falling back
 * to lower levels, and wait for it to finish.
 *
 * -1: no error handler, 0: error handler successful, 1: error handler failed
 * A handler exiting with status 111 also yields -1 (keep going).
 */
int on_error(void)
{
    int e = -1;

    for (int l = level; l >= 0 && e < 0; l--) {
        for (int i = 0; i < MAX_SV; i++) {
            if (services[i].level != l)
                continue;
            if (services[i].name[2] == 'E') {
                e = i;
                break;
            }
        }
    }

    if (e < 0)
        return -1;  // no error handler, keep going

    restart(e);
    if (services[e].pid <= 0) {
        /* handler could not be started (not UP): do not report a
           success it never had, that would retry the oneshot forever */
        return -1;
    }

    int status = 0;
    pid_t pid = waitpid(services[e].pid, &status, 0);
    reap(pid, status);

    if (WIFEXITED(status) && WEXITSTATUS(status) == 111)
        return -1;  // keep going

    return status != 0;
}

/*
 * Boot: start services level by level (0..99), waiting for each level's
 * oneshots.  Then supervise daemons and loggers until shutdown is
 * requested, and finally take the levels down in reverse order.
 */
int main(void)
{
    if (make_pipe(selflogfd) < 0 || make_pipe(newlogfd) < 0) {
        perror("pipe");
        return 111;
    }

    main_thread = pthread_self();
    int rc = pthread_create(&socket_thread, 0, socket_loop, 0);
    if (rc == 0)
        rc = pthread_create(&logger_thread, 0, logger_loop, 0);
    if (rc != 0) {
        fprintf(stderr, "pthread_create: %s\n", strerror(rc));
        return 111;
    }

    if (chdir("sv") < 0) {
        perror("chdir");
        return 111;
    }

    DIR *dir = opendir(".");
    if (!dir)
        return 111;

    int i = 0;
    struct dirent *ent;
    /* entries beyond MAX_SV do not fit into services[] and are ignored */
    while (i < MAX_SV && (ent = readdir(dir))) {
        if (!isdigit((unsigned char)ent->d_name[0]) ||
            !isdigit((unsigned char)ent->d_name[1]))
            continue;
        if (ent->d_name[strlen(ent->d_name) - 1] == '~')
            continue;

        services[i].level =
            10 * (ent->d_name[0] - '0') + (ent->d_name[1] - '0');
        snprintf(services[i].name, sizeof services[i].name,
            "%s", ent->d_name);
        // follow symlinks
        services[i].state = access(ent->d_name, X_OK) == 0 ? UP : DOWN;
        services[i].logfd[0] = -1;
        services[i].logfd[1] = -1;
        services[i].logged = 0;

        i++;
    }
    closedir(dir);

    qsort(services, MAX_SV, sizeof services[0], svcmp);

    LOG("booting");

    for (level = 0; level < 100; level++) {
        /* spawn all of level */
        int oneshot = 0;

        // spawn loggers first
        for (i = 0; i < MAX_SV; i++) {
            if (services[i].level != level)
                continue;
            if (services[i].state != UP)
                continue;
            if (services[i].name[2] == 'L')
                restart(i);
        }

        // spawn oneshots and daemons
        for (i = 0; i < MAX_SV; i++) {
            if (services[i].level != level)
                continue;
            if (services[i].state != UP)
                continue;
            if (services[i].name[2] == 'S') {
                restart(i);
                oneshot++;
            } else if (services[i].name[2] == 'D') {
                restart(i);
            }
        }

        while (oneshot) {
            int status = 0;
            int pid = wait(&status);
            if (pid < 0 && errno == ECHILD)
                break;

            int idx = reap(pid, status);
            if (idx < 0)
                continue;
            if (services[idx].level != level)
                continue;

            if (services[idx].name[2] == 'S') {
                if (services[idx].status != 0) {
                    LOG("oneshot %s failed, running error services for level=%d",
                        services[idx].name, level);
                    int r = on_error();
                    if (r == 0) {
                        restart(idx);
                        continue;
                    }
                    if (r < 0) {
                        LOG("no error handler, going on");
                    }
                    if (r > 0) {
                        LOG("fatal error, shutting down at level=%d", level);
                        goto fatal;
                    }
                }
                oneshot--;
            } else if (services[idx].name[2] == 'D' ||
                services[idx].name[2] == 'L') {
                restart(idx);
            }
        }
    }

    sigaction(SIGINT, &(struct sigaction){.sa_handler=on_sigint}, 0);
    sigaction(SIGCONT, &(struct sigaction){.sa_handler=on_sigcont}, 0);

    LOG("system up");

    while (1) {
        if (want_shutdown)
            break;

        if (want_rescan) {
            /* clear first, so a request arriving during the scan
               triggers another scan instead of being lost */
            want_rescan = 0;

            printf("rescanning state\n");
            for (i = 0; i < MAX_SV; i++) {
                if (services[i].name[2] == 'D' ||
                    services[i].name[2] == 'L') {
                    if (services[i].state == UP && services[i].pid == 0)
                        restart(i);
                    if (services[i].state == DOWN && services[i].pid > 0)
                        kill(services[i].pid, SIGTERM);
                }
            }
        }

        int status = 0;
        errno = 0;
        int pid = wait(&status);
        if (pid < 0) {
            if (errno == ECHILD)
                break;
            if (errno == EINTR)
                continue;
        }

        int idx = reap(pid, status);
        if (idx < 0)
            continue;

        if (services[idx].name[2] == 'D' || services[idx].name[2] == 'L') {
            LOG("%s terminated with status %d",
                services[idx].name, services[idx].status);
            if (services[idx].state == UP) {
                LOG("restarting %s", services[idx].name);
                restart(idx);
            }
        }
    }

    LOG("shutting down");

    for (level = 99; level >= 0; level--) {
fatal: ;  // arrives with level < 99
        /* kill all of level */
        int oneshot = 0;
        int daemons = 0;
        int loggers = 0;

        for (i = 0; i < MAX_SV; i++) {
            if (services[i].level != level)
                continue;
            if (services[i].name[2] == 'K' && services[i].state == UP) {
                LOG("starting shutdown oneshot %s", services[i].name);
                restart(i);
                oneshot++;
            }
            if (services[i].name[2] == 'D' && services[i].pid > 0) {
                LOG("sending sigterm to %s", services[i].name);
                kill(services[i].pid, SIGTERM);
                kill(services[i].pid, SIGCONT);
                services[i].state = DOWN;
                daemons++;
            }
            if (services[i].name[2] == 'L' && services[i].pid > 0) {
                loggers++;
                services[i].state = DOWN;
            }
        }

        while (oneshot) {
            int status = 0;
            int pid = wait(&status);
            if (pid < 0 && errno == ECHILD)
                break;

            int idx = reap(pid, status);
            if (idx < 0)
                continue;
            if (services[idx].level != level)
                continue;

            if (services[idx].name[2] == 'K') {
                LOG("oneshot %s exited with status %d",
                    services[idx].name, services[idx].status);
                oneshot--;
            } else if (services[idx].name[2] == 'D') {
                LOG("daemon %s exited with status %d",
                    services[idx].name, services[idx].status);
                /* close the write end of the pipe feeding its logger */
                if (services[idx].logged)
                    close(services[idx].logfd[1]);
                daemons--;
            } else if (services[idx].name[2] == 'L') {
                LOG("logger %s exited with status %d",
                    services[idx].name, services[idx].status);
                loggers--;
            }
        }

        // only daemons are left, wait up to 7s before sending
        // them SIGKILL.

        int slayed = 0;

        while (daemons + loggers) {
            LOG("level=%d waiting for %d+%d", level, daemons, loggers);

            int status = 0;
            int pid = timedwait(&status, 7);

            if (pid == 0) {  // hit timeout
                if (slayed)
                    break;

                LOG("slaying level=%d", level);
                for (i = 0; i < MAX_SV; i++) {
                    if (services[i].level != level)
                        continue;
                    if (services[i].pid > 0) {
                        kill(services[i].pid, SIGKILL);
                        kill(services[i].pid, SIGCONT);
                    }
                }

                slayed = 1;
                continue;
            }

            if (pid < 0 && errno == ECHILD)
                break;

            int idx = reap(pid, status);
            if (idx < 0)
                continue;
            if (services[idx].level != level)
                continue;

            if (services[idx].name[2] == 'K') {
                // can't happen
                LOG("oneshot %s exited with status %d",
                    services[idx].name, services[idx].status);
                oneshot--;
            } else if (services[idx].name[2] == 'D') {
                LOG("daemon %s exited with status %d",
                    services[idx].name, services[idx].status);
                if (services[idx].logged)
                    close(services[idx].logfd[1]);
                daemons--;
            } else if (services[idx].name[2] == 'L') {
                LOG("logger %s exited with status %d",
                    services[idx].name, services[idx].status);
                loggers--;
            }
        }

        if (daemons) {
            LOG("slaying unsuccessful, %d daemons left in level=%d, continuing...",
                daemons, level);
        }
    }

    if (want_reboot)
        LOG("reboot");
    else
        LOG("shutdown");

    /* closing our log pipe makes logger_loop terminate */
    close(selflogfd[1]);
    pthread_join(logger_thread, 0);

    return 0;
}