diff options
author | Leah Neukirchen <leah@vuxu.org> | 2021-02-27 20:20:55 +0100 |
---|---|---|
committer | Leah Neukirchen <leah@vuxu.org> | 2021-03-13 16:27:55 +0100 |
commit | 9942e645b33c7d8772f7419e4b3fc8f50aa7eda8 (patch) | |
tree | 7414645645c9e5d0a7786aa137ebdaaf6ae53ca5 | |
parent | 6c81764641c7fe4c6ac190a12b9308f4de54cb4e (diff) | |
download | nq-9942e645b33c7d8772f7419e4b3fc8f50aa7eda8.tar.gz nq-9942e645b33c7d8772f7419e4b3fc8f50aa7eda8.tar.xz nq-9942e645b33c7d8772f7419e4b3fc8f50aa7eda8.zip |
nq: revamp waiting loop to avoid busy loops in contested cases
As reported in #37, the current rewind-loop has the issue of waiting for a lock on the first file that is locked, but it could be detected as being locked by another process that also just wants to check this file. As both processes rewind the dir and retry in this case, this can yield a busy loop. Instead, find the newest locked file and wait for that. This should result in most nq processes waiting for different locks, so this kind of synchonization cannot appear. In theory, removing the rewinddir would be enough, as later nq runs should not result in enqueuing earlier jobs, but it's better to be safe and check there are no more locked files before we launch our job.
-rw-r--r-- | nq.c | 45 |
1 files changed, 27 insertions, 18 deletions
diff --git a/nq.c b/nq.c index 5c23b15..178626b 100644 --- a/nq.c +++ b/nq.c @@ -110,7 +110,7 @@ main(int argc, char *argv[]) int dirfd = 0, lockfd = 0; int opt = 0, cflag = 0, qflag = 0, tflag = 0, wflag = 0; int pipefd[2]; - char lockfile[64]; + char lockfile[64], newestlocked[64]; pid_t child; struct timeval started; struct dirent *ent; @@ -303,31 +303,40 @@ wait: } again: + *newestlocked = 0; + while ((ent = readdir(dir))) { - /* wait for all ,* files. */ + /* wait for all older ,* files than ours. */ - if (ent->d_name[0] == ',' && - strcmp(ent->d_name, lockfile+1) < 0) { - int fd; + if (!(ent->d_name[0] == ',' && + strcmp(ent->d_name, lockfile+1) < 0)) + continue; - fd = openat(dirfd, ent->d_name, O_RDWR); - if (fd < 0) - continue; + int fd = openat(dirfd, ent->d_name, O_RDWR); + if (fd < 0) + continue; - if (flock(fd, LOCK_EX | LOCK_NB) == -1 && - errno == EWOULDBLOCK) { - if (tflag) - exit(1); - flock(fd, LOCK_EX); /* sit it out. */ + if (flock(fd, LOCK_EX | LOCK_NB) == -1 && + errno == EWOULDBLOCK) { + if (tflag) + exit(1); + if (strcmp(ent->d_name, newestlocked) > 0) + strcpy(newestlocked, ent->d_name); + } else { + fchmod(fd, 0600); + } - close(fd); - rewinddir(dir); - goto again; - } + close(fd); + } - fchmod(fd, 0600); + if (*newestlocked) { + int fd = openat(dirfd, newestlocked, O_RDWR); + if (fd >= 0) { + flock(fd, LOCK_EX); /* sit it out. */ close(fd); } + rewinddir(dir); + goto again; } closedir(dir); /* closes dirfd too. */ |