From f859a8d3c33cc275f41d983bfeff2a21a9f88f1b Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 3 Feb 2010 21:22:57 +0000 Subject: [PATCH] * While waiting for a lock, print a sign of life every 5 minutes. This prevents remote builders from being killed by the `max-silent-time' inactivity monitor while they are waiting for a long garbage collection to finish. This happens fairly often in the Hydra build farm. --- src/libmain/shared.cc | 13 +++++++++++++ src/libstore/pathlocks.cc | 14 ++++++++++++-- src/libstore/pathlocks.hh | 3 ++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/libmain/shared.cc b/src/libmain/shared.cc index d9cf9a862..d48e2ad69 100644 --- a/src/libmain/shared.cc +++ b/src/libmain/shared.cc @@ -31,6 +31,11 @@ static void sigintHandler(int signo) } +static void sigalrmHandler(int signo) +{ +} + + Path makeRootName(const Path & gcRoot, int & counter) { counter++; @@ -160,6 +165,14 @@ static void initAndRun(int argc, char * * argv) if (sigaction(SIGPIPE, &act, 0)) throw SysError("ignoring SIGPIPE"); + /* Catch SIGALRM with an empty handler (we just need it to get an + EINTR from blocking system calls). */ + act.sa_handler = sigalrmHandler; + sigfillset(&act.sa_mask); + act.sa_flags = 0; + if (sigaction(SIGALRM, &act, 0)) + throw SysError("installing handler for SIGALRM"); + /* Reset SIGCHLD to its default. 
*/ act.sa_handler = SIG_DFL; act.sa_flags = 0; diff --git a/src/libstore/pathlocks.cc b/src/libstore/pathlocks.cc index d8290815c..fe872ceed 100644 --- a/src/libstore/pathlocks.cc +++ b/src/libstore/pathlocks.cc @@ -37,7 +37,8 @@ void deleteLockFile(const Path & path, int fd) } -bool lockFile(int fd, LockType lockType, bool wait) +bool lockFile(int fd, LockType lockType, bool wait, + unsigned int progressInterval) { struct flock lock; if (lockType == ltRead) lock.l_type = F_RDLCK; @@ -49,11 +50,20 @@ bool lockFile(int fd, LockType lockType, bool wait) lock.l_len = 0; /* entire file */ if (wait) { - while (fcntl(fd, F_SETLKW, &lock) != 0) { + /* Wait until we acquire the lock. If `progressInterval' is + non-zero, then print a message every `progressInterval' + seconds. This is mostly to make sure that remote builders + aren't killed due to the `max-silent-time' inactivity + monitor while waiting for the garbage collector lock. */ + while (1) { + if (progressInterval) alarm(progressInterval); + if (fcntl(fd, F_SETLKW, &lock) == 0) break; checkInterrupt(); if (errno != EINTR) throw SysError(format("acquiring/releasing lock")); + if (progressInterval) printMsg(lvlError, "still waiting for lock..."); } + alarm(0); } else { while (fcntl(fd, F_SETLK, &lock) != 0) { checkInterrupt(); diff --git a/src/libstore/pathlocks.hh b/src/libstore/pathlocks.hh index 57ca1584a..8c6ac6a03 100644 --- a/src/libstore/pathlocks.hh +++ b/src/libstore/pathlocks.hh @@ -17,7 +17,8 @@ void deleteLockFile(const Path & path, int fd); enum LockType { ltRead, ltWrite, ltNone }; -bool lockFile(int fd, LockType lockType, bool wait); +bool lockFile(int fd, LockType lockType, bool wait, + unsigned int progressInterval = 300); class PathLocks