From cacff1be886ed975bbef1b17151b25c633711256 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 23 Mar 2009 01:05:54 +0000 Subject: [PATCH] * No longer block while waiting for a lock on a store path. Instead poll for it (i.e. if we can't acquire the lock, then let the main select() loop wait for at most a few seconds and then try again). This improves parallelism: if two nix-store processes are both trying to build a path at the same time, the second one shouldn't block; it should first see if it can build other goals. Also, it prevents the deadlocks that have been occuring in Hydra lately, where a process waits for a lock held by another process that's waiting for a lock held by the first. The downside is that polling isn't really elegant, but POSIX doesn't provide a way to wait for locks in a select() loop. The only solution would be to spawn a thread for each lock to do a blocking fcntl() and then signal the main thread, but that would require pthreads. --- src/libstore/build.cc | 87 ++++++++++++++++++++++++++++++--------- src/libstore/globals.cc | 2 +- src/libstore/globals.hh | 2 +- src/libstore/pathlocks.cc | 17 ++++++-- src/libstore/pathlocks.hh | 5 ++- 5 files changed, 85 insertions(+), 28 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 103289b43..e827431eb 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -200,6 +201,12 @@ private: /* Goals waiting for busy paths to be unlocked. */ WeakGoals waitingForAnyGoal; + /* Goals sleeping for a few seconds (polling a lock). */ + WeakGoals waitingForAWhile; + + /* Last time the goals in `waitingForAWhile' where woken up. */ + time_t lastWokenUp; + public: LocalStore & store; @@ -246,6 +253,12 @@ public: wait for some resource that some other goal is holding. */ void waitForAnyGoal(GoalPtr goal); + /* Wait for a few seconds and then retry this goal. Used when + waiting for a lock held by another process. This kind of + polling is inefficient, but POSIX doesn't really provide a way + to wait for multiple locks in the main select() loop. */ + void waitForAWhile(GoalPtr goal); + /* Loop until the specified top-level goals have finished. */ void run(const Goals & topGoals); @@ -952,10 +965,14 @@ void DerivationGoal::tryToBuild() } /* Obtain locks on all output paths. The locks are automatically - released when we exit this function or Nix crashes. */ - /* !!! nonblock */ - outputLocks.lockPaths(outputPaths(drv.outputs), - (format("waiting for lock on %1%") % showPaths(outputPaths(drv.outputs))).str()); + released when we exit this function or Nix crashes. If we + can't acquire the lock, then continue; hopefully some other + goal can start a build, and if not, the main loop will sleep a + few seconds and then retry this goal. */ + if (!outputLocks.lockPaths(outputPaths(drv.outputs), "", false)) { + worker.waitForAWhile(shared_from_this()); + return; + } /* Now check again whether the outputs are valid. This is because another process may have started building in parallel. After @@ -2205,8 +2222,10 @@ void SubstitutionGoal::tryToRun() /* Acquire a lock on the output path. */ outputLock = boost::shared_ptr(new PathLocks); - outputLock->lockPaths(singleton(storePath), - (format("waiting for lock on `%1%'") % storePath).str()); + if (!outputLock->lockPaths(singleton(storePath), "", false)) { + worker.waitForAWhile(shared_from_this()); + return; + } /* Check again whether the path is invalid. */ if (worker.store.isValidPath(storePath)) { @@ -2372,6 +2391,7 @@ Worker::Worker(LocalStore & store) if (working) abort(); working = true; nrChildren = 0; + lastWokenUp = 0; } @@ -2440,9 +2460,7 @@ void Worker::removeGoal(GoalPtr goal) } /* Wake up goals waiting for any goal to finish. */ - for (WeakGoals::iterator i = waitingForAnyGoal.begin(); - i != waitingForAnyGoal.end(); ++i) - { + foreach (WeakGoals::iterator, i, waitingForAnyGoal) { GoalPtr goal = i->lock(); if (goal) wakeUp(goal); } @@ -2539,6 +2557,13 @@ void Worker::waitForAnyGoal(GoalPtr goal) } +void Worker::waitForAWhile(GoalPtr goal) +{ + debug("wait for a while"); + waitingForAWhile.insert(goal); +} + + void Worker::run(const Goals & _topGoals) { for (Goals::iterator i = _topGoals.begin(); @@ -2566,10 +2591,9 @@ void Worker::run(const Goals & _topGoals) if (topGoals.empty()) break; /* Wait for input. */ - if (!children.empty()) + if (!children.empty() || !waitingForAWhile.empty()) waitForInput(); else - /* !!! not when we're polling */ assert(!awake.empty()); } @@ -2592,22 +2616,36 @@ void Worker::waitForInput() the logger pipe of a build, we assume that the builder has terminated. */ + bool useTimeout = false; + struct timeval timeout; + timeout.tv_usec = 0; + time_t before = time(0); + /* If we're monitoring for silence on stdout/stderr, sleep until the first deadline for any child. */ - struct timeval timeout; if (maxSilentTime != 0) { time_t oldest = 0; foreach (Children::iterator, i, children) { oldest = oldest == 0 || i->second.lastOutput < oldest ? i->second.lastOutput : oldest; } - time_t now = time(0); - timeout.tv_sec = (time_t) (oldest + maxSilentTime) <= now ? 0 : - oldest + maxSilentTime - now; - timeout.tv_usec = 0; + useTimeout = true; + timeout.tv_sec = std::max((time_t) 0, oldest + maxSilentTime - before); printMsg(lvlVomit, format("sleeping %1% seconds") % timeout.tv_sec); } + /* If we are polling goals that are waiting for a lock, then wake + up after a few seconds at most. */ + int wakeUpInterval = 3; + + if (!waitingForAWhile.empty()) { + useTimeout = true; + if (lastWokenUp == 0 && children.empty()) + printMsg(lvlError, "waiting for locks..."); + if (lastWokenUp == 0 || lastWokenUp > before) lastWokenUp = before; + timeout.tv_sec = std::max((time_t) 0, lastWokenUp + wakeUpInterval - before); + } else lastWokenUp = 0; + /* Use select() to wait for the input side of any logger pipe to become `available'. Note that `available' (i.e., non-blocking) includes EOF. */ @@ -2621,12 +2659,12 @@ void Worker::waitForInput() } } - if (select(fdMax, &fds, 0, 0, maxSilentTime != 0 ? &timeout : 0) == -1) { + if (select(fdMax, &fds, 0, 0, useTimeout ? &timeout : 0) == -1) { if (errno == EINTR) return; throw SysError("waiting for input"); } - time_t now = time(0); + time_t after = time(0); /* Process all available file descriptors. */ @@ -2662,13 +2700,13 @@ void Worker::waitForInput() % goal->getName() % rd); string data((char *) buffer, rd); goal->handleChildOutput(*k, data); - j->second.lastOutput = now; + j->second.lastOutput = after; } } } if (maxSilentTime != 0 && - now - j->second.lastOutput >= (time_t) maxSilentTime) + after - j->second.lastOutput >= (time_t) maxSilentTime) { printMsg(lvlError, format("%1% timed out after %2% seconds of silence") @@ -2676,6 +2714,15 @@ void Worker::waitForInput() goal->cancel(); } } + + if (!waitingForAWhile.empty() && lastWokenUp + wakeUpInterval >= after) { + lastWokenUp = after; + foreach (WeakGoals::iterator, i, waitingForAWhile) { + GoalPtr goal = i->lock(); + if (goal) wakeUp(goal); + } + waitingForAWhile.clear(); + } } diff --git a/src/libstore/globals.cc b/src/libstore/globals.cc index 907627b65..cc0e44e8e 100644 --- a/src/libstore/globals.cc +++ b/src/libstore/globals.cc @@ -24,7 +24,7 @@ Verbosity buildVerbosity = lvlInfo; unsigned int maxBuildJobs = 1; bool readOnlyMode = false; string thisSystem = "unset"; -unsigned int maxSilentTime = 0; +time_t maxSilentTime = 0; Paths substituters; bool useBuildHook = true; bool printBuildTrace = false; diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 687247cc7..d3388e309 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -65,7 +65,7 @@ extern string thisSystem; /* The maximum time in seconds that a builer can go without producing any output on stdout/stderr before it is killed. 0 means infinity. */ -extern unsigned int maxSilentTime; +extern time_t maxSilentTime; /* The substituters. There are programs that can somehow realise a store path without building, e.g., by downloading it or copying it diff --git a/src/libstore/pathlocks.cc b/src/libstore/pathlocks.cc index f8753bcb6..cad893726 100644 --- a/src/libstore/pathlocks.cc +++ b/src/libstore/pathlocks.cc @@ -141,9 +141,9 @@ PathLocks::PathLocks(const PathSet & paths, const string & waitMsg) } -void PathLocks::lockPaths(const PathSet & _paths, const string & waitMsg) +bool PathLocks::lockPaths(const PathSet & _paths, + const string & waitMsg, bool wait) { - /* May be called only once! */ assert(fds.empty()); /* Note that `fds' is built incrementally so that the destructor @@ -174,8 +174,15 @@ void PathLocks::lockPaths(const PathSet & _paths, const string & waitMsg) /* Acquire an exclusive lock. */ if (!lockFile(fd, ltWrite, false)) { - if (waitMsg != "") printMsg(lvlError, waitMsg); - lockFile(fd, ltWrite, true); + if (wait) { + if (waitMsg != "") printMsg(lvlError, waitMsg); + lockFile(fd, ltWrite, true); + } else { + /* Failed to lock this path; release all other + locks. */ + unlock(); + return false; + } } debug(format("lock acquired on `%1%'") % lockPath); @@ -199,6 +206,8 @@ void PathLocks::lockPaths(const PathSet & _paths, const string & waitMsg) fds.push_back(FDPair(fd.borrow(), lockPath)); lockedPaths.insert(lockPath); } + + return true; } diff --git a/src/libstore/pathlocks.hh b/src/libstore/pathlocks.hh index 9898b497b..64d62f6ae 100644 --- a/src/libstore/pathlocks.hh +++ b/src/libstore/pathlocks.hh @@ -33,8 +33,9 @@ public: PathLocks(); PathLocks(const PathSet & paths, const string & waitMsg = ""); - void lockPaths(const PathSet & _paths, - const string & waitMsg = ""); + bool lockPaths(const PathSet & _paths, + const string & waitMsg = "", + bool wait = true); ~PathLocks(); void unlock(); void setDeletion(bool deletePaths);