* No longer block while waiting for a lock on a store path. Instead

poll for it (i.e. if we can't acquire the lock, then let the main
  select() loop wait for at most a few seconds and then try again).
  This improves parallelism: if two nix-store processes are both
  trying to build a path at the same time, the second one shouldn't
  block; it should first see if it can build other goals.  Also, it
  prevents the deadlocks that have been occuring in Hydra lately,
  where a process waits for a lock held by another process that's
  waiting for a lock held by the first.

  The downside is that polling isn't really elegant, but POSIX doesn't
  provide a way to wait for locks in a select() loop.  The only
  solution would be to spawn a thread for each lock to do a blocking
  fcntl() and then signal the main thread, but that would require
  pthreads.
This commit is contained in:
Eelco Dolstra 2009-03-23 01:05:54 +00:00
parent 58969fa2bf
commit cacff1be88
5 changed files with 85 additions and 28 deletions

View file

@ -8,6 +8,7 @@
#include <map>
#include <iostream>
#include <sstream>
#include <algorithm>
#include <boost/shared_ptr.hpp>
#include <boost/weak_ptr.hpp>
#include <boost/enable_shared_from_this.hpp>
@ -200,6 +201,12 @@ private:
/* Goals waiting for busy paths to be unlocked. */
WeakGoals waitingForAnyGoal;
/* Goals sleeping for a few seconds (polling a lock). */
WeakGoals waitingForAWhile;
/* Last time the goals in `waitingForAWhile' where woken up. */
time_t lastWokenUp;
public:
LocalStore & store;
@ -246,6 +253,12 @@ public:
wait for some resource that some other goal is holding. */
void waitForAnyGoal(GoalPtr goal);
/* Wait for a few seconds and then retry this goal. Used when
waiting for a lock held by another process. This kind of
polling is inefficient, but POSIX doesn't really provide a way
to wait for multiple locks in the main select() loop. */
void waitForAWhile(GoalPtr goal);
/* Loop until the specified top-level goals have finished. */
void run(const Goals & topGoals);
@ -952,10 +965,14 @@ void DerivationGoal::tryToBuild()
}
/* Obtain locks on all output paths. The locks are automatically
released when we exit this function or Nix crashes. */
/* !!! nonblock */
outputLocks.lockPaths(outputPaths(drv.outputs),
(format("waiting for lock on %1%") % showPaths(outputPaths(drv.outputs))).str());
released when we exit this function or Nix crashes. If we
can't acquire the lock, then continue; hopefully some other
goal can start a build, and if not, the main loop will sleep a
few seconds and then retry this goal. */
if (!outputLocks.lockPaths(outputPaths(drv.outputs), "", false)) {
worker.waitForAWhile(shared_from_this());
return;
}
/* Now check again whether the outputs are valid. This is because
another process may have started building in parallel. After
@ -2205,8 +2222,10 @@ void SubstitutionGoal::tryToRun()
/* Acquire a lock on the output path. */
outputLock = boost::shared_ptr<PathLocks>(new PathLocks);
outputLock->lockPaths(singleton<PathSet>(storePath),
(format("waiting for lock on `%1%'") % storePath).str());
if (!outputLock->lockPaths(singleton<PathSet>(storePath), "", false)) {
worker.waitForAWhile(shared_from_this());
return;
}
/* Check again whether the path is invalid. */
if (worker.store.isValidPath(storePath)) {
@ -2372,6 +2391,7 @@ Worker::Worker(LocalStore & store)
if (working) abort();
working = true;
nrChildren = 0;
lastWokenUp = 0;
}
@ -2440,9 +2460,7 @@ void Worker::removeGoal(GoalPtr goal)
}
/* Wake up goals waiting for any goal to finish. */
for (WeakGoals::iterator i = waitingForAnyGoal.begin();
i != waitingForAnyGoal.end(); ++i)
{
foreach (WeakGoals::iterator, i, waitingForAnyGoal) {
GoalPtr goal = i->lock();
if (goal) wakeUp(goal);
}
@ -2539,6 +2557,13 @@ void Worker::waitForAnyGoal(GoalPtr goal)
}
void Worker::waitForAWhile(GoalPtr goal)
{
debug("wait for a while");
waitingForAWhile.insert(goal);
}
void Worker::run(const Goals & _topGoals)
{
for (Goals::iterator i = _topGoals.begin();
@ -2566,10 +2591,9 @@ void Worker::run(const Goals & _topGoals)
if (topGoals.empty()) break;
/* Wait for input. */
if (!children.empty())
if (!children.empty() || !waitingForAWhile.empty())
waitForInput();
else
/* !!! not when we're polling */
assert(!awake.empty());
}
@ -2592,22 +2616,36 @@ void Worker::waitForInput()
the logger pipe of a build, we assume that the builder has
terminated. */
bool useTimeout = false;
struct timeval timeout;
timeout.tv_usec = 0;
time_t before = time(0);
/* If we're monitoring for silence on stdout/stderr, sleep until
the first deadline for any child. */
struct timeval timeout;
if (maxSilentTime != 0) {
time_t oldest = 0;
foreach (Children::iterator, i, children) {
oldest = oldest == 0 || i->second.lastOutput < oldest
? i->second.lastOutput : oldest;
}
time_t now = time(0);
timeout.tv_sec = (time_t) (oldest + maxSilentTime) <= now ? 0 :
oldest + maxSilentTime - now;
timeout.tv_usec = 0;
useTimeout = true;
timeout.tv_sec = std::max((time_t) 0, oldest + maxSilentTime - before);
printMsg(lvlVomit, format("sleeping %1% seconds") % timeout.tv_sec);
}
/* If we are polling goals that are waiting for a lock, then wake
up after a few seconds at most. */
int wakeUpInterval = 3;
if (!waitingForAWhile.empty()) {
useTimeout = true;
if (lastWokenUp == 0 && children.empty())
printMsg(lvlError, "waiting for locks...");
if (lastWokenUp == 0 || lastWokenUp > before) lastWokenUp = before;
timeout.tv_sec = std::max((time_t) 0, lastWokenUp + wakeUpInterval - before);
} else lastWokenUp = 0;
/* Use select() to wait for the input side of any logger pipe to
become `available'. Note that `available' (i.e., non-blocking)
includes EOF. */
@ -2621,12 +2659,12 @@ void Worker::waitForInput()
}
}
if (select(fdMax, &fds, 0, 0, maxSilentTime != 0 ? &timeout : 0) == -1) {
if (select(fdMax, &fds, 0, 0, useTimeout ? &timeout : 0) == -1) {
if (errno == EINTR) return;
throw SysError("waiting for input");
}
time_t now = time(0);
time_t after = time(0);
/* Process all available file descriptors. */
@ -2662,13 +2700,13 @@ void Worker::waitForInput()
% goal->getName() % rd);
string data((char *) buffer, rd);
goal->handleChildOutput(*k, data);
j->second.lastOutput = now;
j->second.lastOutput = after;
}
}
}
if (maxSilentTime != 0 &&
now - j->second.lastOutput >= (time_t) maxSilentTime)
after - j->second.lastOutput >= (time_t) maxSilentTime)
{
printMsg(lvlError,
format("%1% timed out after %2% seconds of silence")
@ -2676,6 +2714,15 @@ void Worker::waitForInput()
goal->cancel();
}
}
if (!waitingForAWhile.empty() && lastWokenUp + wakeUpInterval >= after) {
lastWokenUp = after;
foreach (WeakGoals::iterator, i, waitingForAWhile) {
GoalPtr goal = i->lock();
if (goal) wakeUp(goal);
}
waitingForAWhile.clear();
}
}

View file

@ -24,7 +24,7 @@ Verbosity buildVerbosity = lvlInfo;
unsigned int maxBuildJobs = 1;
bool readOnlyMode = false;
string thisSystem = "unset";
unsigned int maxSilentTime = 0;
time_t maxSilentTime = 0;
Paths substituters;
bool useBuildHook = true;
bool printBuildTrace = false;

View file

@ -65,7 +65,7 @@ extern string thisSystem;
/* The maximum time in seconds that a builer can go without producing
any output on stdout/stderr before it is killed. 0 means
infinity. */
extern unsigned int maxSilentTime;
extern time_t maxSilentTime;
/* The substituters. There are programs that can somehow realise a
store path without building, e.g., by downloading it or copying it

View file

@ -141,9 +141,9 @@ PathLocks::PathLocks(const PathSet & paths, const string & waitMsg)
}
void PathLocks::lockPaths(const PathSet & _paths, const string & waitMsg)
bool PathLocks::lockPaths(const PathSet & _paths,
const string & waitMsg, bool wait)
{
/* May be called only once! */
assert(fds.empty());
/* Note that `fds' is built incrementally so that the destructor
@ -174,8 +174,15 @@ void PathLocks::lockPaths(const PathSet & _paths, const string & waitMsg)
/* Acquire an exclusive lock. */
if (!lockFile(fd, ltWrite, false)) {
if (waitMsg != "") printMsg(lvlError, waitMsg);
lockFile(fd, ltWrite, true);
if (wait) {
if (waitMsg != "") printMsg(lvlError, waitMsg);
lockFile(fd, ltWrite, true);
} else {
/* Failed to lock this path; release all other
locks. */
unlock();
return false;
}
}
debug(format("lock acquired on `%1%'") % lockPath);
@ -199,6 +206,8 @@ void PathLocks::lockPaths(const PathSet & _paths, const string & waitMsg)
fds.push_back(FDPair(fd.borrow(), lockPath));
lockedPaths.insert(lockPath);
}
return true;
}

View file

@ -33,8 +33,9 @@ public:
PathLocks();
PathLocks(const PathSet & paths,
const string & waitMsg = "");
void lockPaths(const PathSet & _paths,
const string & waitMsg = "");
bool lockPaths(const PathSet & _paths,
const string & waitMsg = "",
bool wait = true);
~PathLocks();
void unlock();
void setDeletion(bool deletePaths);