forked from lix-project/lix
* No longer block while waiting for a lock on a store path. Instead
poll for it (i.e. if we can't acquire the lock, then let the main select() loop wait for at most a few seconds and then try again). This improves parallelism: if two nix-store processes are both trying to build a path at the same time, the second one shouldn't block; it should first see if it can build other goals. Also, it prevents the deadlocks that have been occuring in Hydra lately, where a process waits for a lock held by another process that's waiting for a lock held by the first. The downside is that polling isn't really elegant, but POSIX doesn't provide a way to wait for locks in a select() loop. The only solution would be to spawn a thread for each lock to do a blocking fcntl() and then signal the main thread, but that would require pthreads.
This commit is contained in:
parent
58969fa2bf
commit
cacff1be88
5 changed files with 85 additions and 28 deletions
|
@ -8,6 +8,7 @@
|
|||
#include <map>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <boost/weak_ptr.hpp>
|
||||
#include <boost/enable_shared_from_this.hpp>
|
||||
|
@ -200,6 +201,12 @@ private:
|
|||
/* Goals waiting for busy paths to be unlocked. */
|
||||
WeakGoals waitingForAnyGoal;
|
||||
|
||||
/* Goals sleeping for a few seconds (polling a lock). */
|
||||
WeakGoals waitingForAWhile;
|
||||
|
||||
/* Last time the goals in `waitingForAWhile' where woken up. */
|
||||
time_t lastWokenUp;
|
||||
|
||||
public:
|
||||
|
||||
LocalStore & store;
|
||||
|
@ -246,6 +253,12 @@ public:
|
|||
wait for some resource that some other goal is holding. */
|
||||
void waitForAnyGoal(GoalPtr goal);
|
||||
|
||||
/* Wait for a few seconds and then retry this goal. Used when
|
||||
waiting for a lock held by another process. This kind of
|
||||
polling is inefficient, but POSIX doesn't really provide a way
|
||||
to wait for multiple locks in the main select() loop. */
|
||||
void waitForAWhile(GoalPtr goal);
|
||||
|
||||
/* Loop until the specified top-level goals have finished. */
|
||||
void run(const Goals & topGoals);
|
||||
|
||||
|
@ -952,10 +965,14 @@ void DerivationGoal::tryToBuild()
|
|||
}
|
||||
|
||||
/* Obtain locks on all output paths. The locks are automatically
|
||||
released when we exit this function or Nix crashes. */
|
||||
/* !!! nonblock */
|
||||
outputLocks.lockPaths(outputPaths(drv.outputs),
|
||||
(format("waiting for lock on %1%") % showPaths(outputPaths(drv.outputs))).str());
|
||||
released when we exit this function or Nix crashes. If we
|
||||
can't acquire the lock, then continue; hopefully some other
|
||||
goal can start a build, and if not, the main loop will sleep a
|
||||
few seconds and then retry this goal. */
|
||||
if (!outputLocks.lockPaths(outputPaths(drv.outputs), "", false)) {
|
||||
worker.waitForAWhile(shared_from_this());
|
||||
return;
|
||||
}
|
||||
|
||||
/* Now check again whether the outputs are valid. This is because
|
||||
another process may have started building in parallel. After
|
||||
|
@ -2205,8 +2222,10 @@ void SubstitutionGoal::tryToRun()
|
|||
|
||||
/* Acquire a lock on the output path. */
|
||||
outputLock = boost::shared_ptr<PathLocks>(new PathLocks);
|
||||
outputLock->lockPaths(singleton<PathSet>(storePath),
|
||||
(format("waiting for lock on `%1%'") % storePath).str());
|
||||
if (!outputLock->lockPaths(singleton<PathSet>(storePath), "", false)) {
|
||||
worker.waitForAWhile(shared_from_this());
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check again whether the path is invalid. */
|
||||
if (worker.store.isValidPath(storePath)) {
|
||||
|
@ -2372,6 +2391,7 @@ Worker::Worker(LocalStore & store)
|
|||
if (working) abort();
|
||||
working = true;
|
||||
nrChildren = 0;
|
||||
lastWokenUp = 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -2440,9 +2460,7 @@ void Worker::removeGoal(GoalPtr goal)
|
|||
}
|
||||
|
||||
/* Wake up goals waiting for any goal to finish. */
|
||||
for (WeakGoals::iterator i = waitingForAnyGoal.begin();
|
||||
i != waitingForAnyGoal.end(); ++i)
|
||||
{
|
||||
foreach (WeakGoals::iterator, i, waitingForAnyGoal) {
|
||||
GoalPtr goal = i->lock();
|
||||
if (goal) wakeUp(goal);
|
||||
}
|
||||
|
@ -2539,6 +2557,13 @@ void Worker::waitForAnyGoal(GoalPtr goal)
|
|||
}
|
||||
|
||||
|
||||
void Worker::waitForAWhile(GoalPtr goal)
|
||||
{
|
||||
debug("wait for a while");
|
||||
waitingForAWhile.insert(goal);
|
||||
}
|
||||
|
||||
|
||||
void Worker::run(const Goals & _topGoals)
|
||||
{
|
||||
for (Goals::iterator i = _topGoals.begin();
|
||||
|
@ -2566,10 +2591,9 @@ void Worker::run(const Goals & _topGoals)
|
|||
if (topGoals.empty()) break;
|
||||
|
||||
/* Wait for input. */
|
||||
if (!children.empty())
|
||||
if (!children.empty() || !waitingForAWhile.empty())
|
||||
waitForInput();
|
||||
else
|
||||
/* !!! not when we're polling */
|
||||
assert(!awake.empty());
|
||||
}
|
||||
|
||||
|
@ -2592,22 +2616,36 @@ void Worker::waitForInput()
|
|||
the logger pipe of a build, we assume that the builder has
|
||||
terminated. */
|
||||
|
||||
bool useTimeout = false;
|
||||
struct timeval timeout;
|
||||
timeout.tv_usec = 0;
|
||||
time_t before = time(0);
|
||||
|
||||
/* If we're monitoring for silence on stdout/stderr, sleep until
|
||||
the first deadline for any child. */
|
||||
struct timeval timeout;
|
||||
if (maxSilentTime != 0) {
|
||||
time_t oldest = 0;
|
||||
foreach (Children::iterator, i, children) {
|
||||
oldest = oldest == 0 || i->second.lastOutput < oldest
|
||||
? i->second.lastOutput : oldest;
|
||||
}
|
||||
time_t now = time(0);
|
||||
timeout.tv_sec = (time_t) (oldest + maxSilentTime) <= now ? 0 :
|
||||
oldest + maxSilentTime - now;
|
||||
timeout.tv_usec = 0;
|
||||
useTimeout = true;
|
||||
timeout.tv_sec = std::max((time_t) 0, oldest + maxSilentTime - before);
|
||||
printMsg(lvlVomit, format("sleeping %1% seconds") % timeout.tv_sec);
|
||||
}
|
||||
|
||||
/* If we are polling goals that are waiting for a lock, then wake
|
||||
up after a few seconds at most. */
|
||||
int wakeUpInterval = 3;
|
||||
|
||||
if (!waitingForAWhile.empty()) {
|
||||
useTimeout = true;
|
||||
if (lastWokenUp == 0 && children.empty())
|
||||
printMsg(lvlError, "waiting for locks...");
|
||||
if (lastWokenUp == 0 || lastWokenUp > before) lastWokenUp = before;
|
||||
timeout.tv_sec = std::max((time_t) 0, lastWokenUp + wakeUpInterval - before);
|
||||
} else lastWokenUp = 0;
|
||||
|
||||
/* Use select() to wait for the input side of any logger pipe to
|
||||
become `available'. Note that `available' (i.e., non-blocking)
|
||||
includes EOF. */
|
||||
|
@ -2621,12 +2659,12 @@ void Worker::waitForInput()
|
|||
}
|
||||
}
|
||||
|
||||
if (select(fdMax, &fds, 0, 0, maxSilentTime != 0 ? &timeout : 0) == -1) {
|
||||
if (select(fdMax, &fds, 0, 0, useTimeout ? &timeout : 0) == -1) {
|
||||
if (errno == EINTR) return;
|
||||
throw SysError("waiting for input");
|
||||
}
|
||||
|
||||
time_t now = time(0);
|
||||
time_t after = time(0);
|
||||
|
||||
/* Process all available file descriptors. */
|
||||
|
||||
|
@ -2662,13 +2700,13 @@ void Worker::waitForInput()
|
|||
% goal->getName() % rd);
|
||||
string data((char *) buffer, rd);
|
||||
goal->handleChildOutput(*k, data);
|
||||
j->second.lastOutput = now;
|
||||
j->second.lastOutput = after;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (maxSilentTime != 0 &&
|
||||
now - j->second.lastOutput >= (time_t) maxSilentTime)
|
||||
after - j->second.lastOutput >= (time_t) maxSilentTime)
|
||||
{
|
||||
printMsg(lvlError,
|
||||
format("%1% timed out after %2% seconds of silence")
|
||||
|
@ -2676,6 +2714,15 @@ void Worker::waitForInput()
|
|||
goal->cancel();
|
||||
}
|
||||
}
|
||||
|
||||
if (!waitingForAWhile.empty() && lastWokenUp + wakeUpInterval >= after) {
|
||||
lastWokenUp = after;
|
||||
foreach (WeakGoals::iterator, i, waitingForAWhile) {
|
||||
GoalPtr goal = i->lock();
|
||||
if (goal) wakeUp(goal);
|
||||
}
|
||||
waitingForAWhile.clear();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ Verbosity buildVerbosity = lvlInfo;
|
|||
unsigned int maxBuildJobs = 1;
|
||||
bool readOnlyMode = false;
|
||||
string thisSystem = "unset";
|
||||
unsigned int maxSilentTime = 0;
|
||||
time_t maxSilentTime = 0;
|
||||
Paths substituters;
|
||||
bool useBuildHook = true;
|
||||
bool printBuildTrace = false;
|
||||
|
|
|
@ -65,7 +65,7 @@ extern string thisSystem;
|
|||
/* The maximum time in seconds that a builer can go without producing
|
||||
any output on stdout/stderr before it is killed. 0 means
|
||||
infinity. */
|
||||
extern unsigned int maxSilentTime;
|
||||
extern time_t maxSilentTime;
|
||||
|
||||
/* The substituters. There are programs that can somehow realise a
|
||||
store path without building, e.g., by downloading it or copying it
|
||||
|
|
|
@ -141,9 +141,9 @@ PathLocks::PathLocks(const PathSet & paths, const string & waitMsg)
|
|||
}
|
||||
|
||||
|
||||
void PathLocks::lockPaths(const PathSet & _paths, const string & waitMsg)
|
||||
bool PathLocks::lockPaths(const PathSet & _paths,
|
||||
const string & waitMsg, bool wait)
|
||||
{
|
||||
/* May be called only once! */
|
||||
assert(fds.empty());
|
||||
|
||||
/* Note that `fds' is built incrementally so that the destructor
|
||||
|
@ -174,8 +174,15 @@ void PathLocks::lockPaths(const PathSet & _paths, const string & waitMsg)
|
|||
|
||||
/* Acquire an exclusive lock. */
|
||||
if (!lockFile(fd, ltWrite, false)) {
|
||||
if (waitMsg != "") printMsg(lvlError, waitMsg);
|
||||
lockFile(fd, ltWrite, true);
|
||||
if (wait) {
|
||||
if (waitMsg != "") printMsg(lvlError, waitMsg);
|
||||
lockFile(fd, ltWrite, true);
|
||||
} else {
|
||||
/* Failed to lock this path; release all other
|
||||
locks. */
|
||||
unlock();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
debug(format("lock acquired on `%1%'") % lockPath);
|
||||
|
@ -199,6 +206,8 @@ void PathLocks::lockPaths(const PathSet & _paths, const string & waitMsg)
|
|||
fds.push_back(FDPair(fd.borrow(), lockPath));
|
||||
lockedPaths.insert(lockPath);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -33,8 +33,9 @@ public:
|
|||
PathLocks();
|
||||
PathLocks(const PathSet & paths,
|
||||
const string & waitMsg = "");
|
||||
void lockPaths(const PathSet & _paths,
|
||||
const string & waitMsg = "");
|
||||
bool lockPaths(const PathSet & _paths,
|
||||
const string & waitMsg = "",
|
||||
bool wait = true);
|
||||
~PathLocks();
|
||||
void unlock();
|
||||
void setDeletion(bool deletePaths);
|
||||
|
|
Loading…
Reference in a new issue