* Distributed builds and load balancing now seem to work pretty well.
(Though the `build-remote.pl' script has a gigantic race condition).
This commit is contained in:
parent
2fa3304933
commit
4fc00cbec1
2 changed files with 101 additions and 39 deletions
|
@ -80,6 +80,21 @@ if (!defined $machine) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sub writeLoad {
|
||||||
|
system "echo A >> /tmp/blaaaa";
|
||||||
|
open LOAD, "> /home/eelco/nix/distributed/current-load" or die;
|
||||||
|
system "echo B >> /tmp/blaaaa";
|
||||||
|
foreach my $cur (keys %machines) {
|
||||||
|
system "echo $cur $curJobs{$cur} >> /tmp/blaaaa";
|
||||||
|
print LOAD "$cur $curJobs{$cur}\n";
|
||||||
|
}
|
||||||
|
system "echo C >> /tmp/blaaaa";
|
||||||
|
close LOAD;
|
||||||
|
}
|
||||||
|
|
||||||
|
$curJobs{$machine} = $curJobs{$machine} + 1;
|
||||||
|
writeLoad;
|
||||||
|
|
||||||
sendReply "accept";
|
sendReply "accept";
|
||||||
open IN, "<&4" or die;
|
open IN, "<&4" or die;
|
||||||
my $x = <IN>;
|
my $x = <IN>;
|
||||||
|
@ -87,30 +102,27 @@ chomp $x;
|
||||||
print "got $x\n";
|
print "got $x\n";
|
||||||
close IN;
|
close IN;
|
||||||
|
|
||||||
|
system "echo $x >> /tmp/blaaaa";
|
||||||
|
system "echo $curJobs{$machine} >> /tmp/blaaaa";
|
||||||
|
|
||||||
|
if ($x ne "okay") {
|
||||||
|
$curJobs{$machine} = $curJobs{$machine} - 1;
|
||||||
|
system "echo $curJobs{$machine} >> /tmp/blaaaa";
|
||||||
|
writeLoad;
|
||||||
|
exit 0;
|
||||||
|
}
|
||||||
|
|
||||||
print "BUILDING REMOTE: $storeExpr on $machine\n";
|
print "BUILDING REMOTE: $storeExpr on $machine\n";
|
||||||
|
|
||||||
$curJobs{$machine} = $curJobs{$machine} + 1;
|
|
||||||
|
|
||||||
sub writeLoad {
|
|
||||||
open LOAD, "> /home/eelco/nix/distributed/current-load" or die;
|
|
||||||
foreach my $cur (keys %machines) {
|
|
||||||
print LOAD "$cur $curJobs{$cur}\n";
|
|
||||||
}
|
|
||||||
close LOAD;
|
|
||||||
}
|
|
||||||
|
|
||||||
writeLoad
|
|
||||||
|
|
||||||
|
|
||||||
my $ssh = "ssh -i $sshKeys{$machine} -x";
|
my $ssh = "ssh -i $sshKeys{$machine} -x";
|
||||||
|
|
||||||
my $inputs = `cat inputs`;
|
my $inputs = `cat inputs` or die;
|
||||||
$inputs =~ s/\n/ /g;
|
$inputs =~ s/\n/ /g;
|
||||||
|
|
||||||
my $outputs = `cat outputs`;
|
my $outputs = `cat outputs` or die;
|
||||||
$outputs =~ s/\n/ /g;
|
$outputs =~ s/\n/ /g;
|
||||||
|
|
||||||
my $successors = `cat successors`;
|
my $successors = `cat successors` or die;
|
||||||
$successors =~ s/\n/ /g;
|
$successors =~ s/\n/ /g;
|
||||||
|
|
||||||
system "rsync -a -e '$ssh' $storeExpr $inputs $machine:/nix/store";
|
system "rsync -a -e '$ssh' $storeExpr $inputs $machine:/nix/store";
|
||||||
|
@ -136,4 +148,4 @@ foreach my $output (split '\n', $outputs) {
|
||||||
|
|
||||||
$curJobs{$machine} = $curJobs{$machine} - 1;
|
$curJobs{$machine} = $curJobs{$machine} - 1;
|
||||||
|
|
||||||
writeLoad
|
writeLoad;
|
||||||
|
|
|
@ -91,6 +91,9 @@ struct Goal
|
||||||
|
|
||||||
/* The remainder is state held during the build. */
|
/* The remainder is state held during the build. */
|
||||||
|
|
||||||
|
/* Whether it's being built by a hook or by ourselves. */
|
||||||
|
bool inHook;
|
||||||
|
|
||||||
/* Locks on the output paths. */
|
/* Locks on the output paths. */
|
||||||
PathLocks outputLocks;
|
PathLocks outputLocks;
|
||||||
|
|
||||||
|
@ -201,6 +204,11 @@ private:
|
||||||
of derivation expressions that have yet to be normalised. */
|
of derivation expressions that have yet to be normalised. */
|
||||||
Goals goals;
|
Goals goals;
|
||||||
|
|
||||||
|
/* Finished goals are removed in run() at top-level; they are not
|
||||||
|
deleted as soon as they are finished, since there may be
|
||||||
|
references hanging about. */
|
||||||
|
PathSet deadGoals;
|
||||||
|
|
||||||
/* The set of `buildable' goals, which are the ones with an empty
|
/* The set of `buildable' goals, which are the ones with an empty
|
||||||
list of unfinished inputs. */
|
list of unfinished inputs. */
|
||||||
PathSet buildable;
|
PathSet buildable;
|
||||||
|
@ -237,7 +245,10 @@ private:
|
||||||
|
|
||||||
void startBuildChild(Goal & goal);
|
void startBuildChild(Goal & goal);
|
||||||
|
|
||||||
bool tryBuildHook(Goal & goal);
|
typedef enum {rpAccept, rpDecline, rpPostpone} HookReply;
|
||||||
|
HookReply tryBuildHook(Goal & goal);
|
||||||
|
|
||||||
|
void terminateBuildHook(Goal & goal);
|
||||||
|
|
||||||
void openLogFile(Goal & goal);
|
void openLogFile(Goal & goal);
|
||||||
|
|
||||||
|
@ -353,6 +364,12 @@ void Normaliser::run()
|
||||||
do {
|
do {
|
||||||
printMsg(lvlVomit, "waiting for children");
|
printMsg(lvlVomit, "waiting for children");
|
||||||
} while (!waitForChildren());
|
} while (!waitForChildren());
|
||||||
|
|
||||||
|
/* Remove finished goals from the graph. */
|
||||||
|
for (PathSet::iterator i = deadGoals.begin();
|
||||||
|
i != deadGoals.end(); ++i)
|
||||||
|
goals.erase(*i);
|
||||||
|
deadGoals.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(buildable.empty() && building.empty());
|
assert(buildable.empty() && building.empty());
|
||||||
|
@ -361,7 +378,12 @@ void Normaliser::run()
|
||||||
|
|
||||||
bool Normaliser::canBuildMore()
|
bool Normaliser::canBuildMore()
|
||||||
{
|
{
|
||||||
return building.size() < maxBuildJobs;
|
/* !!! O(n) - not that it matters */
|
||||||
|
unsigned int localJobs = 0;
|
||||||
|
for (Building::iterator i = building.begin();
|
||||||
|
i != building.end(); ++i)
|
||||||
|
if (!goals[i->second].inHook) localJobs++;
|
||||||
|
return localJobs < maxBuildJobs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -378,13 +400,19 @@ bool Normaliser::startBuild(Path nePath)
|
||||||
format("starting normalisation of goal `%1%'") % nePath);
|
format("starting normalisation of goal `%1%'") % nePath);
|
||||||
|
|
||||||
/* Is the build hook willing to accept this job? */
|
/* Is the build hook willing to accept this job? */
|
||||||
if (tryBuildHook(goal)) return true;
|
switch (tryBuildHook(goal)) {
|
||||||
|
case rpAccept: return true;
|
||||||
|
case rpPostpone: return false;
|
||||||
|
case rpDecline: ;
|
||||||
|
}
|
||||||
|
|
||||||
if (!canBuildMore()) {
|
if (!canBuildMore()) {
|
||||||
debug("postponing build");
|
debug("postponing build");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
goal.inHook = false;
|
||||||
|
|
||||||
/* Prepare the build, i.e., acquire locks and gather necessary
|
/* Prepare the build, i.e., acquire locks and gather necessary
|
||||||
information. */
|
information. */
|
||||||
if (!prepareBuild(goal)) return true;
|
if (!prepareBuild(goal)) return true;
|
||||||
|
@ -616,10 +644,17 @@ string readLine(int fd)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool Normaliser::tryBuildHook(Goal & goal)
|
void writeLine(int fd, string s)
|
||||||
|
{
|
||||||
|
s += '\n';
|
||||||
|
writeFull(fd, (const unsigned char *) s.c_str(), s.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Normaliser::HookReply Normaliser::tryBuildHook(Goal & goal)
|
||||||
{
|
{
|
||||||
Path buildHook = getEnv("NIX_BUILD_HOOK");
|
Path buildHook = getEnv("NIX_BUILD_HOOK");
|
||||||
if (buildHook == "") return false;
|
if (buildHook == "") return rpDecline;
|
||||||
buildHook = absPath(buildHook);
|
buildHook = absPath(buildHook);
|
||||||
|
|
||||||
/* Create a directory where we will store files used for
|
/* Create a directory where we will store files used for
|
||||||
|
@ -676,21 +711,17 @@ bool Normaliser::tryBuildHook(Goal & goal)
|
||||||
|
|
||||||
if (reply == "decline" || reply == "postpone") {
|
if (reply == "decline" || reply == "postpone") {
|
||||||
/* Clean up the child. !!! hacky / should verify */
|
/* Clean up the child. !!! hacky / should verify */
|
||||||
/* !!! drain stdout of hook, wait for child process */
|
terminateBuildHook(goal);
|
||||||
goal.pid = 0;
|
return reply == "decline" ? rpDecline : rpPostpone;
|
||||||
goal.fromHook.closeReadSide();
|
|
||||||
goal.toHook.closeWriteSide();
|
|
||||||
close(goal.fdLogFile);
|
|
||||||
goal.fdLogFile = 0;
|
|
||||||
goal.logPipe.closeReadSide();
|
|
||||||
building.erase(pid);
|
|
||||||
return reply == "postpone";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (reply == "accept") {
|
else if (reply == "accept") {
|
||||||
|
|
||||||
if (!prepareBuild(goal))
|
if (!prepareBuild(goal)) {
|
||||||
throw Error("NOT IMPLEMENTED: hook unnecessary");
|
writeLine(goal.toHook.writeSide, "cancel");
|
||||||
|
terminateBuildHook(goal);
|
||||||
|
return rpAccept;
|
||||||
|
}
|
||||||
|
|
||||||
Path inputListFN = goal.tmpDir + "/inputs";
|
Path inputListFN = goal.tmpDir + "/inputs";
|
||||||
Path outputListFN = goal.tmpDir + "/outputs";
|
Path outputListFN = goal.tmpDir + "/outputs";
|
||||||
|
@ -717,17 +748,35 @@ bool Normaliser::tryBuildHook(Goal & goal)
|
||||||
s += i->first + " " + i->second + "\n";
|
s += i->first + " " + i->second + "\n";
|
||||||
writeStringToFile(successorsListFN, s);
|
writeStringToFile(successorsListFN, s);
|
||||||
|
|
||||||
string okay = "okay\n";
|
writeLine(goal.toHook.writeSide, "okay");
|
||||||
writeFull(goal.toHook.writeSide,
|
|
||||||
(const unsigned char *) okay.c_str(), okay.size());
|
|
||||||
|
|
||||||
return true;
|
goal.inHook = true;
|
||||||
|
|
||||||
|
return rpAccept;
|
||||||
}
|
}
|
||||||
|
|
||||||
else throw Error(format("bad hook reply `%1%'") % reply);
|
else throw Error(format("bad hook reply `%1%'") % reply);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void Normaliser::terminateBuildHook(Goal & goal)
|
||||||
|
{
|
||||||
|
/* !!! drain stdout of hook, wait for child process */
|
||||||
|
debug("terminating build hook");
|
||||||
|
pid_t pid = goal.pid;
|
||||||
|
int status;
|
||||||
|
if (waitpid(goal.pid, &status, 0) != goal.pid)
|
||||||
|
printMsg(lvlError, format("process `%1%' missing") % goal.pid);
|
||||||
|
goal.pid = 0;
|
||||||
|
goal.fromHook.closeReadSide();
|
||||||
|
goal.toHook.closeWriteSide();
|
||||||
|
close(goal.fdLogFile);
|
||||||
|
goal.fdLogFile = 0;
|
||||||
|
goal.logPipe.closeReadSide();
|
||||||
|
building.erase(pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void Normaliser::openLogFile(Goal & goal)
|
void Normaliser::openLogFile(Goal & goal)
|
||||||
{
|
{
|
||||||
/* Create a log file. */
|
/* Create a log file. */
|
||||||
|
@ -841,7 +890,7 @@ bool Normaliser::waitForChildren()
|
||||||
Goal & goal(goals[i->second]);
|
Goal & goal(goals[i->second]);
|
||||||
int fd = goal.logPipe.readSide;
|
int fd = goal.logPipe.readSide;
|
||||||
if (FD_ISSET(fd, &fds)) {
|
if (FD_ISSET(fd, &fds)) {
|
||||||
unsigned char buffer[1024];
|
unsigned char buffer[4096];
|
||||||
ssize_t rd = read(fd, buffer, sizeof(buffer));
|
ssize_t rd = read(fd, buffer, sizeof(buffer));
|
||||||
if (rd == -1) {
|
if (rd == -1) {
|
||||||
if (errno != EINTR)
|
if (errno != EINTR)
|
||||||
|
@ -1047,9 +1096,10 @@ void Normaliser::removeGoal(Goal & goal)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Remove this goal from the graph. Careful: after this `goal' is
|
/* Lazily remove the goal from the graph (it will be actually
|
||||||
probably no longer valid. */
|
removed in run(); this is since callers may have references to
|
||||||
goals.erase(goal.nePath);
|
`goal'). */
|
||||||
|
deadGoals.insert(goal.nePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue