hydra-queue-runner: Use cmdBuildDerivation

See 1511aa9f48 and eda2f36c2a.
This commit is contained in:
Eelco Dolstra 2015-07-21 01:45:00 +02:00
parent 62b1b095ad
commit 5370be9f52
5 changed files with 85 additions and 58 deletions

View file

@ -81,10 +81,7 @@ static void copyClosureTo(std::shared_ptr<StoreAPI> store,
enabled. This prevents a race where the remote host enabled. This prevents a race where the remote host
garbage-collects paths that are already there. Optionally, ask garbage-collects paths that are already there. Optionally, ask
the remote host to substitute missing paths. */ the remote host to substitute missing paths. */
writeInt(cmdQueryValidPaths, to); to << cmdQueryValidPaths << 1 << useSubstitutes << closure;
writeInt(1, to); // == lock paths
writeInt(useSubstitutes, to);
writeStrings(closure, to);
to.flush(); to.flush();
/* Get back the set of paths that are already valid on the remote /* Get back the set of paths that are already valid on the remote
@ -104,7 +101,7 @@ static void copyClosureTo(std::shared_ptr<StoreAPI> store,
for (auto & p : missing) for (auto & p : missing)
bytesSent += store->queryPathInfo(p).narSize; bytesSent += store->queryPathInfo(p).narSize;
writeInt(cmdImportPaths, to); to << cmdImportPaths;
exportPaths(*store, missing, false, to); exportPaths(*store, missing, false, to);
to.flush(); to.flush();
@ -116,9 +113,7 @@ static void copyClosureTo(std::shared_ptr<StoreAPI> store,
static void copyClosureFrom(std::shared_ptr<StoreAPI> store, static void copyClosureFrom(std::shared_ptr<StoreAPI> store,
FdSource & from, FdSink & to, const PathSet & paths, counter & bytesReceived) FdSource & from, FdSink & to, const PathSet & paths, counter & bytesReceived)
{ {
writeInt(cmdExportPaths, to); to << cmdExportPaths << 0 << paths;
writeInt(0, to); // == don't sign
writeStrings(paths, to);
to.flush(); to.flush();
store->importPaths(false, from); store->importPaths(false, from);
@ -150,9 +145,9 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
FdSink to(child.to); FdSink to(child.to);
/* Handshake. */ /* Handshake. */
bool sendDerivation = true;
try { try {
writeInt(SERVE_MAGIC_1, to); to << SERVE_MAGIC_1 << SERVE_PROTOCOL_VERSION;
writeInt(SERVE_PROTOCOL_VERSION, to);
to.flush(); to.flush();
unsigned int magic = readInt(from); unsigned int magic = readInt(from);
@ -161,19 +156,33 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
unsigned int version = readInt(from); unsigned int version = readInt(from);
if (GET_PROTOCOL_MAJOR(version) != 0x200) if (GET_PROTOCOL_MAJOR(version) != 0x200)
throw Error(format("unsupported nix-store --serve protocol version on %1%") % machine->sshName); throw Error(format("unsupported nix-store --serve protocol version on %1%") % machine->sshName);
if (GET_PROTOCOL_MINOR(version) >= 1)
sendDerivation = false;
} catch (EndOfFile & e) { } catch (EndOfFile & e) {
child.pid.wait(true); child.pid.wait(true);
string s = chomp(readFile(result.logFile)); string s = chomp(readFile(result.logFile));
throw Error(format("cannot connect to %1%: %2%") % machine->sshName % s); throw Error(format("cannot connect to %1%: %2%") % machine->sshName % s);
} }
/* Gather the inputs. */ /* Gather the inputs. If the remote side is Nix <= 1.9, we have to
PathSet inputs({step->drvPath}); copy the entire closure of drvPath, as well as the required
outputs of the input derivations. On Nix > 1.9, we only need to
copy the immediate sources of the derivation and the required
outputs of the input derivations. */
PathSet inputs;
if (sendDerivation)
inputs.insert(step->drvPath);
else
for (auto & p : step->drv.inputSrcs)
inputs.insert(p);
for (auto & input : step->drv.inputDrvs) { for (auto & input : step->drv.inputDrvs) {
Derivation drv2 = readDerivation(input.first); Derivation drv2 = readDerivation(input.first);
for (auto & name : input.second) { for (auto & name : input.second) {
auto i = drv2.outputs.find(name); auto i = drv2.outputs.find(name);
if (i != drv2.outputs.end()) inputs.insert(i->second.path); if (i == drv2.outputs.end()) continue;
inputs.insert(i->second.path);
} }
} }
@ -191,12 +200,14 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
/* Do the build. */ /* Do the build. */
printMsg(lvlDebug, format("building %1% on %2%") % step->drvPath % machine->sshName); printMsg(lvlDebug, format("building %1% on %2%") % step->drvPath % machine->sshName);
writeInt(cmdBuildPaths, to);
writeStrings(PathSet({step->drvPath}), to); if (sendDerivation)
writeInt(maxSilentTime, to); to << cmdBuildPaths << PathSet({step->drvPath}) << maxSilentTime << buildTimeout;
writeInt(buildTimeout, to); else
// FIXME: send maxLogSize. to << cmdBuildDerivation << step->drvPath << step->drv << maxSilentTime << buildTimeout;
// FIXME: send maxLogSize.
to.flush(); to.flush();
result.startTime = time(0); result.startTime = time(0);
int res; int res;
{ {
@ -204,12 +215,27 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
res = readInt(from); res = readInt(from);
} }
result.stopTime = time(0); result.stopTime = time(0);
if (res) {
result.errorMsg = (format("%1% on %2%") % readString(from) % machine->sshName).str(); if (sendDerivation) {
if (res == 100) result.status = RemoteResult::rrPermanentFailure; if (res) {
else if (res == 101) result.status = RemoteResult::rrTimedOut; result.errorMsg = (format("%1% on %2%") % readString(from) % machine->sshName).str();
else result.status = RemoteResult::rrMiscFailure; if (res == 100) result.status = BuildResult::PermanentFailure;
return; else if (res == 101) result.status = BuildResult::TimedOut;
else result.status = BuildResult::MiscFailure;
return;
}
result.status = BuildResult::Built;
} else {
result.status = (BuildResult::Status) res;
result.errorMsg = readString(from);
if (!result.success()) return;
}
/* If the path was substituted or already valid, then we didn't
get a build log. */
if (result.status == BuildResult::Substituted || result.status == BuildResult::AlreadyValid) {
unlink(result.logFile.c_str());
result.logFile = "";
} }
/* Copy the output paths. */ /* Copy the output paths. */
@ -226,5 +252,4 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
child.to.close(); child.to.close();
child.pid.wait(true); child.pid.wait(true);
result.status = RemoteResult::rrSuccess;
} }

View file

@ -7,9 +7,9 @@
using namespace nix; using namespace nix;
BuildResult getBuildResult(std::shared_ptr<StoreAPI> store, const Derivation & drv) BuildOutput getBuildOutput(std::shared_ptr<StoreAPI> store, const Derivation & drv)
{ {
BuildResult res; BuildOutput res;
/* Compute the closure size. */ /* Compute the closure size. */
PathSet outputs; PathSet outputs;

View file

@ -15,7 +15,7 @@ struct BuildProduct
BuildProduct() { } BuildProduct() { }
}; };
struct BuildResult struct BuildOutput
{ {
/* Whether this build has failed with output, i.e., the build /* Whether this build has failed with output, i.e., the build
finished with exit code 0 but produced a file finished with exit code 0 but produced a file
@ -29,4 +29,4 @@ struct BuildResult
std::list<BuildProduct> products; std::list<BuildProduct> products;
}; };
BuildResult getBuildResult(std::shared_ptr<nix::StoreAPI> store, const nix::Derivation & drv); BuildOutput getBuildOutput(std::shared_ptr<nix::StoreAPI> store, const nix::Derivation & drv);

View file

@ -314,7 +314,7 @@ void State::getQueuedBuilds(Connection & conn, std::shared_ptr<StoreAPI> store,
all valid. So we mark this as a finished, cached build. */ all valid. So we mark this as a finished, cached build. */
if (!step) { if (!step) {
Derivation drv = readDerivation(build->drvPath); Derivation drv = readDerivation(build->drvPath);
BuildResult res = getBuildResult(store, drv); BuildOutput res = getBuildOutput(store, drv);
pqxx::work txn(conn); pqxx::work txn(conn);
time_t now = time(0); time_t now = time(0);
@ -822,7 +822,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
auto conn(dbPool.get()); auto conn(dbPool.get());
RemoteResult result; RemoteResult result;
BuildResult res; BuildOutput res;
int stepNr = 0; int stepNr = 0;
time_t stepStartTime = result.startTime = time(0); time_t stepStartTime = result.startTime = time(0);
@ -832,7 +832,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
bool cachedFailure = checkCachedFailure(step, *conn); bool cachedFailure = checkCachedFailure(step, *conn);
if (cachedFailure) if (cachedFailure)
result.status = RemoteResult::rrPermanentFailure; result.status = BuildResult::CachedFailure;
else { else {
/* Create a build step record indicating that we started /* Create a build step record indicating that we started
@ -849,11 +849,11 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
/* FIXME: referring builds may have conflicting timeouts. */ /* FIXME: referring builds may have conflicting timeouts. */
buildRemote(store, machine, step, build->maxSilentTime, build->buildTimeout, result); buildRemote(store, machine, step, build->maxSilentTime, build->buildTimeout, result);
} catch (Error & e) { } catch (Error & e) {
result.status = RemoteResult::rrMiscFailure; result.status = BuildResult::MiscFailure;
result.errorMsg = e.msg(); result.errorMsg = e.msg();
} }
if (result.status == RemoteResult::rrSuccess) res = getBuildResult(store, step->drv); if (result.success()) res = getBuildOutput(store, step->drv);
} }
time_t stepStopTime = time(0); time_t stepStopTime = time(0);
@ -870,8 +870,8 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
/* The step had a hopefully temporary failure (e.g. network /* The step had a hopefully temporary failure (e.g. network
issue). Retry a number of times. */ issue). Retry a number of times. */
if (result.status == RemoteResult::rrMiscFailure) { if (result.canRetry()) {
printMsg(lvlError, format("irregular failure building %1% on %2%: %3%") printMsg(lvlError, format("possibly transient failure building %1% on %2%: %3%")
% step->drvPath % machine->sshName % result.errorMsg); % step->drvPath % machine->sshName % result.errorMsg);
bool retry; bool retry;
{ {
@ -888,7 +888,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
} }
} }
if (result.status == RemoteResult::rrSuccess) { if (result.success()) {
/* Register success in the database for all Build objects that /* Register success in the database for all Build objects that
have this step as the top-level step. Since the queue have this step as the top-level step. Since the queue
@ -932,7 +932,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
finishBuildStep(txn, result.startTime, result.stopTime, build->id, stepNr, machine->sshName, bssSuccess); finishBuildStep(txn, result.startTime, result.stopTime, build->id, stepNr, machine->sshName, bssSuccess);
for (auto & b : direct) for (auto & b : direct)
markSucceededBuild(txn, b, res, build != b, markSucceededBuild(txn, b, res, build != b || result.status != BuildResult::Built,
result.startTime, result.stopTime); result.startTime, result.stopTime);
txn.commit(); txn.commit();
@ -1015,17 +1015,21 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
pqxx::work txn(*conn); pqxx::work txn(*conn);
BuildStatus buildStatus = BuildStatus buildStatus =
result.status == RemoteResult::rrPermanentFailure ? bsFailed : result.status == BuildResult::TimedOut ? bsTimedOut :
result.status == RemoteResult::rrTimedOut ? bsTimedOut : result.canRetry() ? bsAborted :
bsAborted; bsFailed;
BuildStepStatus buildStepStatus = BuildStepStatus buildStepStatus =
result.status == RemoteResult::rrPermanentFailure ? bssFailed : result.status == BuildResult::TimedOut ? bssTimedOut :
result.status == RemoteResult::rrTimedOut ? bssTimedOut : result.canRetry() ? bssAborted :
bssAborted; bssFailed;
/* For regular failures, we don't care about the error /* For standard failures, we don't care about the error
message. */ message. */
if (buildStatus != bsAborted) result.errorMsg = ""; if (result.status == BuildResult::PermanentFailure ||
result.status == BuildResult::TransientFailure ||
result.status == BuildResult::CachedFailure ||
result.status == BuildResult::TimedOut)
result.errorMsg = "";
/* Create failed build steps for every build that depends /* Create failed build steps for every build that depends
on this. For cached failures, only create a step for on this. For cached failures, only create a step for
@ -1061,7 +1065,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
/* Remember failed paths in the database so that they /* Remember failed paths in the database so that they
won't be built again. */ won't be built again. */
if (!cachedFailure && result.status == RemoteResult::rrPermanentFailure) if (!cachedFailure && result.status == BuildResult::PermanentFailure)
for (auto & path : outputPaths(step->drv)) for (auto & path : outputPaths(step->drv))
txn.parameterized("insert into FailedPaths values ($1)")(path).exec(); txn.parameterized("insert into FailedPaths values ($1)")(path).exec();
@ -1103,7 +1107,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
void State::markSucceededBuild(pqxx::work & txn, Build::ptr build, void State::markSucceededBuild(pqxx::work & txn, Build::ptr build,
const BuildResult & res, bool isCachedBuild, time_t startTime, time_t stopTime) const BuildOutput & res, bool isCachedBuild, time_t startTime, time_t stopTime)
{ {
printMsg(lvlInfo, format("marking build %1% as succeeded") % build->id); printMsg(lvlInfo, format("marking build %1% as succeeded") % build->id);

View file

@ -43,22 +43,20 @@ typedef enum {
} BuildStepStatus; } BuildStepStatus;
struct RemoteResult struct RemoteResult : nix::BuildResult
{ {
enum {
rrSuccess = 0,
rrPermanentFailure = 1,
rrTimedOut = 2,
rrMiscFailure = 3
} status = rrMiscFailure;
std::string errorMsg;
time_t startTime = 0, stopTime = 0; time_t startTime = 0, stopTime = 0;
nix::Path logFile; nix::Path logFile;
bool canRetry()
{
return status == TransientFailure || status == MiscFailure;
}
}; };
struct Step; struct Step;
struct BuildResult; struct BuildOutput;
struct Build struct Build
@ -283,7 +281,7 @@ private:
RemoteResult & result); RemoteResult & result);
void markSucceededBuild(pqxx::work & txn, Build::ptr build, void markSucceededBuild(pqxx::work & txn, Build::ptr build,
const BuildResult & res, bool isCachedBuild, time_t startTime, time_t stopTime); const BuildOutput & res, bool isCachedBuild, time_t startTime, time_t stopTime);
bool checkCachedFailure(Step::ptr step, Connection & conn); bool checkCachedFailure(Step::ptr step, Connection & conn);