hydra-queue-runner: Use cmdBuildDerivation

See 1511aa9f48 and eda2f36c2a.
This commit is contained in:
Eelco Dolstra 2015-07-21 01:45:00 +02:00
parent 62b1b095ad
commit 5370be9f52
5 changed files with 85 additions and 58 deletions

View file

@ -81,10 +81,7 @@ static void copyClosureTo(std::shared_ptr<StoreAPI> store,
enabled. This prevents a race where the remote host
garbage-collect paths that are already there. Optionally, ask
the remote host to substitute missing paths. */
writeInt(cmdQueryValidPaths, to);
writeInt(1, to); // == lock paths
writeInt(useSubstitutes, to);
writeStrings(closure, to);
to << cmdQueryValidPaths << 1 << useSubstitutes << closure;
to.flush();
/* Get back the set of paths that are already valid on the remote
@ -104,7 +101,7 @@ static void copyClosureTo(std::shared_ptr<StoreAPI> store,
for (auto & p : missing)
bytesSent += store->queryPathInfo(p).narSize;
writeInt(cmdImportPaths, to);
to << cmdImportPaths;
exportPaths(*store, missing, false, to);
to.flush();
@ -116,9 +113,7 @@ static void copyClosureTo(std::shared_ptr<StoreAPI> store,
static void copyClosureFrom(std::shared_ptr<StoreAPI> store,
FdSource & from, FdSink & to, const PathSet & paths, counter & bytesReceived)
{
writeInt(cmdExportPaths, to);
writeInt(0, to); // == don't sign
writeStrings(paths, to);
to << cmdExportPaths << 0 << paths;
to.flush();
store->importPaths(false, from);
@ -150,9 +145,9 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
FdSink to(child.to);
/* Handshake. */
bool sendDerivation = true;
try {
writeInt(SERVE_MAGIC_1, to);
writeInt(SERVE_PROTOCOL_VERSION, to);
to << SERVE_MAGIC_1 << SERVE_PROTOCOL_VERSION;
to.flush();
unsigned int magic = readInt(from);
@ -161,19 +156,33 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
unsigned int version = readInt(from);
if (GET_PROTOCOL_MAJOR(version) != 0x200)
throw Error(format("unsupported nix-store --serve protocol version on %1%") % machine->sshName);
if (GET_PROTOCOL_MINOR(version) >= 1)
sendDerivation = false;
} catch (EndOfFile & e) {
child.pid.wait(true);
string s = chomp(readFile(result.logFile));
throw Error(format("cannot connect to %1%: %2%") % machine->sshName % s);
}
/* Gather the inputs. */
PathSet inputs({step->drvPath});
/* Gather the inputs. If the remote side is Nix <= 1.9, we have to
copy the entire closure of drvPath, as well the required
outputs of the input derivations. On Nix > 1.9, we only need to
copy the immediate sources of the derivation and the required
outputs of the input derivations. */
PathSet inputs;
if (sendDerivation)
inputs.insert(step->drvPath);
else
for (auto & p : step->drv.inputSrcs)
inputs.insert(p);
for (auto & input : step->drv.inputDrvs) {
Derivation drv2 = readDerivation(input.first);
for (auto & name : input.second) {
auto i = drv2.outputs.find(name);
if (i != drv2.outputs.end()) inputs.insert(i->second.path);
if (i == drv2.outputs.end()) continue;
inputs.insert(i->second.path);
}
}
@ -191,12 +200,14 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
/* Do the build. */
printMsg(lvlDebug, format("building %1% on %2%") % step->drvPath % machine->sshName);
writeInt(cmdBuildPaths, to);
writeStrings(PathSet({step->drvPath}), to);
writeInt(maxSilentTime, to);
writeInt(buildTimeout, to);
if (sendDerivation)
to << cmdBuildPaths << PathSet({step->drvPath}) << maxSilentTime << buildTimeout;
else
to << cmdBuildDerivation << step->drvPath << step->drv << maxSilentTime << buildTimeout;
// FIXME: send maxLogSize.
to.flush();
result.startTime = time(0);
int res;
{
@ -204,13 +215,28 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
res = readInt(from);
}
result.stopTime = time(0);
if (sendDerivation) {
if (res) {
result.errorMsg = (format("%1% on %2%") % readString(from) % machine->sshName).str();
if (res == 100) result.status = RemoteResult::rrPermanentFailure;
else if (res == 101) result.status = RemoteResult::rrTimedOut;
else result.status = RemoteResult::rrMiscFailure;
if (res == 100) result.status = BuildResult::PermanentFailure;
else if (res == 101) result.status = BuildResult::TimedOut;
else result.status = BuildResult::MiscFailure;
return;
}
result.status = BuildResult::Built;
} else {
result.status = (BuildResult::Status) res;
result.errorMsg = readString(from);
if (!result.success()) return;
}
/* If the path was substituted or already valid, then we didn't
get a build log. */
if (result.status == BuildResult::Substituted || result.status == BuildResult::AlreadyValid) {
unlink(result.logFile.c_str());
result.logFile = "";
}
/* Copy the output paths. */
if (machine->sshName != "localhost") {
@ -226,5 +252,4 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
child.to.close();
child.pid.wait(true);
result.status = RemoteResult::rrSuccess;
}

View file

@ -7,9 +7,9 @@
using namespace nix;
BuildResult getBuildResult(std::shared_ptr<StoreAPI> store, const Derivation & drv)
BuildOutput getBuildOutput(std::shared_ptr<StoreAPI> store, const Derivation & drv)
{
BuildResult res;
BuildOutput res;
/* Compute the closure size. */
PathSet outputs;

View file

@ -15,7 +15,7 @@ struct BuildProduct
BuildProduct() { }
};
struct BuildResult
struct BuildOutput
{
/* Whether this build has failed with output, i.e., the build
finished with exit code 0 but produced a file
@ -29,4 +29,4 @@ struct BuildResult
std::list<BuildProduct> products;
};
BuildResult getBuildResult(std::shared_ptr<nix::StoreAPI> store, const nix::Derivation & drv);
BuildOutput getBuildOutput(std::shared_ptr<nix::StoreAPI> store, const nix::Derivation & drv);

View file

@ -314,7 +314,7 @@ void State::getQueuedBuilds(Connection & conn, std::shared_ptr<StoreAPI> store,
all valid. So we mark this as a finished, cached build. */
if (!step) {
Derivation drv = readDerivation(build->drvPath);
BuildResult res = getBuildResult(store, drv);
BuildOutput res = getBuildOutput(store, drv);
pqxx::work txn(conn);
time_t now = time(0);
@ -822,7 +822,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
auto conn(dbPool.get());
RemoteResult result;
BuildResult res;
BuildOutput res;
int stepNr = 0;
time_t stepStartTime = result.startTime = time(0);
@ -832,7 +832,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
bool cachedFailure = checkCachedFailure(step, *conn);
if (cachedFailure)
result.status = RemoteResult::rrPermanentFailure;
result.status = BuildResult::CachedFailure;
else {
/* Create a build step record indicating that we started
@ -849,11 +849,11 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
/* FIXME: referring builds may have conflicting timeouts. */
buildRemote(store, machine, step, build->maxSilentTime, build->buildTimeout, result);
} catch (Error & e) {
result.status = RemoteResult::rrMiscFailure;
result.status = BuildResult::MiscFailure;
result.errorMsg = e.msg();
}
if (result.status == RemoteResult::rrSuccess) res = getBuildResult(store, step->drv);
if (result.success()) res = getBuildOutput(store, step->drv);
}
time_t stepStopTime = time(0);
@ -870,8 +870,8 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
/* The step had a hopefully temporary failure (e.g. network
issue). Retry a number of times. */
if (result.status == RemoteResult::rrMiscFailure) {
printMsg(lvlError, format("irregular failure building %1% on %2%: %3%")
if (result.canRetry()) {
printMsg(lvlError, format("possibly transient failure building %1% on %2%: %3%")
% step->drvPath % machine->sshName % result.errorMsg);
bool retry;
{
@ -888,7 +888,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
}
}
if (result.status == RemoteResult::rrSuccess) {
if (result.success()) {
/* Register success in the database for all Build objects that
have this step as the top-level step. Since the queue
@ -932,7 +932,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
finishBuildStep(txn, result.startTime, result.stopTime, build->id, stepNr, machine->sshName, bssSuccess);
for (auto & b : direct)
markSucceededBuild(txn, b, res, build != b,
markSucceededBuild(txn, b, res, build != b || result.status != BuildResult::Built,
result.startTime, result.stopTime);
txn.commit();
@ -1015,17 +1015,21 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
pqxx::work txn(*conn);
BuildStatus buildStatus =
result.status == RemoteResult::rrPermanentFailure ? bsFailed :
result.status == RemoteResult::rrTimedOut ? bsTimedOut :
bsAborted;
result.status == BuildResult::TimedOut ? bsTimedOut :
result.canRetry() ? bsAborted :
bsFailed;
BuildStepStatus buildStepStatus =
result.status == RemoteResult::rrPermanentFailure ? bssFailed :
result.status == RemoteResult::rrTimedOut ? bssTimedOut :
bssAborted;
result.status == BuildResult::TimedOut ? bssTimedOut :
result.canRetry() ? bssAborted :
bssFailed;
/* For regular failures, we don't care about the error
/* For standard failures, we don't care about the error
message. */
if (buildStatus != bsAborted) result.errorMsg = "";
if (result.status == BuildResult::PermanentFailure ||
result.status == BuildResult::TransientFailure ||
result.status == BuildResult::CachedFailure ||
result.status == BuildResult::TimedOut)
result.errorMsg = "";
/* Create failed build steps for every build that depends
on this. For cached failures, only create a step for
@ -1061,7 +1065,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
/* Remember failed paths in the database so that they
won't be built again. */
if (!cachedFailure && result.status == RemoteResult::rrPermanentFailure)
if (!cachedFailure && result.status == BuildResult::PermanentFailure)
for (auto & path : outputPaths(step->drv))
txn.parameterized("insert into FailedPaths values ($1)")(path).exec();
@ -1103,7 +1107,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
void State::markSucceededBuild(pqxx::work & txn, Build::ptr build,
const BuildResult & res, bool isCachedBuild, time_t startTime, time_t stopTime)
const BuildOutput & res, bool isCachedBuild, time_t startTime, time_t stopTime)
{
printMsg(lvlInfo, format("marking build %1% as succeeded") % build->id);

View file

@ -43,22 +43,20 @@ typedef enum {
} BuildStepStatus;
struct RemoteResult
struct RemoteResult : nix::BuildResult
{
enum {
rrSuccess = 0,
rrPermanentFailure = 1,
rrTimedOut = 2,
rrMiscFailure = 3
} status = rrMiscFailure;
std::string errorMsg;
time_t startTime = 0, stopTime = 0;
nix::Path logFile;
bool canRetry()
{
return status == TransientFailure || status == MiscFailure;
}
};
struct Step;
struct BuildResult;
struct BuildOutput;
struct Build
@ -283,7 +281,7 @@ private:
RemoteResult & result);
void markSucceededBuild(pqxx::work & txn, Build::ptr build,
const BuildResult & res, bool isCachedBuild, time_t startTime, time_t stopTime);
const BuildOutput & res, bool isCachedBuild, time_t startTime, time_t stopTime);
bool checkCachedFailure(Step::ptr step, Connection & conn);