forked from lix-project/hydra
hydra-queue-runner: Use cmdBuildDerivation
See 1511aa9f48 and eda2f36c2a.
This commit is contained in:
parent
62b1b095ad
commit
5370be9f52
5 changed files with 85 additions and 58 deletions
|
@ -81,10 +81,7 @@ static void copyClosureTo(std::shared_ptr<StoreAPI> store,
|
|||
enabled. This prevents a race where the remote host
|
||||
garbage-collects paths that are already there. Optionally, ask
|
||||
the remote host to substitute missing paths. */
|
||||
writeInt(cmdQueryValidPaths, to);
|
||||
writeInt(1, to); // == lock paths
|
||||
writeInt(useSubstitutes, to);
|
||||
writeStrings(closure, to);
|
||||
to << cmdQueryValidPaths << 1 << useSubstitutes << closure;
|
||||
to.flush();
|
||||
|
||||
/* Get back the set of paths that are already valid on the remote
|
||||
|
@ -104,7 +101,7 @@ static void copyClosureTo(std::shared_ptr<StoreAPI> store,
|
|||
for (auto & p : missing)
|
||||
bytesSent += store->queryPathInfo(p).narSize;
|
||||
|
||||
writeInt(cmdImportPaths, to);
|
||||
to << cmdImportPaths;
|
||||
exportPaths(*store, missing, false, to);
|
||||
to.flush();
|
||||
|
||||
|
@ -116,9 +113,7 @@ static void copyClosureTo(std::shared_ptr<StoreAPI> store,
|
|||
static void copyClosureFrom(std::shared_ptr<StoreAPI> store,
|
||||
FdSource & from, FdSink & to, const PathSet & paths, counter & bytesReceived)
|
||||
{
|
||||
writeInt(cmdExportPaths, to);
|
||||
writeInt(0, to); // == don't sign
|
||||
writeStrings(paths, to);
|
||||
to << cmdExportPaths << 0 << paths;
|
||||
to.flush();
|
||||
store->importPaths(false, from);
|
||||
|
||||
|
@ -150,9 +145,9 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
|
|||
FdSink to(child.to);
|
||||
|
||||
/* Handshake. */
|
||||
bool sendDerivation = true;
|
||||
try {
|
||||
writeInt(SERVE_MAGIC_1, to);
|
||||
writeInt(SERVE_PROTOCOL_VERSION, to);
|
||||
to << SERVE_MAGIC_1 << SERVE_PROTOCOL_VERSION;
|
||||
to.flush();
|
||||
|
||||
unsigned int magic = readInt(from);
|
||||
|
@ -161,19 +156,33 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
|
|||
unsigned int version = readInt(from);
|
||||
if (GET_PROTOCOL_MAJOR(version) != 0x200)
|
||||
throw Error(format("unsupported ‘nix-store --serve’ protocol version on ‘%1%’") % machine->sshName);
|
||||
if (GET_PROTOCOL_MINOR(version) >= 1)
|
||||
sendDerivation = false;
|
||||
} catch (EndOfFile & e) {
|
||||
child.pid.wait(true);
|
||||
string s = chomp(readFile(result.logFile));
|
||||
throw Error(format("cannot connect to ‘%1%’: %2%") % machine->sshName % s);
|
||||
}
|
||||
|
||||
/* Gather the inputs. */
|
||||
PathSet inputs({step->drvPath});
|
||||
/* Gather the inputs. If the remote side is Nix <= 1.9, we have to
|
||||
copy the entire closure of ‘drvPath’, as well as the required
|
||||
outputs of the input derivations. On Nix > 1.9, we only need to
|
||||
copy the immediate sources of the derivation and the required
|
||||
outputs of the input derivations. */
|
||||
PathSet inputs;
|
||||
|
||||
if (sendDerivation)
|
||||
inputs.insert(step->drvPath);
|
||||
else
|
||||
for (auto & p : step->drv.inputSrcs)
|
||||
inputs.insert(p);
|
||||
|
||||
for (auto & input : step->drv.inputDrvs) {
|
||||
Derivation drv2 = readDerivation(input.first);
|
||||
for (auto & name : input.second) {
|
||||
auto i = drv2.outputs.find(name);
|
||||
if (i != drv2.outputs.end()) inputs.insert(i->second.path);
|
||||
if (i == drv2.outputs.end()) continue;
|
||||
inputs.insert(i->second.path);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -191,12 +200,14 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
|
|||
|
||||
/* Do the build. */
|
||||
printMsg(lvlDebug, format("building ‘%1%’ on ‘%2%’") % step->drvPath % machine->sshName);
|
||||
writeInt(cmdBuildPaths, to);
|
||||
writeStrings(PathSet({step->drvPath}), to);
|
||||
writeInt(maxSilentTime, to);
|
||||
writeInt(buildTimeout, to);
|
||||
|
||||
if (sendDerivation)
|
||||
to << cmdBuildPaths << PathSet({step->drvPath}) << maxSilentTime << buildTimeout;
|
||||
else
|
||||
to << cmdBuildDerivation << step->drvPath << step->drv << maxSilentTime << buildTimeout;
|
||||
// FIXME: send maxLogSize.
|
||||
to.flush();
|
||||
|
||||
result.startTime = time(0);
|
||||
int res;
|
||||
{
|
||||
|
@ -204,13 +215,28 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
|
|||
res = readInt(from);
|
||||
}
|
||||
result.stopTime = time(0);
|
||||
|
||||
if (sendDerivation) {
|
||||
if (res) {
|
||||
result.errorMsg = (format("%1% on ‘%2%’") % readString(from) % machine->sshName).str();
|
||||
if (res == 100) result.status = RemoteResult::rrPermanentFailure;
|
||||
else if (res == 101) result.status = RemoteResult::rrTimedOut;
|
||||
else result.status = RemoteResult::rrMiscFailure;
|
||||
if (res == 100) result.status = BuildResult::PermanentFailure;
|
||||
else if (res == 101) result.status = BuildResult::TimedOut;
|
||||
else result.status = BuildResult::MiscFailure;
|
||||
return;
|
||||
}
|
||||
result.status = BuildResult::Built;
|
||||
} else {
|
||||
result.status = (BuildResult::Status) res;
|
||||
result.errorMsg = readString(from);
|
||||
if (!result.success()) return;
|
||||
}
|
||||
|
||||
/* If the path was substituted or already valid, then we didn't
|
||||
get a build log. */
|
||||
if (result.status == BuildResult::Substituted || result.status == BuildResult::AlreadyValid) {
|
||||
unlink(result.logFile.c_str());
|
||||
result.logFile = "";
|
||||
}
|
||||
|
||||
/* Copy the output paths. */
|
||||
if (machine->sshName != "localhost") {
|
||||
|
@ -226,5 +252,4 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
|
|||
child.to.close();
|
||||
child.pid.wait(true);
|
||||
|
||||
result.status = RemoteResult::rrSuccess;
|
||||
}
|
||||
|
|
|
@ -7,9 +7,9 @@
|
|||
using namespace nix;
|
||||
|
||||
|
||||
BuildResult getBuildResult(std::shared_ptr<StoreAPI> store, const Derivation & drv)
|
||||
BuildOutput getBuildOutput(std::shared_ptr<StoreAPI> store, const Derivation & drv)
|
||||
{
|
||||
BuildResult res;
|
||||
BuildOutput res;
|
||||
|
||||
/* Compute the closure size. */
|
||||
PathSet outputs;
|
||||
|
|
|
@ -15,7 +15,7 @@ struct BuildProduct
|
|||
BuildProduct() { }
|
||||
};
|
||||
|
||||
struct BuildResult
|
||||
struct BuildOutput
|
||||
{
|
||||
/* Whether this build has failed with output, i.e., the build
|
||||
finished with exit code 0 but produced a file
|
||||
|
@ -29,4 +29,4 @@ struct BuildResult
|
|||
std::list<BuildProduct> products;
|
||||
};
|
||||
|
||||
BuildResult getBuildResult(std::shared_ptr<nix::StoreAPI> store, const nix::Derivation & drv);
|
||||
BuildOutput getBuildOutput(std::shared_ptr<nix::StoreAPI> store, const nix::Derivation & drv);
|
||||
|
|
|
@ -314,7 +314,7 @@ void State::getQueuedBuilds(Connection & conn, std::shared_ptr<StoreAPI> store,
|
|||
all valid. So we mark this as a finished, cached build. */
|
||||
if (!step) {
|
||||
Derivation drv = readDerivation(build->drvPath);
|
||||
BuildResult res = getBuildResult(store, drv);
|
||||
BuildOutput res = getBuildOutput(store, drv);
|
||||
|
||||
pqxx::work txn(conn);
|
||||
time_t now = time(0);
|
||||
|
@ -822,7 +822,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
auto conn(dbPool.get());
|
||||
|
||||
RemoteResult result;
|
||||
BuildResult res;
|
||||
BuildOutput res;
|
||||
int stepNr = 0;
|
||||
|
||||
time_t stepStartTime = result.startTime = time(0);
|
||||
|
@ -832,7 +832,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
bool cachedFailure = checkCachedFailure(step, *conn);
|
||||
|
||||
if (cachedFailure)
|
||||
result.status = RemoteResult::rrPermanentFailure;
|
||||
result.status = BuildResult::CachedFailure;
|
||||
else {
|
||||
|
||||
/* Create a build step record indicating that we started
|
||||
|
@ -849,11 +849,11 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
/* FIXME: referring builds may have conflicting timeouts. */
|
||||
buildRemote(store, machine, step, build->maxSilentTime, build->buildTimeout, result);
|
||||
} catch (Error & e) {
|
||||
result.status = RemoteResult::rrMiscFailure;
|
||||
result.status = BuildResult::MiscFailure;
|
||||
result.errorMsg = e.msg();
|
||||
}
|
||||
|
||||
if (result.status == RemoteResult::rrSuccess) res = getBuildResult(store, step->drv);
|
||||
if (result.success()) res = getBuildOutput(store, step->drv);
|
||||
}
|
||||
|
||||
time_t stepStopTime = time(0);
|
||||
|
@ -870,8 +870,8 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
|
||||
/* The step had a hopefully temporary failure (e.g. network
|
||||
issue). Retry a number of times. */
|
||||
if (result.status == RemoteResult::rrMiscFailure) {
|
||||
printMsg(lvlError, format("irregular failure building ‘%1%’ on ‘%2%’: %3%")
|
||||
if (result.canRetry()) {
|
||||
printMsg(lvlError, format("possibly transient failure building ‘%1%’ on ‘%2%’: %3%")
|
||||
% step->drvPath % machine->sshName % result.errorMsg);
|
||||
bool retry;
|
||||
{
|
||||
|
@ -888,7 +888,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
}
|
||||
}
|
||||
|
||||
if (result.status == RemoteResult::rrSuccess) {
|
||||
if (result.success()) {
|
||||
|
||||
/* Register success in the database for all Build objects that
|
||||
have this step as the top-level step. Since the queue
|
||||
|
@ -932,7 +932,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
finishBuildStep(txn, result.startTime, result.stopTime, build->id, stepNr, machine->sshName, bssSuccess);
|
||||
|
||||
for (auto & b : direct)
|
||||
markSucceededBuild(txn, b, res, build != b,
|
||||
markSucceededBuild(txn, b, res, build != b || result.status != BuildResult::Built,
|
||||
result.startTime, result.stopTime);
|
||||
|
||||
txn.commit();
|
||||
|
@ -1015,17 +1015,21 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
pqxx::work txn(*conn);
|
||||
|
||||
BuildStatus buildStatus =
|
||||
result.status == RemoteResult::rrPermanentFailure ? bsFailed :
|
||||
result.status == RemoteResult::rrTimedOut ? bsTimedOut :
|
||||
bsAborted;
|
||||
result.status == BuildResult::TimedOut ? bsTimedOut :
|
||||
result.canRetry() ? bsAborted :
|
||||
bsFailed;
|
||||
BuildStepStatus buildStepStatus =
|
||||
result.status == RemoteResult::rrPermanentFailure ? bssFailed :
|
||||
result.status == RemoteResult::rrTimedOut ? bssTimedOut :
|
||||
bssAborted;
|
||||
result.status == BuildResult::TimedOut ? bssTimedOut :
|
||||
result.canRetry() ? bssAborted :
|
||||
bssFailed;
|
||||
|
||||
/* For regular failures, we don't care about the error
|
||||
/* For standard failures, we don't care about the error
|
||||
message. */
|
||||
if (buildStatus != bsAborted) result.errorMsg = "";
|
||||
if (result.status == BuildResult::PermanentFailure ||
|
||||
result.status == BuildResult::TransientFailure ||
|
||||
result.status == BuildResult::CachedFailure ||
|
||||
result.status == BuildResult::TimedOut)
|
||||
result.errorMsg = "";
|
||||
|
||||
/* Create failed build steps for every build that depends
|
||||
on this. For cached failures, only create a step for
|
||||
|
@ -1061,7 +1065,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
|
||||
/* Remember failed paths in the database so that they
|
||||
won't be built again. */
|
||||
if (!cachedFailure && result.status == RemoteResult::rrPermanentFailure)
|
||||
if (!cachedFailure && result.status == BuildResult::PermanentFailure)
|
||||
for (auto & path : outputPaths(step->drv))
|
||||
txn.parameterized("insert into FailedPaths values ($1)")(path).exec();
|
||||
|
||||
|
@ -1103,7 +1107,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
|
||||
|
||||
void State::markSucceededBuild(pqxx::work & txn, Build::ptr build,
|
||||
const BuildResult & res, bool isCachedBuild, time_t startTime, time_t stopTime)
|
||||
const BuildOutput & res, bool isCachedBuild, time_t startTime, time_t stopTime)
|
||||
{
|
||||
printMsg(lvlInfo, format("marking build %1% as succeeded") % build->id);
|
||||
|
||||
|
|
|
@ -43,22 +43,20 @@ typedef enum {
|
|||
} BuildStepStatus;
|
||||
|
||||
|
||||
struct RemoteResult
|
||||
struct RemoteResult : nix::BuildResult
|
||||
{
|
||||
enum {
|
||||
rrSuccess = 0,
|
||||
rrPermanentFailure = 1,
|
||||
rrTimedOut = 2,
|
||||
rrMiscFailure = 3
|
||||
} status = rrMiscFailure;
|
||||
std::string errorMsg;
|
||||
time_t startTime = 0, stopTime = 0;
|
||||
nix::Path logFile;
|
||||
|
||||
bool canRetry()
|
||||
{
|
||||
return status == TransientFailure || status == MiscFailure;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct Step;
|
||||
struct BuildResult;
|
||||
struct BuildOutput;
|
||||
|
||||
|
||||
struct Build
|
||||
|
@ -283,7 +281,7 @@ private:
|
|||
RemoteResult & result);
|
||||
|
||||
void markSucceededBuild(pqxx::work & txn, Build::ptr build,
|
||||
const BuildResult & res, bool isCachedBuild, time_t startTime, time_t stopTime);
|
||||
const BuildOutput & res, bool isCachedBuild, time_t startTime, time_t stopTime);
|
||||
|
||||
bool checkCachedFailure(Step::ptr step, Connection & conn);
|
||||
|
||||
|
|
Loading…
Reference in a new issue