forked from lix-project/hydra
hydra-queue-runner: Use cmdBuildDerivation
See 1511aa9f48 and eda2f36c2a.
This commit is contained in:
parent
62b1b095ad
commit
5370be9f52
5 changed files with 85 additions and 58 deletions
|
@ -81,10 +81,7 @@ static void copyClosureTo(std::shared_ptr<StoreAPI> store,
|
||||||
enabled. This prevents a race where the remote host
|
enabled. This prevents a race where the remote host
|
||||||
garbage-collects paths that are already there. Optionally, ask
|
garbage-collects paths that are already there. Optionally, ask
|
||||||
the remote host to substitute missing paths. */
|
the remote host to substitute missing paths. */
|
||||||
writeInt(cmdQueryValidPaths, to);
|
to << cmdQueryValidPaths << 1 << useSubstitutes << closure;
|
||||||
writeInt(1, to); // == lock paths
|
|
||||||
writeInt(useSubstitutes, to);
|
|
||||||
writeStrings(closure, to);
|
|
||||||
to.flush();
|
to.flush();
|
||||||
|
|
||||||
/* Get back the set of paths that are already valid on the remote
|
/* Get back the set of paths that are already valid on the remote
|
||||||
|
@ -104,7 +101,7 @@ static void copyClosureTo(std::shared_ptr<StoreAPI> store,
|
||||||
for (auto & p : missing)
|
for (auto & p : missing)
|
||||||
bytesSent += store->queryPathInfo(p).narSize;
|
bytesSent += store->queryPathInfo(p).narSize;
|
||||||
|
|
||||||
writeInt(cmdImportPaths, to);
|
to << cmdImportPaths;
|
||||||
exportPaths(*store, missing, false, to);
|
exportPaths(*store, missing, false, to);
|
||||||
to.flush();
|
to.flush();
|
||||||
|
|
||||||
|
@ -116,9 +113,7 @@ static void copyClosureTo(std::shared_ptr<StoreAPI> store,
|
||||||
static void copyClosureFrom(std::shared_ptr<StoreAPI> store,
|
static void copyClosureFrom(std::shared_ptr<StoreAPI> store,
|
||||||
FdSource & from, FdSink & to, const PathSet & paths, counter & bytesReceived)
|
FdSource & from, FdSink & to, const PathSet & paths, counter & bytesReceived)
|
||||||
{
|
{
|
||||||
writeInt(cmdExportPaths, to);
|
to << cmdExportPaths << 0 << paths;
|
||||||
writeInt(0, to); // == don't sign
|
|
||||||
writeStrings(paths, to);
|
|
||||||
to.flush();
|
to.flush();
|
||||||
store->importPaths(false, from);
|
store->importPaths(false, from);
|
||||||
|
|
||||||
|
@ -150,9 +145,9 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
|
||||||
FdSink to(child.to);
|
FdSink to(child.to);
|
||||||
|
|
||||||
/* Handshake. */
|
/* Handshake. */
|
||||||
|
bool sendDerivation = true;
|
||||||
try {
|
try {
|
||||||
writeInt(SERVE_MAGIC_1, to);
|
to << SERVE_MAGIC_1 << SERVE_PROTOCOL_VERSION;
|
||||||
writeInt(SERVE_PROTOCOL_VERSION, to);
|
|
||||||
to.flush();
|
to.flush();
|
||||||
|
|
||||||
unsigned int magic = readInt(from);
|
unsigned int magic = readInt(from);
|
||||||
|
@ -161,19 +156,33 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
|
||||||
unsigned int version = readInt(from);
|
unsigned int version = readInt(from);
|
||||||
if (GET_PROTOCOL_MAJOR(version) != 0x200)
|
if (GET_PROTOCOL_MAJOR(version) != 0x200)
|
||||||
throw Error(format("unsupported ‘nix-store --serve’ protocol version on ‘%1%’") % machine->sshName);
|
throw Error(format("unsupported ‘nix-store --serve’ protocol version on ‘%1%’") % machine->sshName);
|
||||||
|
if (GET_PROTOCOL_MINOR(version) >= 1)
|
||||||
|
sendDerivation = false;
|
||||||
} catch (EndOfFile & e) {
|
} catch (EndOfFile & e) {
|
||||||
child.pid.wait(true);
|
child.pid.wait(true);
|
||||||
string s = chomp(readFile(result.logFile));
|
string s = chomp(readFile(result.logFile));
|
||||||
throw Error(format("cannot connect to ‘%1%’: %2%") % machine->sshName % s);
|
throw Error(format("cannot connect to ‘%1%’: %2%") % machine->sshName % s);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Gather the inputs. */
|
/* Gather the inputs. If the remote side is Nix <= 1.9, we have to
|
||||||
PathSet inputs({step->drvPath});
|
copy the entire closure of ‘drvPath’, as well as the required
|
||||||
|
outputs of the input derivations. On Nix > 1.9, we only need to
|
||||||
|
copy the immediate sources of the derivation and the required
|
||||||
|
outputs of the input derivations. */
|
||||||
|
PathSet inputs;
|
||||||
|
|
||||||
|
if (sendDerivation)
|
||||||
|
inputs.insert(step->drvPath);
|
||||||
|
else
|
||||||
|
for (auto & p : step->drv.inputSrcs)
|
||||||
|
inputs.insert(p);
|
||||||
|
|
||||||
for (auto & input : step->drv.inputDrvs) {
|
for (auto & input : step->drv.inputDrvs) {
|
||||||
Derivation drv2 = readDerivation(input.first);
|
Derivation drv2 = readDerivation(input.first);
|
||||||
for (auto & name : input.second) {
|
for (auto & name : input.second) {
|
||||||
auto i = drv2.outputs.find(name);
|
auto i = drv2.outputs.find(name);
|
||||||
if (i != drv2.outputs.end()) inputs.insert(i->second.path);
|
if (i == drv2.outputs.end()) continue;
|
||||||
|
inputs.insert(i->second.path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,12 +200,14 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
|
||||||
|
|
||||||
/* Do the build. */
|
/* Do the build. */
|
||||||
printMsg(lvlDebug, format("building ‘%1%’ on ‘%2%’") % step->drvPath % machine->sshName);
|
printMsg(lvlDebug, format("building ‘%1%’ on ‘%2%’") % step->drvPath % machine->sshName);
|
||||||
writeInt(cmdBuildPaths, to);
|
|
||||||
writeStrings(PathSet({step->drvPath}), to);
|
if (sendDerivation)
|
||||||
writeInt(maxSilentTime, to);
|
to << cmdBuildPaths << PathSet({step->drvPath}) << maxSilentTime << buildTimeout;
|
||||||
writeInt(buildTimeout, to);
|
else
|
||||||
// FIXME: send maxLogSize.
|
to << cmdBuildDerivation << step->drvPath << step->drv << maxSilentTime << buildTimeout;
|
||||||
|
// FIXME: send maxLogSize.
|
||||||
to.flush();
|
to.flush();
|
||||||
|
|
||||||
result.startTime = time(0);
|
result.startTime = time(0);
|
||||||
int res;
|
int res;
|
||||||
{
|
{
|
||||||
|
@ -204,12 +215,27 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
|
||||||
res = readInt(from);
|
res = readInt(from);
|
||||||
}
|
}
|
||||||
result.stopTime = time(0);
|
result.stopTime = time(0);
|
||||||
if (res) {
|
|
||||||
result.errorMsg = (format("%1% on ‘%2%’") % readString(from) % machine->sshName).str();
|
if (sendDerivation) {
|
||||||
if (res == 100) result.status = RemoteResult::rrPermanentFailure;
|
if (res) {
|
||||||
else if (res == 101) result.status = RemoteResult::rrTimedOut;
|
result.errorMsg = (format("%1% on ‘%2%’") % readString(from) % machine->sshName).str();
|
||||||
else result.status = RemoteResult::rrMiscFailure;
|
if (res == 100) result.status = BuildResult::PermanentFailure;
|
||||||
return;
|
else if (res == 101) result.status = BuildResult::TimedOut;
|
||||||
|
else result.status = BuildResult::MiscFailure;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
result.status = BuildResult::Built;
|
||||||
|
} else {
|
||||||
|
result.status = (BuildResult::Status) res;
|
||||||
|
result.errorMsg = readString(from);
|
||||||
|
if (!result.success()) return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the path was substituted or already valid, then we didn't
|
||||||
|
get a build log. */
|
||||||
|
if (result.status == BuildResult::Substituted || result.status == BuildResult::AlreadyValid) {
|
||||||
|
unlink(result.logFile.c_str());
|
||||||
|
result.logFile = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Copy the output paths. */
|
/* Copy the output paths. */
|
||||||
|
@ -226,5 +252,4 @@ void State::buildRemote(std::shared_ptr<StoreAPI> store,
|
||||||
child.to.close();
|
child.to.close();
|
||||||
child.pid.wait(true);
|
child.pid.wait(true);
|
||||||
|
|
||||||
result.status = RemoteResult::rrSuccess;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,9 +7,9 @@
|
||||||
using namespace nix;
|
using namespace nix;
|
||||||
|
|
||||||
|
|
||||||
BuildResult getBuildResult(std::shared_ptr<StoreAPI> store, const Derivation & drv)
|
BuildOutput getBuildOutput(std::shared_ptr<StoreAPI> store, const Derivation & drv)
|
||||||
{
|
{
|
||||||
BuildResult res;
|
BuildOutput res;
|
||||||
|
|
||||||
/* Compute the closure size. */
|
/* Compute the closure size. */
|
||||||
PathSet outputs;
|
PathSet outputs;
|
||||||
|
|
|
@ -15,7 +15,7 @@ struct BuildProduct
|
||||||
BuildProduct() { }
|
BuildProduct() { }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BuildResult
|
struct BuildOutput
|
||||||
{
|
{
|
||||||
/* Whether this build has failed with output, i.e., the build
|
/* Whether this build has failed with output, i.e., the build
|
||||||
finished with exit code 0 but produced a file
|
finished with exit code 0 but produced a file
|
||||||
|
@ -29,4 +29,4 @@ struct BuildResult
|
||||||
std::list<BuildProduct> products;
|
std::list<BuildProduct> products;
|
||||||
};
|
};
|
||||||
|
|
||||||
BuildResult getBuildResult(std::shared_ptr<nix::StoreAPI> store, const nix::Derivation & drv);
|
BuildOutput getBuildOutput(std::shared_ptr<nix::StoreAPI> store, const nix::Derivation & drv);
|
||||||
|
|
|
@ -314,7 +314,7 @@ void State::getQueuedBuilds(Connection & conn, std::shared_ptr<StoreAPI> store,
|
||||||
all valid. So we mark this as a finished, cached build. */
|
all valid. So we mark this as a finished, cached build. */
|
||||||
if (!step) {
|
if (!step) {
|
||||||
Derivation drv = readDerivation(build->drvPath);
|
Derivation drv = readDerivation(build->drvPath);
|
||||||
BuildResult res = getBuildResult(store, drv);
|
BuildOutput res = getBuildOutput(store, drv);
|
||||||
|
|
||||||
pqxx::work txn(conn);
|
pqxx::work txn(conn);
|
||||||
time_t now = time(0);
|
time_t now = time(0);
|
||||||
|
@ -822,7 +822,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
auto conn(dbPool.get());
|
auto conn(dbPool.get());
|
||||||
|
|
||||||
RemoteResult result;
|
RemoteResult result;
|
||||||
BuildResult res;
|
BuildOutput res;
|
||||||
int stepNr = 0;
|
int stepNr = 0;
|
||||||
|
|
||||||
time_t stepStartTime = result.startTime = time(0);
|
time_t stepStartTime = result.startTime = time(0);
|
||||||
|
@ -832,7 +832,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
bool cachedFailure = checkCachedFailure(step, *conn);
|
bool cachedFailure = checkCachedFailure(step, *conn);
|
||||||
|
|
||||||
if (cachedFailure)
|
if (cachedFailure)
|
||||||
result.status = RemoteResult::rrPermanentFailure;
|
result.status = BuildResult::CachedFailure;
|
||||||
else {
|
else {
|
||||||
|
|
||||||
/* Create a build step record indicating that we started
|
/* Create a build step record indicating that we started
|
||||||
|
@ -849,11 +849,11 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
/* FIXME: referring builds may have conflicting timeouts. */
|
/* FIXME: referring builds may have conflicting timeouts. */
|
||||||
buildRemote(store, machine, step, build->maxSilentTime, build->buildTimeout, result);
|
buildRemote(store, machine, step, build->maxSilentTime, build->buildTimeout, result);
|
||||||
} catch (Error & e) {
|
} catch (Error & e) {
|
||||||
result.status = RemoteResult::rrMiscFailure;
|
result.status = BuildResult::MiscFailure;
|
||||||
result.errorMsg = e.msg();
|
result.errorMsg = e.msg();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.status == RemoteResult::rrSuccess) res = getBuildResult(store, step->drv);
|
if (result.success()) res = getBuildOutput(store, step->drv);
|
||||||
}
|
}
|
||||||
|
|
||||||
time_t stepStopTime = time(0);
|
time_t stepStopTime = time(0);
|
||||||
|
@ -870,8 +870,8 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
|
|
||||||
/* The step had a hopefully temporary failure (e.g. network
|
/* The step had a hopefully temporary failure (e.g. network
|
||||||
issue). Retry a number of times. */
|
issue). Retry a number of times. */
|
||||||
if (result.status == RemoteResult::rrMiscFailure) {
|
if (result.canRetry()) {
|
||||||
printMsg(lvlError, format("irregular failure building ‘%1%’ on ‘%2%’: %3%")
|
printMsg(lvlError, format("possibly transient failure building ‘%1%’ on ‘%2%’: %3%")
|
||||||
% step->drvPath % machine->sshName % result.errorMsg);
|
% step->drvPath % machine->sshName % result.errorMsg);
|
||||||
bool retry;
|
bool retry;
|
||||||
{
|
{
|
||||||
|
@ -888,7 +888,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.status == RemoteResult::rrSuccess) {
|
if (result.success()) {
|
||||||
|
|
||||||
/* Register success in the database for all Build objects that
|
/* Register success in the database for all Build objects that
|
||||||
have this step as the top-level step. Since the queue
|
have this step as the top-level step. Since the queue
|
||||||
|
@ -932,7 +932,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
finishBuildStep(txn, result.startTime, result.stopTime, build->id, stepNr, machine->sshName, bssSuccess);
|
finishBuildStep(txn, result.startTime, result.stopTime, build->id, stepNr, machine->sshName, bssSuccess);
|
||||||
|
|
||||||
for (auto & b : direct)
|
for (auto & b : direct)
|
||||||
markSucceededBuild(txn, b, res, build != b,
|
markSucceededBuild(txn, b, res, build != b || result.status != BuildResult::Built,
|
||||||
result.startTime, result.stopTime);
|
result.startTime, result.stopTime);
|
||||||
|
|
||||||
txn.commit();
|
txn.commit();
|
||||||
|
@ -1015,17 +1015,21 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
pqxx::work txn(*conn);
|
pqxx::work txn(*conn);
|
||||||
|
|
||||||
BuildStatus buildStatus =
|
BuildStatus buildStatus =
|
||||||
result.status == RemoteResult::rrPermanentFailure ? bsFailed :
|
result.status == BuildResult::TimedOut ? bsTimedOut :
|
||||||
result.status == RemoteResult::rrTimedOut ? bsTimedOut :
|
result.canRetry() ? bsAborted :
|
||||||
bsAborted;
|
bsFailed;
|
||||||
BuildStepStatus buildStepStatus =
|
BuildStepStatus buildStepStatus =
|
||||||
result.status == RemoteResult::rrPermanentFailure ? bssFailed :
|
result.status == BuildResult::TimedOut ? bssTimedOut :
|
||||||
result.status == RemoteResult::rrTimedOut ? bssTimedOut :
|
result.canRetry() ? bssAborted :
|
||||||
bssAborted;
|
bssFailed;
|
||||||
|
|
||||||
/* For regular failures, we don't care about the error
|
/* For standard failures, we don't care about the error
|
||||||
message. */
|
message. */
|
||||||
if (buildStatus != bsAborted) result.errorMsg = "";
|
if (result.status == BuildResult::PermanentFailure ||
|
||||||
|
result.status == BuildResult::TransientFailure ||
|
||||||
|
result.status == BuildResult::CachedFailure ||
|
||||||
|
result.status == BuildResult::TimedOut)
|
||||||
|
result.errorMsg = "";
|
||||||
|
|
||||||
/* Create failed build steps for every build that depends
|
/* Create failed build steps for every build that depends
|
||||||
on this. For cached failures, only create a step for
|
on this. For cached failures, only create a step for
|
||||||
|
@ -1061,7 +1065,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
|
|
||||||
/* Remember failed paths in the database so that they
|
/* Remember failed paths in the database so that they
|
||||||
won't be built again. */
|
won't be built again. */
|
||||||
if (!cachedFailure && result.status == RemoteResult::rrPermanentFailure)
|
if (!cachedFailure && result.status == BuildResult::PermanentFailure)
|
||||||
for (auto & path : outputPaths(step->drv))
|
for (auto & path : outputPaths(step->drv))
|
||||||
txn.parameterized("insert into FailedPaths values ($1)")(path).exec();
|
txn.parameterized("insert into FailedPaths values ($1)")(path).exec();
|
||||||
|
|
||||||
|
@ -1103,7 +1107,7 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
|
|
||||||
|
|
||||||
void State::markSucceededBuild(pqxx::work & txn, Build::ptr build,
|
void State::markSucceededBuild(pqxx::work & txn, Build::ptr build,
|
||||||
const BuildResult & res, bool isCachedBuild, time_t startTime, time_t stopTime)
|
const BuildOutput & res, bool isCachedBuild, time_t startTime, time_t stopTime)
|
||||||
{
|
{
|
||||||
printMsg(lvlInfo, format("marking build %1% as succeeded") % build->id);
|
printMsg(lvlInfo, format("marking build %1% as succeeded") % build->id);
|
||||||
|
|
||||||
|
|
|
@ -43,22 +43,20 @@ typedef enum {
|
||||||
} BuildStepStatus;
|
} BuildStepStatus;
|
||||||
|
|
||||||
|
|
||||||
struct RemoteResult
|
struct RemoteResult : nix::BuildResult
|
||||||
{
|
{
|
||||||
enum {
|
|
||||||
rrSuccess = 0,
|
|
||||||
rrPermanentFailure = 1,
|
|
||||||
rrTimedOut = 2,
|
|
||||||
rrMiscFailure = 3
|
|
||||||
} status = rrMiscFailure;
|
|
||||||
std::string errorMsg;
|
|
||||||
time_t startTime = 0, stopTime = 0;
|
time_t startTime = 0, stopTime = 0;
|
||||||
nix::Path logFile;
|
nix::Path logFile;
|
||||||
|
|
||||||
|
bool canRetry()
|
||||||
|
{
|
||||||
|
return status == TransientFailure || status == MiscFailure;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct Step;
|
struct Step;
|
||||||
struct BuildResult;
|
struct BuildOutput;
|
||||||
|
|
||||||
|
|
||||||
struct Build
|
struct Build
|
||||||
|
@ -283,7 +281,7 @@ private:
|
||||||
RemoteResult & result);
|
RemoteResult & result);
|
||||||
|
|
||||||
void markSucceededBuild(pqxx::work & txn, Build::ptr build,
|
void markSucceededBuild(pqxx::work & txn, Build::ptr build,
|
||||||
const BuildResult & res, bool isCachedBuild, time_t startTime, time_t stopTime);
|
const BuildOutput & res, bool isCachedBuild, time_t startTime, time_t stopTime);
|
||||||
|
|
||||||
bool checkCachedFailure(Step::ptr step, Connection & conn);
|
bool checkCachedFailure(Step::ptr step, Connection & conn);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue