hydra-queue-runner: Implement timeouts
Also, keep track of timeouts in the database as a distinct build status.
This commit is contained in:
parent
2da4987bc2
commit
745efce828
5 changed files with 32 additions and 10 deletions
|
@ -109,7 +109,8 @@ static void copyClosureFrom(std::shared_ptr<StoreAPI> store,
|
|||
void buildRemote(std::shared_ptr<StoreAPI> store,
|
||||
const string & sshName, const string & sshKey,
|
||||
const Path & drvPath, const Derivation & drv,
|
||||
const nix::Path & logDir, RemoteResult & result)
|
||||
const nix::Path & logDir, unsigned int maxSilentTime, unsigned int buildTimeout,
|
||||
RemoteResult & result)
|
||||
{
|
||||
string base = baseNameOf(drvPath);
|
||||
Path logFile = logDir + "/" + string(base, 0, 2) + "/" + string(base, 2);
|
||||
|
@ -152,8 +153,9 @@ void buildRemote(std::shared_ptr<StoreAPI> store,
|
|||
printMsg(lvlDebug, format("building ‘%1%’ on ‘%2%’") % drvPath % sshName);
|
||||
writeInt(cmdBuildPaths, to);
|
||||
writeStrings(PathSet({drvPath}), to);
|
||||
writeInt(3600, to); // == maxSilentTime, FIXME
|
||||
writeInt(7200, to); // == buildTimeout, FIXME
|
||||
writeInt(maxSilentTime, to);
|
||||
writeInt(buildTimeout, to);
|
||||
// FIXME: send maxLogSize.
|
||||
to.flush();
|
||||
result.startTime = time(0);
|
||||
int res = readInt(from);
|
||||
|
|
|
@ -18,4 +18,5 @@ struct RemoteResult
|
|||
void buildRemote(std::shared_ptr<nix::StoreAPI> store,
|
||||
const std::string & sshName, const std::string & sshKey,
|
||||
const nix::Path & drvPath, const nix::Derivation & drv,
|
||||
const nix::Path & logDir, RemoteResult & result);
|
||||
const nix::Path & logDir, unsigned int maxSilentTime, unsigned int buildTimeout,
|
||||
RemoteResult & result);
|
||||
|
|
|
@ -43,6 +43,7 @@ typedef enum {
|
|||
bsDepFailed = 2,
|
||||
bsAborted = 3,
|
||||
bsFailedWithOutput = 6,
|
||||
bsTimedOut = 7,
|
||||
bsUnsupported = 9,
|
||||
} BuildStatus;
|
||||
|
||||
|
@ -51,6 +52,7 @@ typedef enum {
|
|||
bssSuccess = 0,
|
||||
bssFailed = 1,
|
||||
bssAborted = 4,
|
||||
bssTimedOut = 7,
|
||||
bssUnsupported = 9,
|
||||
bssBusy = 100, // not stored
|
||||
} BuildStepStatus;
|
||||
|
@ -77,6 +79,7 @@ struct Build
|
|||
Path drvPath;
|
||||
std::map<string, Path> outputs;
|
||||
std::string fullJobName;
|
||||
unsigned int maxSilentTime, buildTimeout;
|
||||
|
||||
std::shared_ptr<Step> toplevel;
|
||||
|
||||
|
@ -481,7 +484,7 @@ void State::getQueuedBuilds(Connection & conn, std::shared_ptr<StoreAPI> store,
|
|||
{
|
||||
pqxx::work txn(conn);
|
||||
|
||||
auto res = txn.parameterized("select id, project, jobset, job, drvPath from Builds where id > $1 and finished = 0 order by id")(lastBuildId).exec();
|
||||
auto res = txn.parameterized("select id, project, jobset, job, drvPath, maxsilent, timeout from Builds where id > $1 and finished = 0 order by id")(lastBuildId).exec();
|
||||
|
||||
for (auto const & row : res) {
|
||||
auto builds_(builds.lock());
|
||||
|
@ -493,6 +496,9 @@ void State::getQueuedBuilds(Connection & conn, std::shared_ptr<StoreAPI> store,
|
|||
build->id = id;
|
||||
build->drvPath = row["drvPath"].as<string>();
|
||||
build->fullJobName = row["project"].as<string>() + ":" + row["jobset"].as<string>() + ":" + row["job"].as<string>();
|
||||
build->maxSilentTime = row["maxsilent"].as<int>();
|
||||
build->buildTimeout = row["timeout"].as<int>();
|
||||
std::cerr << build->id << " " << build->buildTimeout << std::endl;
|
||||
|
||||
newBuilds.push_back(build);
|
||||
}
|
||||
|
@ -975,8 +981,8 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
|
||||
if (!build) build = *dependents.begin();
|
||||
|
||||
printMsg(lvlInfo, format("performing step ‘%1%’ on ‘%2%’ (needed by %3% builds)")
|
||||
% step->drvPath % machine->sshName % dependents.size());
|
||||
printMsg(lvlInfo, format("performing step ‘%1%’ on ‘%2%’ (needed by build %3% and %4% others)")
|
||||
% step->drvPath % machine->sshName % build->id % (dependents.size() - 1));
|
||||
}
|
||||
|
||||
auto conn(dbPool.get());
|
||||
|
@ -1005,7 +1011,9 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
}
|
||||
|
||||
try {
|
||||
buildRemote(store, machine->sshName, machine->sshKey, step->drvPath, step->drv, logDir, result);
|
||||
/* FIXME: referring builds may have conflicting timeouts. */
|
||||
buildRemote(store, machine->sshName, machine->sshKey, step->drvPath, step->drv,
|
||||
logDir, build->maxSilentTime, build->buildTimeout, result);
|
||||
} catch (Error & e) {
|
||||
result.status = RemoteResult::rrMiscFailure;
|
||||
result.errorMsg = e.msg();
|
||||
|
@ -1066,9 +1074,13 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
/* Failure case. */
|
||||
|
||||
BuildStatus buildStatus =
|
||||
result.status == RemoteResult::rrPermanentFailure ? bsFailed : bsAborted;
|
||||
result.status == RemoteResult::rrPermanentFailure ? bsFailed :
|
||||
result.status == RemoteResult::rrTimedOut ? bsTimedOut :
|
||||
bsAborted;
|
||||
BuildStepStatus buildStepStatus =
|
||||
result.status == RemoteResult::rrPermanentFailure ? bssFailed : bssAborted;
|
||||
result.status == RemoteResult::rrPermanentFailure ? bssFailed :
|
||||
result.status == RemoteResult::rrTimedOut ? bssTimedOut :
|
||||
bssAborted;
|
||||
|
||||
/* For regular failures, we don't care about the error
|
||||
message. */
|
||||
|
@ -1223,6 +1235,8 @@ void State::run()
|
|||
|
||||
auto queueMonitorThread = std::thread(&State::queueMonitor, this);
|
||||
|
||||
sleep(5);
|
||||
|
||||
std::thread(&State::dispatcher, this).detach();
|
||||
|
||||
queueMonitorThread.join();
|
||||
|
|
|
@ -204,6 +204,8 @@ BLOCK renderBuildStatusIcon;
|
|||
<img src="[% c.uri_for("/static/images/forbidden_${size}.png") %]" alt="Cancelled" class="build-status" />
|
||||
[% ELSIF buildstatus == 6 %]
|
||||
<img src="[% c.uri_for("/static/images/error_${size}.png") %]" alt="Failed (with result)" class="build-status" />
|
||||
[% ELSIF buildstatus == 7 %]
|
||||
<img src="[% c.uri_for("/static/images/warning_${size}.png") %]" alt="Timed out" class="build-status" />
|
||||
[% ELSE %]
|
||||
<img src="[% c.uri_for("/static/images/error_${size}.png") %]" alt="Failed" class="build-status" />
|
||||
[% END;
|
||||
|
@ -229,6 +231,8 @@ BLOCK renderStatus;
|
|||
<span class="error">Cancelled by user</span>
|
||||
[% ELSIF buildstatus == 6 %]
|
||||
<span class="error">Build failed (with result)</span>
|
||||
[% ELSIF buildstatus == 7 %]
|
||||
<span class="error">Timed out</span>
|
||||
[% ELSIF buildstatus == 9 %]
|
||||
<span class="error">Unsupported system type</span>
|
||||
[% ELSE %]
|
||||
|
|
|
@ -180,6 +180,7 @@ create table Builds (
|
|||
-- 4 = build cancelled (removed from queue; never built)
|
||||
-- 5 = build not done because a dependency failed previously (obsolete)
|
||||
-- 6 = failure with output
|
||||
-- 7 = timed out
|
||||
-- 9 = unsupported system type
|
||||
buildStatus integer,
|
||||
|
||||
|
|
Loading…
Reference in a new issue