hydra-queue-runner: Implement timeouts
Also, keep track of timeouts in the database as a distinct build status.
This commit is contained in:
parent
2da4987bc2
commit
745efce828
5 changed files with 32 additions and 10 deletions
|
@ -109,7 +109,8 @@ static void copyClosureFrom(std::shared_ptr<StoreAPI> store,
|
||||||
void buildRemote(std::shared_ptr<StoreAPI> store,
|
void buildRemote(std::shared_ptr<StoreAPI> store,
|
||||||
const string & sshName, const string & sshKey,
|
const string & sshName, const string & sshKey,
|
||||||
const Path & drvPath, const Derivation & drv,
|
const Path & drvPath, const Derivation & drv,
|
||||||
const nix::Path & logDir, RemoteResult & result)
|
const nix::Path & logDir, unsigned int maxSilentTime, unsigned int buildTimeout,
|
||||||
|
RemoteResult & result)
|
||||||
{
|
{
|
||||||
string base = baseNameOf(drvPath);
|
string base = baseNameOf(drvPath);
|
||||||
Path logFile = logDir + "/" + string(base, 0, 2) + "/" + string(base, 2);
|
Path logFile = logDir + "/" + string(base, 0, 2) + "/" + string(base, 2);
|
||||||
|
@ -152,8 +153,9 @@ void buildRemote(std::shared_ptr<StoreAPI> store,
|
||||||
printMsg(lvlDebug, format("building ‘%1%’ on ‘%2%’") % drvPath % sshName);
|
printMsg(lvlDebug, format("building ‘%1%’ on ‘%2%’") % drvPath % sshName);
|
||||||
writeInt(cmdBuildPaths, to);
|
writeInt(cmdBuildPaths, to);
|
||||||
writeStrings(PathSet({drvPath}), to);
|
writeStrings(PathSet({drvPath}), to);
|
||||||
writeInt(3600, to); // == maxSilentTime, FIXME
|
writeInt(maxSilentTime, to);
|
||||||
writeInt(7200, to); // == buildTimeout, FIXME
|
writeInt(buildTimeout, to);
|
||||||
|
// FIXME: send maxLogSize.
|
||||||
to.flush();
|
to.flush();
|
||||||
result.startTime = time(0);
|
result.startTime = time(0);
|
||||||
int res = readInt(from);
|
int res = readInt(from);
|
||||||
|
|
|
@ -18,4 +18,5 @@ struct RemoteResult
|
||||||
void buildRemote(std::shared_ptr<nix::StoreAPI> store,
|
void buildRemote(std::shared_ptr<nix::StoreAPI> store,
|
||||||
const std::string & sshName, const std::string & sshKey,
|
const std::string & sshName, const std::string & sshKey,
|
||||||
const nix::Path & drvPath, const nix::Derivation & drv,
|
const nix::Path & drvPath, const nix::Derivation & drv,
|
||||||
const nix::Path & logDir, RemoteResult & result);
|
const nix::Path & logDir, unsigned int maxSilentTime, unsigned int buildTimeout,
|
||||||
|
RemoteResult & result);
|
||||||
|
|
|
@ -43,6 +43,7 @@ typedef enum {
|
||||||
bsDepFailed = 2,
|
bsDepFailed = 2,
|
||||||
bsAborted = 3,
|
bsAborted = 3,
|
||||||
bsFailedWithOutput = 6,
|
bsFailedWithOutput = 6,
|
||||||
|
bsTimedOut = 7,
|
||||||
bsUnsupported = 9,
|
bsUnsupported = 9,
|
||||||
} BuildStatus;
|
} BuildStatus;
|
||||||
|
|
||||||
|
@ -51,6 +52,7 @@ typedef enum {
|
||||||
bssSuccess = 0,
|
bssSuccess = 0,
|
||||||
bssFailed = 1,
|
bssFailed = 1,
|
||||||
bssAborted = 4,
|
bssAborted = 4,
|
||||||
|
bssTimedOut = 7,
|
||||||
bssUnsupported = 9,
|
bssUnsupported = 9,
|
||||||
bssBusy = 100, // not stored
|
bssBusy = 100, // not stored
|
||||||
} BuildStepStatus;
|
} BuildStepStatus;
|
||||||
|
@ -77,6 +79,7 @@ struct Build
|
||||||
Path drvPath;
|
Path drvPath;
|
||||||
std::map<string, Path> outputs;
|
std::map<string, Path> outputs;
|
||||||
std::string fullJobName;
|
std::string fullJobName;
|
||||||
|
unsigned int maxSilentTime, buildTimeout;
|
||||||
|
|
||||||
std::shared_ptr<Step> toplevel;
|
std::shared_ptr<Step> toplevel;
|
||||||
|
|
||||||
|
@ -481,7 +484,7 @@ void State::getQueuedBuilds(Connection & conn, std::shared_ptr<StoreAPI> store,
|
||||||
{
|
{
|
||||||
pqxx::work txn(conn);
|
pqxx::work txn(conn);
|
||||||
|
|
||||||
auto res = txn.parameterized("select id, project, jobset, job, drvPath from Builds where id > $1 and finished = 0 order by id")(lastBuildId).exec();
|
auto res = txn.parameterized("select id, project, jobset, job, drvPath, maxsilent, timeout from Builds where id > $1 and finished = 0 order by id")(lastBuildId).exec();
|
||||||
|
|
||||||
for (auto const & row : res) {
|
for (auto const & row : res) {
|
||||||
auto builds_(builds.lock());
|
auto builds_(builds.lock());
|
||||||
|
@ -493,6 +496,9 @@ void State::getQueuedBuilds(Connection & conn, std::shared_ptr<StoreAPI> store,
|
||||||
build->id = id;
|
build->id = id;
|
||||||
build->drvPath = row["drvPath"].as<string>();
|
build->drvPath = row["drvPath"].as<string>();
|
||||||
build->fullJobName = row["project"].as<string>() + ":" + row["jobset"].as<string>() + ":" + row["job"].as<string>();
|
build->fullJobName = row["project"].as<string>() + ":" + row["jobset"].as<string>() + ":" + row["job"].as<string>();
|
||||||
|
build->maxSilentTime = row["maxsilent"].as<int>();
|
||||||
|
build->buildTimeout = row["timeout"].as<int>();
|
||||||
|
std::cerr << build->id << " " << build->buildTimeout << std::endl;
|
||||||
|
|
||||||
newBuilds.push_back(build);
|
newBuilds.push_back(build);
|
||||||
}
|
}
|
||||||
|
@ -975,8 +981,8 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
|
|
||||||
if (!build) build = *dependents.begin();
|
if (!build) build = *dependents.begin();
|
||||||
|
|
||||||
printMsg(lvlInfo, format("performing step ‘%1%’ on ‘%2%’ (needed by %3% builds)")
|
printMsg(lvlInfo, format("performing step ‘%1%’ on ‘%2%’ (needed by build %3% and %4% others)")
|
||||||
% step->drvPath % machine->sshName % dependents.size());
|
% step->drvPath % machine->sshName % build->id % (dependents.size() - 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto conn(dbPool.get());
|
auto conn(dbPool.get());
|
||||||
|
@ -1005,7 +1011,9 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
buildRemote(store, machine->sshName, machine->sshKey, step->drvPath, step->drv, logDir, result);
|
/* FIXME: referring builds may have conflicting timeouts. */
|
||||||
|
buildRemote(store, machine->sshName, machine->sshKey, step->drvPath, step->drv,
|
||||||
|
logDir, build->maxSilentTime, build->buildTimeout, result);
|
||||||
} catch (Error & e) {
|
} catch (Error & e) {
|
||||||
result.status = RemoteResult::rrMiscFailure;
|
result.status = RemoteResult::rrMiscFailure;
|
||||||
result.errorMsg = e.msg();
|
result.errorMsg = e.msg();
|
||||||
|
@ -1066,9 +1074,13 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
/* Failure case. */
|
/* Failure case. */
|
||||||
|
|
||||||
BuildStatus buildStatus =
|
BuildStatus buildStatus =
|
||||||
result.status == RemoteResult::rrPermanentFailure ? bsFailed : bsAborted;
|
result.status == RemoteResult::rrPermanentFailure ? bsFailed :
|
||||||
|
result.status == RemoteResult::rrTimedOut ? bsTimedOut :
|
||||||
|
bsAborted;
|
||||||
BuildStepStatus buildStepStatus =
|
BuildStepStatus buildStepStatus =
|
||||||
result.status == RemoteResult::rrPermanentFailure ? bssFailed : bssAborted;
|
result.status == RemoteResult::rrPermanentFailure ? bssFailed :
|
||||||
|
result.status == RemoteResult::rrTimedOut ? bssTimedOut :
|
||||||
|
bssAborted;
|
||||||
|
|
||||||
/* For regular failures, we don't care about the error
|
/* For regular failures, we don't care about the error
|
||||||
message. */
|
message. */
|
||||||
|
@ -1223,6 +1235,8 @@ void State::run()
|
||||||
|
|
||||||
auto queueMonitorThread = std::thread(&State::queueMonitor, this);
|
auto queueMonitorThread = std::thread(&State::queueMonitor, this);
|
||||||
|
|
||||||
|
sleep(5);
|
||||||
|
|
||||||
std::thread(&State::dispatcher, this).detach();
|
std::thread(&State::dispatcher, this).detach();
|
||||||
|
|
||||||
queueMonitorThread.join();
|
queueMonitorThread.join();
|
||||||
|
|
|
@ -204,6 +204,8 @@ BLOCK renderBuildStatusIcon;
|
||||||
<img src="[% c.uri_for("/static/images/forbidden_${size}.png") %]" alt="Cancelled" class="build-status" />
|
<img src="[% c.uri_for("/static/images/forbidden_${size}.png") %]" alt="Cancelled" class="build-status" />
|
||||||
[% ELSIF buildstatus == 6 %]
|
[% ELSIF buildstatus == 6 %]
|
||||||
<img src="[% c.uri_for("/static/images/error_${size}.png") %]" alt="Failed (with result)" class="build-status" />
|
<img src="[% c.uri_for("/static/images/error_${size}.png") %]" alt="Failed (with result)" class="build-status" />
|
||||||
|
[% ELSIF buildstatus == 7 %]
|
||||||
|
<img src="[% c.uri_for("/static/images/warning_${size}.png") %]" alt="Timed out" class="build-status" />
|
||||||
[% ELSE %]
|
[% ELSE %]
|
||||||
<img src="[% c.uri_for("/static/images/error_${size}.png") %]" alt="Failed" class="build-status" />
|
<img src="[% c.uri_for("/static/images/error_${size}.png") %]" alt="Failed" class="build-status" />
|
||||||
[% END;
|
[% END;
|
||||||
|
@ -229,6 +231,8 @@ BLOCK renderStatus;
|
||||||
<span class="error">Cancelled by user</span>
|
<span class="error">Cancelled by user</span>
|
||||||
[% ELSIF buildstatus == 6 %]
|
[% ELSIF buildstatus == 6 %]
|
||||||
<span class="error">Build failed (with result)</span>
|
<span class="error">Build failed (with result)</span>
|
||||||
|
[% ELSIF buildstatus == 7 %]
|
||||||
|
<span class="error">Timed out</span>
|
||||||
[% ELSIF buildstatus == 9 %]
|
[% ELSIF buildstatus == 9 %]
|
||||||
<span class="error">Unsupported system type</span>
|
<span class="error">Unsupported system type</span>
|
||||||
[% ELSE %]
|
[% ELSE %]
|
||||||
|
|
|
@ -180,6 +180,7 @@ create table Builds (
|
||||||
-- 4 = build cancelled (removed from queue; never built)
|
-- 4 = build cancelled (removed from queue; never built)
|
||||||
-- 5 = build not done because a dependency failed previously (obsolete)
|
-- 5 = build not done because a dependency failed previously (obsolete)
|
||||||
-- 6 = failure with output
|
-- 6 = failure with output
|
||||||
|
-- 7 = timed out
|
||||||
-- 9 = unsupported system type
|
-- 9 = unsupported system type
|
||||||
buildStatus integer,
|
buildStatus integer,
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue