forked from lix-project/hydra
Warn if PostgreSQL appears stalled
This commit is contained in:
parent
922dc541c2
commit
7cd08c7c46
|
@ -112,6 +112,7 @@ bool State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
|||
/* Create a build step record indicating that we started
|
||||
building. */
|
||||
{
|
||||
auto mc = startDbUpdate();
|
||||
pqxx::work txn(*conn);
|
||||
stepNr = createBuildStep(txn, result.startTime, build, step, machine->sshName, bssBusy);
|
||||
txn.commit();
|
||||
|
@ -165,6 +166,7 @@ bool State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
|||
retry = step_->tries + 1 < maxTries;
|
||||
}
|
||||
if (retry) {
|
||||
auto mc = startDbUpdate();
|
||||
pqxx::work txn(*conn);
|
||||
finishBuildStep(txn, result.startTime, result.stopTime, result.overhead, build->id,
|
||||
stepNr, machine->sshName, bssAborted, result.errorMsg);
|
||||
|
@ -213,6 +215,8 @@ bool State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
|||
|
||||
/* Update the database. */
|
||||
{
|
||||
auto mc = startDbUpdate();
|
||||
|
||||
pqxx::work txn(*conn);
|
||||
|
||||
finishBuildStep(txn, result.startTime, result.stopTime, result.overhead,
|
||||
|
@ -299,6 +303,8 @@ bool State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
|||
|
||||
/* Update the database. */
|
||||
{
|
||||
auto mc = startDbUpdate();
|
||||
|
||||
pqxx::work txn(*conn);
|
||||
|
||||
BuildStatus buildStatus =
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
|
||||
typedef std::atomic<unsigned long> counter;
|
||||
|
||||
|
@ -8,5 +9,9 @@ struct MaintainCount
|
|||
{
|
||||
counter & c;
|
||||
MaintainCount(counter & c) : c(c) { c++; }
|
||||
MaintainCount(counter & c, std::function<void(unsigned long)> warn) : c(c)
|
||||
{
|
||||
warn(++c);
|
||||
}
|
||||
~MaintainCount() { auto prev = c--; assert(prev); }
|
||||
};
|
||||
|
|
|
@ -45,6 +45,16 @@ State::State()
|
|||
}
|
||||
|
||||
|
||||
MaintainCount State::startDbUpdate()
|
||||
{
|
||||
return MaintainCount(nrActiveDbUpdates, [](unsigned long c) {
|
||||
if (c > 6) {
|
||||
printMsg(lvlError, format("warning: %d concurrent database updates; PostgreSQL may be stalled") % c);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
ref<Store> State::getLocalStore()
|
||||
{
|
||||
return ref<Store>(_localStore);
|
||||
|
@ -552,6 +562,7 @@ void State::dumpStatus(Connection & conn, bool log)
|
|||
root.attr("nrQueueWakeups", nrQueueWakeups);
|
||||
root.attr("nrDispatcherWakeups", nrDispatcherWakeups);
|
||||
root.attr("nrDbConnections", dbPool.count());
|
||||
root.attr("nrActiveDbUpdates", nrActiveDbUpdates);
|
||||
{
|
||||
root.attr("machines");
|
||||
JSONObject nested(out);
|
||||
|
@ -661,6 +672,7 @@ void State::dumpStatus(Connection & conn, bool log)
|
|||
if (log) printMsg(lvlInfo, format("status: %1%") % out.str());
|
||||
|
||||
{
|
||||
auto mc = startDbUpdate();
|
||||
pqxx::work txn(conn);
|
||||
// FIXME: use PostgreSQL 9.5 upsert.
|
||||
txn.exec("delete from SystemStatus where what = 'queue-runner'");
|
||||
|
|
|
@ -124,6 +124,7 @@ bool State::getQueuedBuilds(Connection & conn, ref<Store> localStore,
|
|||
/* Derivation has been GC'ed prematurely. */
|
||||
printMsg(lvlError, format("aborting GC'ed build %1%") % build->id);
|
||||
if (!build->finishedInDB) {
|
||||
auto mc = startDbUpdate();
|
||||
pqxx::work txn(conn);
|
||||
txn.parameterized
|
||||
("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $3, errorMsg = $4 where id = $1 and finished = 0")
|
||||
|
@ -161,10 +162,13 @@ bool State::getQueuedBuilds(Connection & conn, ref<Store> localStore,
|
|||
Derivation drv = readDerivation(build->drvPath);
|
||||
BuildOutput res = getBuildOutput(destStore, destStore->getFSAccessor(), drv);
|
||||
|
||||
{
|
||||
auto mc = startDbUpdate();
|
||||
pqxx::work txn(conn);
|
||||
time_t now = time(0);
|
||||
markSucceededBuild(txn, build, res, true, now, now);
|
||||
txn.commit();
|
||||
}
|
||||
|
||||
build->finishedInDB = true;
|
||||
|
||||
|
@ -178,6 +182,7 @@ bool State::getQueuedBuilds(Connection & conn, ref<Store> localStore,
|
|||
if (checkCachedFailure(r, conn)) {
|
||||
printMsg(lvlError, format("marking build %1% as cached failure") % build->id);
|
||||
if (!build->finishedInDB) {
|
||||
auto mc = startDbUpdate();
|
||||
pqxx::work txn(conn);
|
||||
|
||||
/* Find the previous build step record, first by
|
||||
|
@ -421,6 +426,7 @@ Step::ptr State::createStep(ref<Store> destStore,
|
|||
time_t stopTime = time(0);
|
||||
|
||||
{
|
||||
auto mc = startDbUpdate();
|
||||
pqxx::work txn(conn);
|
||||
createSubstitutionStep(txn, startTime, stopTime, build, drvPath, "out", i.second.path);
|
||||
txn.commit();
|
||||
|
|
|
@ -313,6 +313,7 @@ private:
|
|||
counter nrDispatcherWakeups{0};
|
||||
counter bytesSent{0};
|
||||
counter bytesReceived{0};
|
||||
counter nrActiveDbUpdates{0};
|
||||
|
||||
/* Log compressor work queue. */
|
||||
nix::Sync<std::queue<nix::Path>> logCompressorQueue;
|
||||
|
@ -359,6 +360,8 @@ public:
|
|||
|
||||
private:
|
||||
|
||||
MaintainCount startDbUpdate();
|
||||
|
||||
/* Return a store object that can access derivations produced by
|
||||
hydra-evaluator. */
|
||||
nix::ref<nix::Store> getLocalStore();
|
||||
|
|
Loading…
Reference in a new issue