From 7c976d2aecba43cd5086345ffa8013f652b2678d Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 26 Jul 2017 15:17:51 +0200 Subject: [PATCH] hydra-queue-runner: Make build notification more reliable Previously, when hydra-queue-runner was restarted, any pending "build finished" notifications were lost. Now hydra-queue-runner marks finished but unnotified builds in the database and uses that to run pending notifications at startup. --- src/hydra-queue-runner/builder.cc | 2 +- src/hydra-queue-runner/hydra-queue-runner.cc | 25 +++++++++++++++++++- src/hydra-queue-runner/queue-monitor.cc | 3 ++- src/sql/hydra.sql | 10 +++++--- src/sql/upgrade-55.sql | 3 +++ 5 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 src/sql/upgrade-55.sql diff --git a/src/hydra-queue-runner/builder.cc b/src/hydra-queue-runner/builder.cc index 8159075b..983ebfec 100644 --- a/src/hydra-queue-runner/builder.cc +++ b/src/hydra-queue-runner/builder.cc @@ -418,7 +418,7 @@ State::StepResult State::doBuildStep(nix::ref destStore, if (build2->finishedInDB) continue; printMsg(lvlError, format("marking build %1% as failed") % build2->id); txn.parameterized - ("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $4, isCachedBuild = $5 where id = $1 and finished = 0") + ("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $4, isCachedBuild = $5, notificationPendingSince = $4 where id = $1 and finished = 0") (build2->id) ((int) (build2->drvPath != step->drvPath && result.buildStatus() == bsFailed ? bsDepFailed : result.buildStatus())) (result.startTime) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 2312eef9..b8c50a43 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -414,7 +414,7 @@ void State::markSucceededBuild(pqxx::work & txn, Build::ptr build, if (txn.parameterized("select 1 from Builds where id = $1 and finished = 0")(build->id).exec().empty()) return; txn.parameterized - ("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $4, size = $5, closureSize = $6, releaseName = $7, isCachedBuild = $8 where id = $1") + ("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $4, size = $5, closureSize = $6, releaseName = $7, isCachedBuild = $8, notificationPendingSince = $4 where id = $1") (build->id) ((int) (res.failed ? bsFailedWithOutput : bsSuccess)) (startTime) @@ -519,6 +519,16 @@ void State::notificationSender() auto now2 = std::chrono::steady_clock::now(); + if (item.type == NotificationItem::Type::BuildFinished) { + auto conn(dbPool.get()); + pqxx::work txn(*conn); + txn.parameterized + ("update Builds set notificationPendingSince = null where id = $1") + (item.id) + .exec(); + txn.commit(); + } + nrNotificationTimeMs += std::chrono::duration_cast(now2 - now1).count(); nrNotificationsDone++; @@ -837,6 +847,19 @@ void State::run(BuildID buildOne) for (uint64_t i = 0; i < maxConcurrentNotifications; ++i) std::thread(&State::notificationSender, this).detach(); + /* Enqueue notification items for builds that were finished + previously, but for which we didn't manage to send + notifications. */ + { + auto conn(dbPool.get()); + pqxx::work txn(*conn); + auto res = txn.parameterized("select id from Builds where notificationPendingSince > 0").exec(); + for (auto const & row : res) { + auto id = row["id"].as(); + enqueueNotificationItem({NotificationItem::Type::BuildFinished, id}); + } + } + /* Periodically clean up orphaned busy steps in the database. */ std::thread([&]() { while (true) { diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index 21981aa9..895363bc 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -188,7 +188,8 @@ bool State::getQueuedBuilds(Connection & conn, createBuildStep(txn, 0, build->id, ex.step, "", bsCachedFailure, "", propagatedFrom); txn.parameterized - ("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $3, isCachedBuild = 1 where id = $1 and finished = 0") + ("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $3, isCachedBuild = 1, notificationPendingSince = $3 " + "where id = $1 and finished = 0") (build->id) ((int) (ex.step->drvPath == build->drvPath ? bsFailed : bsDepFailed)) (time(0)).exec(); diff --git a/src/sql/hydra.sql b/src/sql/hydra.sql index 197a7dfb..d1c3ee37 100644 --- a/src/sql/hydra.sql +++ b/src/sql/hydra.sql @@ -222,12 +222,14 @@ create table Builds ( keep integer not null default 0, -- true means never garbage-collect the build output + notificationPendingSince integer, + check (finished = 0 or (stoptime is not null and stoptime != 0)), check (finished = 0 or (starttime is not null and starttime != 0)), - foreign key (project) references Projects(name) on update cascade, - foreign key (project, jobset) references Jobsets(project, name) on update cascade, - foreign key (project, jobset, job) references Jobs(project, jobset, name) on update cascade + foreign key (project) references Projects(name) on update cascade, + foreign key (project, jobset) references Jobsets(project, name) on update cascade, + foreign key (project, jobset, job) references Jobs(project, jobset, name) on update cascade ); @@ -677,3 +679,5 @@ create index IndexBuildsOnKeep on Builds(keep) where keep = 1; -- To get the most recent eval for a jobset. create index IndexJobsetEvalsOnJobsetId on JobsetEvals(project, jobset, id desc) where hasNewBuilds = 1; + +create index IndexBuildsOnNotificationPendingSince on Builds(notificationPendingSince) where notificationPendingSince is not null; diff --git a/src/sql/upgrade-55.sql b/src/sql/upgrade-55.sql new file mode 100644 index 00000000..cd8de9e8 --- /dev/null +++ b/src/sql/upgrade-55.sql @@ -0,0 +1,3 @@ +alter table Builds add column notificationPendingSince integer; + +create index IndexBuildsOnNotificationPendingSince on Builds(notificationPendingSince) where notificationPendingSince is not null;