From 285754aff69202b02846132100d7b90276d621a0 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 13 Mar 2017 16:19:22 +0100 Subject: [PATCH] hydra-evaluator improvements * The "Jobset" page now shows when evaluations are in progress (rather than just pending). * Restored the ability to do a single evaluation from the command line by doing "hydra-evaluator <project> <jobset>". * Fix some consistency issues between jobset status in PostgreSQL and in hydra-evaluator. In particular, "lastCheckedTime" was never updated internally. --- hydra-module.nix | 1 + src/hydra-evaluator/hydra-evaluator.cc | 142 +++++++++++++++++++------ src/lib/Hydra/Controller/Jobset.pm | 2 +- src/lib/Hydra/Schema/Jobsets.pm | 11 +- src/root/jobset.tt | 9 +- src/script/hydra-eval-jobset | 18 ---- src/sql/hydra.sql | 1 + src/sql/upgrade-53.sql | 1 + 8 files changed, 127 insertions(+), 58 deletions(-) create mode 100644 src/sql/upgrade-53.sql diff --git a/hydra-module.nix b/hydra-module.nix index 276cb2ae..2aa8f28f 100644 --- a/hydra-module.nix +++ b/hydra-module.nix @@ -345,6 +345,7 @@ in environment = env; serviceConfig = { ExecStart = "@${cfg.package}/bin/hydra-evaluator hydra-evaluator"; + ExecStopPost = "${cfg.package}/bin/hydra-evaluator --unlock"; User = "hydra"; Restart = "always"; WorkingDirectory = baseDir; diff --git a/src/hydra-evaluator/hydra-evaluator.cc b/src/hydra-evaluator/hydra-evaluator.cc index 7184f365..005fad9d 100644 --- a/src/hydra-evaluator/hydra-evaluator.cc +++ b/src/hydra-evaluator/hydra-evaluator.cc @@ -5,18 +5,19 @@ #include #include #include +#include #include #include using namespace nix; +typedef std::pair JobsetName; + struct Evaluator { nix::Pool dbPool; - typedef std::pair JobsetName; - struct Jobset { JobsetName name; @@ -27,6 +28,8 @@ struct Evaluator typedef std::map Jobsets; + std::experimental::optional evalOne; + size_t maxEvals = 4; struct State @@ -59,6 +62,8 @@ struct Evaluator for (auto const & row : res) { auto name = JobsetName{row["project"].as(), 
row["name"].as()}; + if (evalOne && name != *evalOne) continue; + auto res = state->jobsets.try_emplace(name, Jobset{name}); auto & jobset = res.first->second; @@ -69,6 +74,11 @@ struct Evaluator seen.insert(name); } + if (evalOne && seen.empty()) { + printError("the specified jobset does not exist"); + std::_Exit(1); + } + for (auto i = state->jobsets.begin(); i != state->jobsets.end(); ) if (seen.count(i->first)) ++i; @@ -80,7 +90,23 @@ struct Evaluator void startEval(State & state, Jobset & jobset) { - printInfo("starting evaluation of jobset ‘%s:%s’", jobset.name.first, jobset.name.second); + time_t now = time(0); + + printInfo("starting evaluation of jobset ‘%s:%s’ (last checked %d s ago)", + jobset.name.first, jobset.name.second, + now - jobset.lastCheckedTime); + + { + auto conn(dbPool.get()); + pqxx::work txn(*conn); + txn.parameterized + ("update Jobsets set startTime = $1 where project = $2 and name = $3") + (now) + (jobset.name.first) + (jobset.name.second) + .exec(); + txn.commit(); + } assert(jobset.pid == -1); @@ -93,23 +119,6 @@ struct Evaluator state.runningEvals++; childStarted.notify_one(); - - time_t now = time(0); - - { - auto conn(dbPool.get()); - pqxx::work txn(*conn); - txn.parameterized - ("update Jobsets set lastCheckedTime = $1, triggerTime = null where project = $2 and name = $3") - (now) - (jobset.name.first) - (jobset.name.second) - .exec(); - txn.commit(); - - jobset.lastCheckedTime = now; - jobset.triggerTime = notTriggered; - } } void startEvals(State & state) @@ -121,9 +130,10 @@ struct Evaluator /* Filter out jobsets that have been evaluated recently and have not been triggered. 
*/ for (auto i = state.jobsets.begin(); i != state.jobsets.end(); ++i) - if (i->second.pid == -1 && - (i->second.triggerTime != std::numeric_limits::max() || - (i->second.checkInterval > 0 && i->second.lastCheckedTime + i->second.checkInterval <= now))) + if (evalOne || + (i->second.pid == -1 && + (i->second.triggerTime != std::numeric_limits::max() || + (i->second.checkInterval > 0 && i->second.lastCheckedTime + i->second.checkInterval <= now)))) sorted.push_back(i); /* Put jobsets in order of ascending trigger time, last checked @@ -226,40 +236,83 @@ struct Evaluator auto state(state_.lock()); assert(state->runningEvals); state->runningEvals--; - for (auto & jobset : state->jobsets) - if (jobset.second.pid == pid) { + + // FIXME: should use a map. + for (auto & i : state->jobsets) { + auto & jobset(i.second); + + if (jobset.pid == pid) { printInfo("evaluation of jobset ‘%s:%s’ %s", - jobset.first.first, jobset.first.second, statusToString(status)); + jobset.name.first, jobset.name.second, statusToString(status)); + + auto now = time(0); + + jobset.triggerTime = notTriggered; + jobset.lastCheckedTime = now; try { + auto conn(dbPool.get()); + pqxx::work txn(*conn); + + /* Clear the trigger time to prevent this + jobset from getting stuck in an endless + failing eval loop. */ + txn.parameterized + ("update Jobsets set triggerTime = null where project = $1 and name = $2 and startTime is not null and triggerTime < startTime") + (jobset.name.first) + (jobset.name.second) + .exec(); + + /* Clear the start time. 
*/ + txn.parameterized + ("update Jobsets set startTime = null where project = $1 and name = $2") + (jobset.name.first) + (jobset.name.second) + .exec(); + if (!WIFEXITED(status) || WEXITSTATUS(status) > 1) { - auto conn(dbPool.get()); - pqxx::work txn(*conn); txn.parameterized - ("update Jobsets set errorMsg = $1, errorTime = $2") + ("update Jobsets set errorMsg = $1, lastCheckedTime = $2, errorTime = $2, fetchErrorMsg = null where project = $3 and name = $4") (fmt("evaluation %s", statusToString(status))) - (time(0)) + (now) + (jobset.name.first) + (jobset.name.second) .exec(); - txn.commit(); } + txn.commit(); + } catch (std::exception & e) { printError("exception setting jobset error: %s", e.what()); } - jobset.second.pid.release(); + jobset.pid.release(); maybeDoWork.notify_one(); + + if (evalOne) std::_Exit(0); + break; } + } } } } + void unlock() + { + auto conn(dbPool.get()); + pqxx::work txn(*conn); + txn.parameterized("update Jobsets set startTime = null").exec(); + txn.commit(); + } + void run() { + unlock(); + /* Can't be bothered to shut down cleanly. Goodbye! 
*/ - auto callback = createInterruptCallback([&]() { std::_Exit(0); }); + auto callback = createInterruptCallback([&]() { std::_Exit(1); }); std::thread reaperThread([&]() { reaper(); }); @@ -285,10 +338,29 @@ int main(int argc, char * * argv) signal(SIGTERM, SIG_DFL); signal(SIGHUP, SIG_DFL); + bool unlock = false; + + Evaluator evaluator; + + std::vector args; + parseCmdLine(argc, argv, [&](Strings::iterator & arg, const Strings::iterator & end) { - return false; + if (*arg == "--unlock") + unlock = true; + else if (hasPrefix(*arg, "-")) + return false; + args.push_back(*arg); + return true; }); - Evaluator().run(); + if (!args.empty()) { + if (args.size() != 2) throw UsageError("Syntax: hydra-evaluator [ ]"); + evaluator.evalOne = JobsetName(args[0], args[1]); + } + + if (unlock) + evaluator.unlock(); + else + evaluator.run(); }); } diff --git a/src/lib/Hydra/Controller/Jobset.pm b/src/lib/Hydra/Controller/Jobset.pm index afe72ffd..3087f477 100644 --- a/src/lib/Hydra/Controller/Jobset.pm +++ b/src/lib/Hydra/Controller/Jobset.pm @@ -43,7 +43,7 @@ sub jobset_GET { $c->stash->{evals} = getEvals($self, $c, scalar $c->stash->{jobset}->jobsetevals, 0, 10); - $c->stash->{latestEval} = $c->stash->{jobset}->jobsetevals->search({}, { rows => 1, order_by => ["id desc"] })->single; + $c->stash->{latestEval} = $c->stash->{jobset}->jobsetevals->search({ hasnewbuilds => 1 }, { rows => 1, order_by => ["id desc"] })->single; $c->stash->{totalShares} = getTotalShares($c->model('DB')->schema); diff --git a/src/lib/Hydra/Schema/Jobsets.pm b/src/lib/Hydra/Schema/Jobsets.pm index 965ffcfc..17b4ab93 100644 --- a/src/lib/Hydra/Schema/Jobsets.pm +++ b/src/lib/Hydra/Schema/Jobsets.pm @@ -134,6 +134,11 @@ __PACKAGE__->table("Jobsets"); data_type: 'boolean' is_nullable: 1 +=head2 starttime + + data_type: 'integer' + is_nullable: 1 + =cut __PACKAGE__->add_columns( @@ -173,6 +178,8 @@ __PACKAGE__->add_columns( { data_type => "text", is_nullable => 1 }, "forceeval", { data_type => "boolean", 
is_nullable => 1 }, + "starttime", + { data_type => "integer", is_nullable => 1 }, ); =head1 PRIMARY KEY @@ -345,8 +352,8 @@ __PACKAGE__->has_many( ); -# Created by DBIx::Class::Schema::Loader v0.07045 @ 2016-10-24 20:12:51 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:PSR66NnVRNTMFhDEm10erA +# Created by DBIx::Class::Schema::Loader v0.07045 @ 2017-03-09 13:03:05 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:ivYvsUyhEeaeI4EmRQ0/QQ my %hint = ( columns => [ diff --git a/src/root/jobset.tt b/src/root/jobset.tt index 283d1a25..2cf92a35 100644 --- a/src/root/jobset.tt +++ b/src/root/jobset.tt @@ -94,10 +94,15 @@ [% END %] - [% IF jobset.triggertime %] + [% IF jobset.starttime %] + + Evaluation running since: + [% INCLUDE renderRelativeDate timestamp = jobset.starttime %] + + [% ELSIF jobset.triggertime %] Evaluation pending since: - [% INCLUDE renderDateTime timestamp = jobset.triggertime %] + [% INCLUDE renderRelativeDate timestamp = jobset.triggertime %] [% END %] diff --git a/src/script/hydra-eval-jobset b/src/script/hydra-eval-jobset index 469e75c6..1ac9b06b 100755 --- a/src/script/hydra-eval-jobset +++ b/src/script/hydra-eval-jobset @@ -749,13 +749,6 @@ sub checkJobsetWrapped { sub checkJobset { my ($jobset) = @_; - print STDERR "considering jobset ", $jobset->project->name, ":", $jobset->name, - $jobset->lastcheckedtime - ? " (last checked " . (time() - $jobset->lastcheckedtime) . "s ago)\n" - : " (never checked)\n"; - - my $triggerTime = $jobset->triggertime; - my $startTime = clock_gettime(CLOCK_MONOTONIC); eval { @@ -776,17 +769,6 @@ sub checkJobset { $failed = 1; } - if (defined $triggerTime) { - txn_do($db, sub { - # Only clear the trigger time if the jobset hasn't been - # triggered in the meantime. In that case, we need to - # evaluate again. 
- my $new = $jobset->get_from_storage(); - $jobset->update({ triggertime => undef }) - if $new->triggertime == $triggerTime; - }) if !$dryRun; - } - return $failed; } diff --git a/src/sql/hydra.sql b/src/sql/hydra.sql index 7d73eb4c..7fd37f00 100644 --- a/src/sql/hydra.sql +++ b/src/sql/hydra.sql @@ -69,6 +69,7 @@ create table Jobsets ( schedulingShares integer not null default 100, fetchErrorMsg text, forceEval boolean, + startTime integer, -- if jobset is currently running check (schedulingShares > 0), primary key (project, name), foreign key (project) references Projects(name) on delete cascade on update cascade diff --git a/src/sql/upgrade-53.sql b/src/sql/upgrade-53.sql new file mode 100644 index 00000000..5b9b70b8 --- /dev/null +++ b/src/sql/upgrade-53.sql @@ -0,0 +1 @@ +alter table Jobsets add column startTime integer;