Output evaluation errors without crashing if aggregate job is broken.

At the moment, aggregate jobs can easily break and cause the entire
evaluation to fail, which is not ideal. For Nixpkgs, we do have some
important aggregate jobs (like `tested`), but for debugging and building
purposes it's still useful to get a partial result even if the channel
won't actually advance.

This commit changes the behaviour of hydra-eval-jobs such that it
aggregates any errors found during the construction of an aggregate, and
will instead annotate the job with the evaluation failure such that it
shows up in a "cleaner" way.

There are really two types of failure that we care about: one is where
the attribute just ends up missing altogether in the final output, and
also where the attribute is in the output but fails to evaluate. Both
are handled here.

Note that this does mean that the same error message may be output
multiple times, but this aids debuggability because it'll be much
clearer what's blocking the job from being created.
This commit is contained in:
Luke Granger-Brown 2021-09-22 20:54:58 +00:00
parent f2b51a017b
commit 67ebce8493
3 changed files with 97 additions and 18 deletions

View file

@ -1,6 +1,7 @@
#include <map>
#include <iostream>
#include <thread>
#include <optional>
#include <unordered_map>
#include "shared.hh"
#include "store-api.hh"
@ -442,11 +443,30 @@ int main(int argc, char * * argv)
auto named = job.find("namedConstituents");
if (named == job.end()) continue;
std::unordered_map<std::string, std::string> brokenJobs;
auto getNonBrokenJobOrRecordError = [&brokenJobs, &jobName, &state](
const std::string & childJobName) -> std::optional<nlohmann::json> {
auto childJob = state->jobs.find(childJobName);
if (childJob == state->jobs.end()) {
printError("aggregate job '%s' references non-existent job '%s'", jobName, childJobName);
brokenJobs[childJobName] = "does not exist";
return std::nullopt;
}
if (childJob->find("error") != childJob->end()) {
std::string error = (*childJob)["error"];
printError("aggregate job '%s' references broken job '%s': %s", jobName, childJobName, error);
brokenJobs[childJobName] = error;
return std::nullopt;
}
return *childJob;
};
if (myArgs.dryRun) {
for (std::string jobName2 : *named) {
auto job2 = state->jobs.find(jobName2);
if (job2 == state->jobs.end())
throw Error("aggregate job '%s' references non-existent job '%s'", jobName, jobName2);
auto job2 = getNonBrokenJobOrRecordError(jobName2);
if (!job2) {
continue;
}
std::string drvPath2 = (*job2)["drvPath"];
job["constituents"].push_back(drvPath2);
}
@ -455,15 +475,17 @@ int main(int argc, char * * argv)
auto drv = store->readDerivation(drvPath);
for (std::string jobName2 : *named) {
auto job2 = state->jobs.find(jobName2);
if (job2 == state->jobs.end())
throw Error("aggregate job '%s' references non-existent job '%s'", jobName, jobName2);
auto job2 = getNonBrokenJobOrRecordError(jobName2);
if (!job2) {
continue;
}
auto drvPath2 = store->parseStorePath((std::string) (*job2)["drvPath"]);
auto drv2 = store->readDerivation(drvPath2);
job["constituents"].push_back(store->printStorePath(drvPath2));
drv.inputDrvs[drvPath2] = {drv2.outputs.begin()->first};
}
if (brokenJobs.empty()) {
std::string drvName(drvPath.name());
assert(hasSuffix(drvName, drvExtension));
drvName.resize(drvName.size() - drvExtension.size());
@ -478,8 +500,17 @@ int main(int argc, char * * argv)
job["drvPath"] = newDrvPath;
job["outputs"]["out"] = store->printStorePath(outPath);
}
}
job.erase("namedConstituents");
if (!brokenJobs.empty()) {
std::stringstream ss;
for (const auto& [jobName, error] : brokenJobs) {
ss << jobName << ": " << error << "\n";
}
job["error"] = ss.str();
}
}
std::cout << state->jobs.dump(2) << "\n";

View file

@ -0,0 +1,16 @@
with import ./config.nix;
{
broken = mkDerivation {
name = "broken";
_hydraAggregate = true;
constituents = [
"does-not-exist"
"does-not-evaluate"
];
builder = ./fail.sh;
};
# does-not-exist doesn't exist.
does-not-evaluate = assert false; {};
}

View file

@ -0,0 +1,32 @@
use feature 'unicode_strings';
use strict;
use warnings;
use Setup;
my %ctx = test_init();
require Hydra::Schema;
require Hydra::Model::DB;
use Test2::V0;
my $db = Hydra::Model::DB->new;
hydra_setup($db);
my $project = $db->resultset('Projects')->create({name => "tests", displayname => "", owner => "root"});
my $jobset = createBaseJobset("broken-constituent", "broken-constituent.nix", $ctx{jobsdir});
ok(evalSucceeds($jobset), "Evaluating jobs/broken-constituent.nix should exit with return code 0");
is(nrQueuedBuildsForJobset($jobset), 0, "Evaluating jobs/broken-constituent.nix should not queue any builds");
like(
$jobset->errormsg,
qr/^does-not-exist: does not exist$/m,
"Evaluating jobs/broken-constituent.nix should log an error for does-not-exist");
like(
$jobset->errormsg,
qr/^does-not-evaluate: error: assertion 'false' failed$/m,
"Evaluating jobs/broken-constituent.nix should log an error for does-not-evaluate");
done_testing;