From 67ebce8493a1c9816dc5de4e819368415c1cc5c6 Mon Sep 17 00:00:00 2001 From: Luke Granger-Brown Date: Wed, 22 Sep 2021 20:54:58 +0000 Subject: [PATCH] Output evaluation errors without crashing if aggregate job is broken. At the moment, aggregate jobs can easily break and cause the entire evaluation to fail, which is not ideal. For Nixpkgs, we do have some important aggregate jobs (like `tested`), but for debugging and building purposes it's still useful to get a partial result even if the channel won't actually advance. This commit changes the behaviour of hydra-eval-jobs such that it aggregates any errors found during the construction of an aggregate, and will instead annotate the job with the evaluation failure such that it shows up in a "cleaner" way. There are really two types of failure that we care about: one is where the attribute just ends up missing altogether in the final output, and also where the attribute is in the output but fails to evaluate. Both are handled here. Note that this does mean that the same error message may be output multiple times, but this aids debuggability because it'll be much clearer what's blocking the job from being created. --- src/hydra-eval-jobs/hydra-eval-jobs.cc | 67 +++++++++++++++++++------- t/jobs/broken-constituent.nix | 16 ++++++ t/queue-runner/constituents.t | 32 ++++++++++++ 3 files changed, 97 insertions(+), 18 deletions(-) create mode 100644 t/jobs/broken-constituent.nix create mode 100644 t/queue-runner/constituents.t diff --git a/src/hydra-eval-jobs/hydra-eval-jobs.cc b/src/hydra-eval-jobs/hydra-eval-jobs.cc index f50ac5e5..acffe1d1 100644 --- a/src/hydra-eval-jobs/hydra-eval-jobs.cc +++ b/src/hydra-eval-jobs/hydra-eval-jobs.cc @@ -1,6 +1,7 @@ -#include #include #include +#include +#include #include "shared.hh" #include "store-api.hh" @@ -442,11 +443,30 @@ int main(int argc, char * * argv) auto named = job.find("namedConstituents"); if (named == job.end()) continue; + std::unordered_map brokenJobs; + auto getNonBrokenJobOrRecordError = [&brokenJobs, &jobName, &state]( + const std::string & childJobName) -> std::optional { + auto childJob = state->jobs.find(childJobName); + if (childJob == state->jobs.end()) { + printError("aggregate job '%s' references non-existent job '%s'", jobName, childJobName); + brokenJobs[childJobName] = "does not exist"; + return std::nullopt; + } + if (childJob->find("error") != childJob->end()) { + std::string error = (*childJob)["error"]; + printError("aggregate job '%s' references broken job '%s': %s", jobName, childJobName, error); + brokenJobs[childJobName] = error; + return std::nullopt; + } + return *childJob; + }; + if (myArgs.dryRun) { for (std::string jobName2 : *named) { - auto job2 = state->jobs.find(jobName2); - if (job2 == state->jobs.end()) - throw Error("aggregate job '%s' references non-existent job '%s'", jobName, jobName2); + auto job2 = getNonBrokenJobOrRecordError(jobName2); + if (!job2) { + continue; + } std::string drvPath2 = (*job2)["drvPath"]; job["constituents"].push_back(drvPath2); } @@ -455,31 +475,42 @@ int main(int argc, char * * argv) auto drv = store->readDerivation(drvPath); for (std::string jobName2 : *named) { - auto job2 = state->jobs.find(jobName2); - if (job2 == state->jobs.end()) - throw Error("aggregate job '%s' references non-existent job '%s'", jobName, jobName2); + auto job2 = getNonBrokenJobOrRecordError(jobName2); + if (!job2) { + continue; + } auto drvPath2 = store->parseStorePath((std::string) (*job2)["drvPath"]); auto drv2 = store->readDerivation(drvPath2); job["constituents"].push_back(store->printStorePath(drvPath2)); drv.inputDrvs[drvPath2] = {drv2.outputs.begin()->first}; } - std::string drvName(drvPath.name()); - assert(hasSuffix(drvName, drvExtension)); - drvName.resize(drvName.size() - drvExtension.size()); - auto h = std::get(hashDerivationModulo(*store, drv, true)); - auto outPath = store->makeOutputPath("out", h, drvName); - drv.env["out"] = store->printStorePath(outPath); - drv.outputs.insert_or_assign("out", DerivationOutput { .output = DerivationOutputInputAddressed { .path = outPath } }); - auto newDrvPath = store->printStorePath(writeDerivation(*store, drv)); + if (brokenJobs.empty()) { + std::string drvName(drvPath.name()); + assert(hasSuffix(drvName, drvExtension)); + drvName.resize(drvName.size() - drvExtension.size()); + auto h = std::get(hashDerivationModulo(*store, drv, true)); + auto outPath = store->makeOutputPath("out", h, drvName); + drv.env["out"] = store->printStorePath(outPath); + drv.outputs.insert_or_assign("out", DerivationOutput { .output = DerivationOutputInputAddressed { .path = outPath } }); + auto newDrvPath = store->printStorePath(writeDerivation(*store, drv)); - debug("rewrote aggregate derivation %s -> %s", store->printStorePath(drvPath), newDrvPath); + debug("rewrote aggregate derivation %s -> %s", store->printStorePath(drvPath), newDrvPath); - job["drvPath"] = newDrvPath; - job["outputs"]["out"] = store->printStorePath(outPath); + job["drvPath"] = newDrvPath; + job["outputs"]["out"] = store->printStorePath(outPath); + } } job.erase("namedConstituents"); + + if (!brokenJobs.empty()) { + std::stringstream ss; + for (const auto& [jobName, error] : brokenJobs) { + ss << jobName << ": " << error << "\n"; + } + job["error"] = ss.str(); + } } std::cout << state->jobs.dump(2) << "\n"; diff --git a/t/jobs/broken-constituent.nix b/t/jobs/broken-constituent.nix new file mode 100644 index 00000000..d895129d --- /dev/null +++ b/t/jobs/broken-constituent.nix @@ -0,0 +1,16 @@ +with import ./config.nix; +{ + broken = mkDerivation { + name = "broken"; + _hydraAggregate = true; + constituents = [ + "does-not-exist" + "does-not-evaluate" + ]; + builder = ./fail.sh; + }; + + # does-not-exist doesn't exist. + + does-not-evaluate = assert false; {}; +} diff --git a/t/queue-runner/constituents.t b/t/queue-runner/constituents.t new file mode 100644 index 00000000..c6333642 --- /dev/null +++ b/t/queue-runner/constituents.t @@ -0,0 +1,32 @@ +use feature 'unicode_strings'; +use strict; +use warnings; +use Setup; + +my %ctx = test_init(); + +require Hydra::Schema; +require Hydra::Model::DB; + +use Test2::V0; + +my $db = Hydra::Model::DB->new; +hydra_setup($db); + +my $project = $db->resultset('Projects')->create({name => "tests", displayname => "", owner => "root"}); + +my $jobset = createBaseJobset("broken-constituent", "broken-constituent.nix", $ctx{jobsdir}); + +ok(evalSucceeds($jobset), "Evaluating jobs/broken-constituent.nix should exit with return code 0"); +is(nrQueuedBuildsForJobset($jobset), 0, "Evaluating jobs/broken-constituent.nix should not queue any builds"); + +like( + $jobset->errormsg, + qr/^does-not-exist: does not exist$/m, + "Evaluating jobs/broken-constituent.nix should log an error for does-not-exist"); +like( + $jobset->errormsg, + qr/^does-not-evaluate: error: assertion 'false' failed$/m, + "Evaluating jobs/broken-constituent.nix should log an error for does-not-evaluate"); + +done_testing;