hydra-eval-jobset: incrementally ingest eval results
Some checks failed
Test / tests (push) Has been cancelled
Test / tests (pull_request) Has been cancelled

nix-eval-jobs streams output, unlike hydra-eval-jobs. Now that we've
migrated, we can use this to:

1. Use less RAM by avoiding buffering a whole eval's worth of metadata
   into a Perl string and an array of JSON objects.
2. Make evals latency a bit lower by allowing the queue runner to start
   ingesting builds faster.
This commit is contained in:
Pierre Bourdon 2024-07-17 12:05:41 +02:00
parent 370a4bf138
commit b0e9b4b2f9
Signed by: delroth
GPG key ID: 6FB80DCD84DA0F1C

View file

@ -17,6 +17,7 @@ use Hydra::Helper::Nix;
use Hydra::Model::DB;
use Hydra::Plugin;
use Hydra::Schema;
use IPC::Run;
use JSON::MaybeXS;
use Net::Statsd;
use Nix::Store;
@ -383,23 +384,33 @@ sub evalJobs {
print STDERR "evaluator: @escaped\n";
}
(my $res, my $jobsJSONLines, my $stderr) = captureStdoutStderr(21600, @cmd);
die "nix-eval-jobs returned " . ($res & 127 ? "signal $res" : "exit code " . ($res >> 8))
. ":\n" . ($stderr ? decode("utf-8", $stderr) : "(no output)\n")
if $res;
my $h = IPC::Run::start \@cmd,
'>', IPC::Run::new_chunker, \my $out,
'2>', \my $err;
print STDERR "$stderr";
return sub {
while (1) {
$h->pump;
if (!defined $out && !defined $err) {
$h->finish;
if ($?) {
die "nix-eval-jobs returned " . ($? & 127 ? "signal $?" : "exit code " . ($? >> 8)) . "\n";
}
return;
}
# XXX: take advantage of nix-eval-jobs's streaming instead of parsing everything in one block at
# the end.
my @jobs;
foreach my $line (split(/\n/, $jobsJSONLines)) {
last if $line eq "";
if (defined $err) {
print STDERR "$err";
undef $err;
}
push(@jobs, decode_json($line));
if (defined $out && $out ne '') {
my $job = decode_json($out);
undef $out;
return $job;
}
}
};
return @jobs;
}
@ -716,17 +727,11 @@ sub checkJobsetWrapped {
# Evaluate the job expression.
my $evalStart = clock_gettime(CLOCK_MONOTONIC);
my @jobs = evalJobs($project->name . ":" . $jobset->name, $inputInfo, $jobset->nixexprinput, $jobset->nixexprpath, $flakeRef);
my $evalStop = clock_gettime(CLOCK_MONOTONIC);
if ($jobsetsJobset) {
die "The .jobsets jobset must only have a single job named 'jobsets'"
unless (scalar @jobs) == 1 && $jobs[0]->{attr} eq "jobsets";
}
Net::Statsd::timing("hydra.evaluator.eval_time", int(($evalStop - $evalStart) * 1000));
my $evalStop;
my $jobsIter = evalJobs($project->name . ":" . $jobset->name, $inputInfo, $jobset->nixexprinput, $jobset->nixexprpath, $flakeRef);
if ($dryRun) {
foreach my $job (@jobs) {
while (defined(my $job = $jobsIter->())) {
my $name = $job->{attr};
if (defined $job->{drvPath}) {
print STDERR "good job $name: $job->{drvPath}\n";
@ -737,31 +742,20 @@ sub checkJobsetWrapped {
return;
}
my $jobOutPathMap = {};
my $jobsetChanged = 0;
my $dbStart = clock_gettime(CLOCK_MONOTONIC);
# Store the error messages for jobs that failed to evaluate.
my $evaluationErrorTime = time;
my $evaluationErrorMsg = "";
foreach my $job (@jobs) {
next unless defined $job->{error};
$evaluationErrorMsg .=
($job->{attr} ne "" ? "in job $job->{attr}" : "at top-level") .
":\n" . $job->{error} . "\n\n";
}
setJobsetError($jobset, $evaluationErrorMsg, $evaluationErrorTime);
my $evaluationErrorRecord = $db->resultset('EvaluationErrors')->create(
{ errormsg => $evaluationErrorMsg
, errortime => $evaluationErrorTime
}
);
my $jobOutPathMap = {};
my $jobsetChanged = 0;
my %buildMap;
$db->txn_do(sub {
$db->txn_do(sub {
my $prevEval = getPrevJobsetEval($db, $jobset, 1);
# Clear the "current" flag on all builds. Since we're in a
@ -774,7 +768,7 @@ sub checkJobsetWrapped {
, evaluationerror => $evaluationErrorRecord
, timestamp => time
, checkouttime => abs(int($checkoutStop - $checkoutStart))
, evaltime => abs(int($evalStop - $evalStart))
, evaltime => 0
, hasnewbuilds => 0
, nrbuilds => 0
, flake => $flakeRef
@ -782,11 +776,18 @@ sub checkJobsetWrapped {
, nixexprpath => $jobset->nixexprpath
});
# Schedule each successfully evaluated job.
foreach my $job (permute(@jobs)) {
next if defined $job->{error};
#print STDERR "considering job " . $project->name, ":", $jobset->name, ":", $job->{jobName} . "\n";
checkBuild($db, $jobset, $ev, $inputInfo, $job, \%buildMap, $prevEval, $jobOutPathMap, $plugins);
while (defined(my $job = $jobsIter->())) {
if ($jobsetsJobset) {
die "The .jobsets jobset must only have a single job named 'jobsets'"
unless $job->{attr} eq "jobsets";
}
$evaluationErrorMsg .=
($job->{attr} ne "" ? "in job $job->{attr}" : "at top-level") .
":\n" . $job->{error} . "\n\n" if defined $job->{error};
checkBuild($db, $jobset, $ev, $inputInfo, $job, \%buildMap, $prevEval, $jobOutPathMap, $plugins)
unless defined $job->{error};
}
# Have any builds been added or removed since last time?
@ -846,11 +847,15 @@ sub checkJobsetWrapped {
$jobset->update({ enabled => 0 }) if $jobset->enabled == 2;
$jobset->update({ lastcheckedtime => time, forceeval => undef });
$evaluationErrorRecord->update({ errormsg => $evaluationErrorMsg });
setJobsetError($jobset, $evaluationErrorMsg, $evaluationErrorTime);
$evalStop = clock_gettime(CLOCK_MONOTONIC);
$ev->update({ evaltime => abs(int($evalStop - $evalStart)) });
});
my $dbStop = clock_gettime(CLOCK_MONOTONIC);
Net::Statsd::timing("hydra.evaluator.db_time", int(($dbStop - $dbStart) * 1000));
Net::Statsd::timing("hydra.evaluator.eval_time", int(($evalStop - $evalStart) * 1000));
Net::Statsd::increment("hydra.evaluator.evals");
Net::Statsd::increment("hydra.evaluator.cached_evals") unless $jobsetChanged;
}