hydra/src/script/hydra-update-gc-roots
Eelco Dolstra 29d5a02b94 Speed up hydra-update-gc-roots
The hydra-update-gc-roots script is taking around 95 minutes on our
Hydra instance (though a lot of that is I/O wait).  This patch
significantly reduces the number of database queries.  In particular,
the N most recent successful builds for each job in a jobset are now
determined in a single query.  Also, it removes the calls to
readlink().
2012-03-12 20:47:29 +01:00

141 lines
5 KiB
Perl
Executable file

#! /var/run/current-system/sw/bin/perl -w
use strict;
use File::Path;
use File::Basename;
use Nix::Store;
use Hydra::Schema;
use Hydra::Helper::Nix;
use POSIX qw(strftime);
my $db = openHydraDB;
my %roots;
sub addRoot {
my ($path) = @_;
registerRoot($path);
$roots{$path} = 1;
}
my @columns = ( "id", "project", "jobset", "job", "system", "finished", "outpath", "drvpath", "timestamp" );
sub keepBuild {
my ($build) = @_;
print STDERR " keeping ", ($build->finished ? "" : "scheduled "), "build ", $build->id, " (",
$build->get_column('project'), ":", $build->get_column('jobset'), ":", $build->get_column('job'), "; ",
$build->system, "; ",
strftime("%Y-%m-%d %H:%M:%S", localtime($build->timestamp)), ")\n";
if (isValidPath($build->outpath)) {
addRoot $build->outpath;
} else {
print STDERR " warning: output ", $build->outpath, " has disappeared\n" if $build->finished;
}
if (!$build->finished) {
if (isValidPath($build->drvpath)) {
addRoot $build->drvpath;
} else {
print STDERR " warning: derivation ", $build->drvpath, " has disappeared\n";
}
}
}
# Keep every build in every release of every project.
print STDERR "*** looking for release members\n";
keepBuild $_ foreach $db->resultset('Builds')->search_literal(
"exists (select 1 from releasemembers where build = me.id)", { order_by => ["project", "jobset", "job", "id"] });
# Keep all builds that have been marked as "keep".
print STDERR "*** looking for kept builds\n";
my @buildsToKeep = $db->resultset('Builds')->search(
{ finished => 1, keep => 1 }, { order_by => ["project", "jobset", "job", "id"], columns => [ @columns ] });
keepBuild $_ foreach @buildsToKeep;
# Go over all projects.
foreach my $project ($db->resultset('Projects')->search({}, { order_by => ["name"] })) {
# Go over all jobsets in this project.
foreach my $jobset ($project->jobsets->search({}, { order_by => ["name" ]})) {
my $keepnr = $jobset->keepnr;
# If the jobset has been disabled for more than one week, than
# don't keep its builds anymore.
if ($jobset->enabled == 0 && (time() - $jobset->lastcheckedtime > (7 * 24 * 3600))) {
print STDERR "*** skipping disabled jobset ", $project->name, ":", $jobset->name, "\n";
next;
}
if ($keepnr <= 0 ) {
print STDERR "*** jobset ", $project->name, ":", $jobset->name, " set to keep 0 builds\n";
next;
}
print STDERR "*** looking for the $keepnr most recent successful builds of each job in jobset ",
$project->name, ":", $jobset->name, "\n";
keepBuild $_ foreach $jobset->builds->search(
{ 'me.id' => { 'in' => \
[ "select b2.id from Builds b2 join " .
" (select distinct job, system, coalesce( " .
" (select id from builds where project = b.project and jobset = b.jobset and job = b.job and system = b.system and finished = 1 and buildStatus = 0 order by id desc offset ? limit 1)" .
" , 0) nth from builds b where project = ? and jobset = ? and isCurrent = 1) x " .
" on b2.project = ? and b2.jobset = ? and b2.job = x.job and b2.system = x.system and (id >= x.nth) where finished = 1 and buildStatus = 0"
, [ '', $keepnr - 1 ], [ '', $project->name ], [ '', $jobset->name ], [ '', $project->name ], [ '', $jobset->name ] ] }
},
{ order_by => ["job", "system", "id"], columns => [ @columns ] });
}
# Go over all views in this project.
foreach my $view ($project->views->all) {
print STDERR "*** looking for builds to keep in view ", $project->name, ":", $view->name, "\n";
(my $primaryJob) = $view->viewjobs->search({isprimary => 1});
my $jobs = [$view->viewjobs->all];
# Keep all builds belonging to the most recent successful view result.
my $latest = getLatestSuccessfulViewResult($project, $primaryJob, $jobs);
if (defined $latest) {
print STDERR " keeping latest successful view result ", $latest->id, " (", $latest->get_column('releasename'), ")\n";
my $result = getViewResult($latest, $jobs);
keepBuild $_->{build} foreach @{$result->{jobs}};
}
}
}
# For scheduled builds, we register the derivation as a GC root.
print STDERR "*** looking for scheduled builds\n";
keepBuild $_ foreach $db->resultset('Builds')->search({ finished => 0 }, { columns => [ @columns ] });
# Remove existing roots that are no longer wanted. !!! racy
print STDERR "*** removing unneeded GC roots\n";
my $gcRootsDir = getGCRootsDir;
opendir DIR, $gcRootsDir or die;
my $rootsKept = 0;
my $rootsDeleted = 0;
foreach my $link (readdir DIR) {
next if $link eq "." || $link eq "..";
my $path = "/nix/store/$link";
if (!defined $roots{$path}) {
print STDERR "removing root $path\n";
$rootsDeleted++;
unlink "$gcRootsDir/$link" or warn "cannot remove $gcRootsDir/$link";
} else {
$rootsKept++;
}
}
closedir DIR;
print STDERR "kept $rootsKept roots, deleted $rootsDeleted roots\n";