Fix extreme slowness in hydra-queue-runner

If there are builds in the queue that depend on another scheduled
build, then hydra-queue-runner will start the dependency first and
block the dependent builds.  This is implemented in
findBuildDependencyInQueue.  However, each call to
findBuildDependencyInQueue may take a second or so, so when there are
tens of thousands of such dependent builds, hydra-queue-runner spends
hours just deciding which builds *not* to do, and very little
progress is made.

So now, when a build is started, we immediately check which builds are
"blocked" by it (i.e. depend on it), and remove such builds from
consideration.
This commit is contained in:
Eelco Dolstra 2013-10-11 10:46:40 +02:00
parent a49457b2fd
commit f592ce0026

View file

@ -9,6 +9,7 @@ use Hydra::Helper::Nix;
use Hydra::Model::DB; use Hydra::Model::DB;
use IO::Handle; use IO::Handle;
use Nix::Store; use Nix::Store;
use Set::Scalar;
chdir Hydra::Model::DB::getHydraPath or die; chdir Hydra::Model::DB::getHydraPath or die;
my $db = Hydra::Model::DB->new(); my $db = Hydra::Model::DB->new();
@ -52,14 +53,25 @@ sub findBuildDependencyInQueue {
my @deps = grep { /\.drv$/ && $_ ne $build->drvpath } computeFSClosure(0, 0, $build->drvpath); my @deps = grep { /\.drv$/ && $_ ne $build->drvpath } computeFSClosure(0, 0, $build->drvpath);
return unless scalar @deps > 0; return unless scalar @deps > 0;
foreach my $d (@deps) { foreach my $d (@deps) {
my $b = $buildsByDrv->{$d}; my $bs = $buildsByDrv->{$d};
next unless defined $b; next unless defined $bs;
return $db->resultset('Builds')->find($b); return $db->resultset('Builds')->find((@$bs)[0]);
} }
return undef; return undef;
} }
# Mark the queued builds that are "blocked" by $build (i.e. depend on it).
#
# Arguments:
#   $buildsByDrv   - hash ref mapping a derivation path to an array ref of
#                    build ids
#   $blockedBuilds - set object; the id of every blocked build is added to
#                    it via ->insert
#   $build         - build object providing ->drvpath
sub blockBuilds {
    my ($buildsByDrv, $blockedBuilds, $build) = @_;

    # First argument is 1 here, whereas findBuildDependencyInQueue passes 0;
    # presumably this flips the closure direction so that it yields the
    # paths depending on $build -- TODO confirm against Nix::Store.
    my @closure = computeFSClosure(1, 0, $build->drvpath);

    for my $path (@closure) {
        # Only other derivations are of interest, never $build's own.
        next unless $path =~ /\.drv$/;
        next if $path eq $build->drvpath;

        # Skip derivations that have no entry in $buildsByDrv.
        my $ids = $buildsByDrv->{$path};
        next unless defined $ids;

        for my $id (@$ids) {
            $blockedBuilds->insert($id);
        }
    }
}
sub checkBuilds { sub checkBuilds {
# print "looking for runnable builds...\n"; # print "looking for runnable builds...\n";
@ -80,8 +92,13 @@ sub checkBuilds {
# Cache scheduled builds by derivation path to speed up # Cache scheduled builds by derivation path to speed up
# findBuildDependencyInQueue. # findBuildDependencyInQueue.
my $buildsByDrv = {}; my $buildsByDrv = {};
$buildsByDrv->{$_->drvpath} = $_->id push @{$buildsByDrv->{$_->drvpath}}, $_->id
foreach $db->resultset('Builds')->search({ finished => 0 }, { join => ['project'] }); foreach $db->resultset('Builds')->search({ finished => 0 });
# Builds in the queue of which a dependency is already building.
my $blockedBuilds = Set::Scalar->new();
blockBuilds($buildsByDrv, $blockedBuilds, $_)
foreach $db->resultset('Builds')->search({ finished => 0, busy => 1 });
# Get the system types for the runnable builds. # Get the system types for the runnable builds.
my @systemTypes = $db->resultset('Builds')->search( my @systemTypes = $db->resultset('Builds')->search(
@ -162,6 +179,8 @@ sub checkBuilds {
{ order_by => ["priority DESC", "id"] }); { order_by => ["priority DESC", "id"] });
foreach my $build (@builds) { foreach my $build (@builds) {
next if $blockedBuilds->has($build->id);
# Find a dependency of $build that has no queued # Find a dependency of $build that has no queued
# dependencies itself. This isn't strictly necessary, # dependencies itself. This isn't strictly necessary,
# but it ensures that Nix builds are done as part of # but it ensures that Nix builds are done as part of
@ -187,6 +206,8 @@ sub checkBuilds {
$timeSpentPerJobset->{$jobset->get_column('project')}->{$jobset->name} += $costPerBuild; $timeSpentPerJobset->{$jobset->get_column('project')}->{$jobset->name} += $costPerBuild;
blockBuilds($buildsByDrv, $blockedBuilds, $build);
next j; next j;
} }
} }