hydra/src/script/hydra-queue-runner
2013-11-06 12:15:11 +01:00

272 lines
9.8 KiB
Perl
Executable file

#! /var/run/current-system/sw/bin/perl -w
use strict;
use Cwd;
use File::Basename;
use POSIX qw(dup2 :sys_wait_h);
use Hydra::Schema;
use Hydra::Helper::Nix;
use Hydra::Model::DB;
use IO::Handle;
use Nix::Store;
use Set::Scalar;
chdir Hydra::Model::DB::getHydraPath or die;
my $db = Hydra::Model::DB->new();
STDOUT->autoflush();
my $lastTime;
#$SIG{CHLD} = 'IGNORE';
sub unlockDeadBuilds {
# Unlock builds whose building process has died.
txn_do($db, sub {
my @builds = $db->resultset('Builds')->search({finished => 0, busy => 1});
foreach my $build (@builds) {
my $pid = $build->locker;
my $unlock = 0;
if ($pid == $$) {
if (!defined $lastTime || $build->starttime < $lastTime - 300) {
$unlock = 1;
}
} elsif (kill(0, $pid) != 1) { # see if we can signal the process
$unlock = 1;
}
if ($unlock) {
print "build ", $build->id, " pid $pid died, unlocking\n";
$build->update({ busy => 0, locker => "" });
$build->buildsteps->search({ busy => 1 })->update({ busy => 0, status => 4, stoptime => time });
}
}
});
}
# Given a build, return an arbitrary queued build on which this build
# depends; or undef if no such build exists.
sub findBuildDependencyInQueue {
my ($buildsByDrv, $build) = @_;
return undef unless isValidPath($build->drvpath);
my @deps = grep { /\.drv$/ && $_ ne $build->drvpath } computeFSClosure(0, 0, $build->drvpath);
return unless scalar @deps > 0;
foreach my $d (@deps) {
my $bs = $buildsByDrv->{$d};
next unless defined $bs;
return $db->resultset('Builds')->find((@$bs)[0]);
}
return undef;
}
sub blockBuilds {
my ($buildsByDrv, $blockedBuilds, $build) = @_;
my @rdeps = grep { /\.drv$/ && $_ ne $build->drvpath } computeFSClosure(1, 0, $build->drvpath);
foreach my $drv (@rdeps) {
my $bs = $buildsByDrv->{$drv};
next if !defined $bs;
$blockedBuilds->insert($_) foreach @$bs;
}
}
sub checkBuilds {
# print "looking for runnable builds...\n";
my @buildsStarted;
my $machines = getMachines;
my %maxConcurrent;
foreach my $machineName (keys %{$machines}) {
foreach my $system (@{${$machines}{$machineName}{'systemTypes'}}) {
$maxConcurrent{$system} = (${$machines}{$machineName}{'maxJobs'} or 0) + ($maxConcurrent{$system} or 0)
}
}
txn_do($db, sub {
# Cache scheduled builds by derivation path to speed up
# findBuildDependencyInQueue.
my $buildsByDrv = {};
push @{$buildsByDrv->{$_->drvpath}}, $_->id
foreach $db->resultset('Builds')->search({ finished => 0 });
# Builds in the queue of which a dependency is already building.
my $blockedBuilds = Set::Scalar->new();
blockBuilds($buildsByDrv, $blockedBuilds, $_)
foreach $db->resultset('Builds')->search({ finished => 0, busy => 1 });
# Get the system types for the runnable builds.
my @systemTypes = $db->resultset('Builds')->search(
{ finished => 0, busy => 0 },
{ join => ['project'], select => ['system'], as => ['system'], distinct => 1 });
# Get the total number of scheduling shares.
my $totalShares = getTotalShares($db) || 1;
# For each system type, select up to the maximum number of
# concurrent build for that system type.
foreach my $system (@systemTypes) {
# How many builds are already currently executing for this
# system type?
my $nrActive = $db->resultset('Builds')->search(
{finished => 0, busy => 1, system => $system->system})->count;
(my $systemTypeInfo) = $db->resultset('SystemTypes')->search({system => $system->system});
my $max = defined $systemTypeInfo ? $systemTypeInfo->maxconcurrent : $maxConcurrent{$system->system} // 2;
my $extraAllowed = $max - $nrActive;
next if $extraAllowed <= 0;
print STDERR "starting at most $extraAllowed builds for system ${\$system->system}\n";
my $timeSpentPerJobset;
j: while ($extraAllowed-- > 0) {
my @runnableJobsets = $db->resultset('Builds')->search(
{ finished => 0, busy => 0, system => $system->system },
{ select => ['project', 'jobset'], distinct => 1 });
next if @runnableJobsets == 0;
my $windowSize = 24 * 3600;
my $costPerBuild = 30;
my $totalWindowSize = $windowSize * $max;
my @res;
foreach my $b (@runnableJobsets) {
my $jobset = $db->resultset('Jobsets')->find($b->get_column('project'), $b->get_column('jobset')) or die;
my $timeSpent = $timeSpentPerJobset->{$b->get_column('project')}->{$b->get_column('jobset')};
if (!defined $timeSpent) {
$timeSpent = $jobset->builds->search(
{ },
{ where => \ ("(finished = 0 or (me.stoptime >= " . (time() - $windowSize) . "))")
, join => 'buildsteps'
, select => \ "sum(coalesce(buildsteps.stoptime, ${\time}) - buildsteps.starttime)"
, as => "sum" })->single->get_column("sum") // 0;
# Add a 30s penalty for each started build. This
# is to account for jobsets that have running
# builds but no build steps yet.
$timeSpent += $jobset->builds->search({ finished => 0, busy => 1 })->count * $costPerBuild;
$timeSpentPerJobset->{$b->get_column('project')}->{$b->get_column('jobset')} = $timeSpent;
}
my $share = $jobset->schedulingshares || 1; # prevent division by zero
my $used = $timeSpent / ($totalWindowSize * ($share / $totalShares));
#printf STDERR "%s:%s: %d s, total used = %.2f%%, share used = %.2f%%\n", $jobset->get_column('project'), $jobset->name, $timeSpent, $timeSpent / $totalWindowSize * 100, $used * 100;
push @res, { jobset => $jobset, used => $used };
}
foreach my $r (sort { $a->{used} <=> $b->{used} } @res) {
my $jobset = $r->{jobset};
#print STDERR "selected ", $jobset->get_column('project'), ':', $jobset->name, "\n";
# Select the highest-priority build for this jobset.
my @builds = $jobset->builds->search(
{ finished => 0, busy => 0, system => $system->system },
{ order_by => ["priority DESC", "id"] });
foreach my $build (@builds) {
next if $blockedBuilds->has($build->id);
# Find a dependency of $build that has no queued
# dependencies itself. This isn't strictly necessary,
# but it ensures that Nix builds are done as part of
# their corresponding Hydra builds, rather than as a
# dependency of some other Hydra build.
while (my $dep = findBuildDependencyInQueue($buildsByDrv, $build)) {
$build = $dep;
}
next if $build->busy;
printf STDERR "starting build %d (%s:%s:%s) on %s; jobset at %.2f%% of its share\n",
$build->id, $build->project->name, $build->jobset->name, $build->job->name, $build->system, $r->{used} * 100;
my $logfile = getcwd . "/logs/" . $build->id;
mkdir(dirname $logfile);
unlink($logfile);
$build->update(
{ busy => 1
, locker => $$
, logfile => $logfile
});
push @buildsStarted, $build;
$timeSpentPerJobset->{$jobset->get_column('project')}->{$jobset->name} += $costPerBuild;
blockBuilds($buildsByDrv, $blockedBuilds, $build);
next j;
}
}
last; # nothing found, give up on this system type
}
}
$lastTime = time();
$_->update({ starttime => time() }) foreach @buildsStarted;
});
# Actually start the builds we just selected. We need to do this
# outside the transaction in case it aborts or something.
foreach my $build (@buildsStarted) {
my $id = $build->id;
eval {
my $logfile = $build->logfile;
my $child = fork();
die unless defined $child;
if ($child == 0) {
eval {
open LOG, ">$logfile" or die "cannot create logfile $logfile";
POSIX::dup2(fileno(LOG), 1) or die;
POSIX::dup2(fileno(LOG), 2) or die;
exec("hydra-build", $id);
};
warn "cannot start build $id: $@";
POSIX::_exit(1);
}
};
if ($@) {
warn $@;
txn_do($db, sub {
$build->update({ busy => 0, locker => $$ });
});
}
}
}
if (scalar(@ARGV) == 1 && $ARGV[0] eq "--unlock") {
unlockDeadBuilds;
exit 0;
}
while (1) {
eval {
# Clean up zombies.
while ((waitpid(-1, &WNOHANG)) > 0) { };
unlockDeadBuilds;
checkBuilds;
};
warn $@ if $@;
# print "sleeping...\n";
sleep(5);
}