hydra/src/script/hydra-queue-runner

190 lines
6.3 KiB
Plaintext
Raw Normal View History

#! /var/run/current-system/sw/bin/perl -w
2008-11-10 13:33:12 +00:00
use strict;
2008-11-11 14:45:33 +00:00
use Cwd;
2008-11-28 14:36:04 +00:00
use File::Basename;
use POSIX qw(dup2 :sys_wait_h);
2008-11-25 11:09:15 +00:00
use Hydra::Schema;
2008-11-28 14:36:04 +00:00
use Hydra::Helper::Nix;
use Hydra::Model::DB;
2011-08-19 16:09:14 +00:00
use IO::Handle;
2011-11-30 14:25:28 +00:00
use Nix::Store;
2008-11-10 13:33:12 +00:00
# Run from the Hydra data directory: build log files are created
# below the current working directory (see checkBuilds).
my $hydraPath = Hydra::Model::DB::getHydraPath();

# Guard against an undefined/empty path: chdir would otherwise not
# fail but silently do something other than what we want.
die "cannot determine the Hydra data directory\n"
    unless defined $hydraPath && $hydraPath ne "";

chdir $hydraPath or die "cannot chdir to `$hydraPath': $!\n";

# Database handle shared by all subs in this script.
my $db = Hydra::Model::DB->new();

# Make progress messages appear immediately, even when stdout is
# redirected to a file or pipe.
STDOUT->autoflush();

#$SIG{CHLD} = 'IGNORE';
2008-11-29 01:01:22 +00:00
2008-11-10 13:33:12 +00:00
2008-11-28 11:16:53 +00:00
# Unlock builds whose building process has died.
#
# Looks at every unfinished build that is marked busy and decides,
# from its `locker' field (a process ID), whether the process that
# holds the lock is gone.  Such builds get `busy' reset to 0 and
# `locker' cleared, and their busy build steps are marked as stopped
# (busy 0, status 4, stoptime now).  Everything happens inside one
# transaction.  Takes no arguments.
sub unlockDeadBuilds {
    txn_do($db, sub {
        my @builds = $db->resultset('Builds')->search({finished => 0, busy => 1});

        foreach my $build (@builds) {
            my $pid = $build->locker // "";
            my $unlock = 0;

            if ($pid !~ /^[1-9][0-9]*$/) {
                # No usable process ID.  This must not reach kill():
                # an empty or zero PID makes kill(0, ...) probe our own
                # process group, which always succeeds, so the build
                # would stay locked forever.
                $unlock = 1;
            } elsif ($pid == $$) {
                # Work around sqlite locking timeouts: if the child
                # barfed because of a locked DB before updating the
                # `locker' field, then `locker' is still set to $$.
                # So if after a minute it hasn't been updated,
                # unlock the build.  !!! need a better fix for those
                # locking timeouts.
                if (($build->starttime // 0) + 60 < time()) {
                    $unlock = 1;
                }
            } elsif (!kill(0, $pid) && !$!{EPERM}) {
                # Signal 0 only tests whether the process can be
                # signalled.  EPERM means the process exists but is
                # owned by somebody else, so it is not dead.
                $unlock = 1;
            }

            if ($unlock) {
                print "build ", $build->id, " pid $pid died, unlocking\n";
                $build->update({ busy => 0, locker => ""});
                $build->buildsteps->search({ busy => 1 })->update({ busy => 0, status => 4, stoptime => time });
            }
        }
    });
}
2008-11-10 13:33:12 +00:00
# Given a build, return an arbitrary queued build on which this build
# depends; or undef if no such build exists.
#
# The dependencies are the `.drv' paths in the closure of the build's
# derivation (as reported by computeFSClosure), excluding the build's
# own derivation.  A queued build is one that is unfinished and
# enabled.
sub findBuildDependencyInQueue {
    my ($build) = @_;

    my $ownDrv = $build->drvpath;

    # Collect every derivation in the closure except our own.
    my @deps;
    foreach my $path (computeFSClosure(0, 0, $ownDrv)) {
        next unless $path =~ /\.drv$/;
        next if $path eq $ownDrv;
        push @deps, $path;
    }

    # Nothing to look up if the build has no derivation dependencies.
    return if !@deps;

    my $queued = $db->resultset('Builds')->search(
        { drvpath => \@deps, finished => 0, enabled => 1 },
        { join => ['project'], rows => 1 });

    return $queued->single;
}
2008-11-10 13:33:12 +00:00
2008-11-28 11:16:53 +00:00
# Select queued builds that can be started now, mark them as busy in
# the database, and fork a `hydra-build' process for each of them.
#
# For every system type that has runnable builds, at most
# (limit - builds already running) builds are started.  The limit is
# taken from the `SystemTypes' table if it has a row for the system
# type; otherwise it is the sum of `maxJobs' over the machines that
# getMachines reports for that system type; otherwise 2.  Builds are
# picked by descending priority, then ascending id.
#
# Takes no arguments; the return value is not meaningful.
sub checkBuilds {
    # print "looking for runnable builds...\n";

    my @buildsStarted;

    my $machines = getMachines();

    # Per system type, add up the `maxJobs' of all machines that
    # support it.
    my %maxConcurrent;
    foreach my $machineName (keys %{$machines}) {
        my $machine = $machines->{$machineName};
        my $systemTypes = $machine->{'systemTypes'};
        next unless defined $systemTypes;

        # NOTE(review): getMachines is defined elsewhere; `systemTypes'
        # is presumably an array reference.  It must be dereferenced,
        # otherwise the hash is keyed by the stringified reference and
        # the lookup by system name below never matches.  A plain
        # scalar is still accepted.
        my @systems = ref($systemTypes) eq 'ARRAY' ? @{$systemTypes} : ($systemTypes);

        foreach my $system (@systems) {
            $maxConcurrent{$system} = ($machine->{'maxJobs'} or 0) + ($maxConcurrent{$system} or 0);
        }
    }

    txn_do($db, sub {

        # Start from scratch every time this body runs.
        # NOTE(review): if txn_do re-runs the body after an aborted
        # attempt, builds selected by that attempt are no longer
        # marked busy in the database and must not be started.
        @buildsStarted = ();

        # Get the system types for the runnable builds.
        my @systemTypes = $db->resultset('Builds')->search(
            { finished => 0, busy => 0, enabled => 1 },
            { join => ['project'], select => ['system'], as => ['system'], distinct => 1 });

        # For each system type, select up to the maximum number of
        # concurrent builds for that system type.  Choose the highest
        # priority builds first, then the oldest builds.
        foreach my $system (@systemTypes) {
            my $systemName = $system->system;

            # How many builds are already currently executing for this
            # system type?
            my $nrActive = $db->resultset('Builds')->search(
                {finished => 0, busy => 1, system => $systemName})->count;

            # An explicit per-system limit in the database wins over
            # the limit derived from the machines; 2 is the fallback.
            (my $systemTypeInfo) = $db->resultset('SystemTypes')->search({system => $systemName});
            my $max = defined $systemTypeInfo
                ? $systemTypeInfo->maxconcurrent
                : ($maxConcurrent{$systemName} // 2);

            my $extraAllowed = $max - $nrActive;
            next if $extraAllowed <= 0;

            # Select the highest-priority builds to start.
            my @builds = $db->resultset('Builds')->search(
                { finished => 0, busy => 0, system => $systemName, enabled => 1 },
                { join => ['project'], order_by => ["priority DESC", "id"] });

            my $started = 0;

            foreach my $candidate (@builds) {
                my $build = $candidate;

                # Find a dependency of $build that has no queued
                # dependencies itself.  This isn't strictly necessary,
                # but it ensures that Nix builds are done as part of
                # their corresponding Hydra builds, rather than as a
                # dependency of some other Hydra build.
                while (my $dep = findBuildDependencyInQueue($build)) {
                    $build = $dep;
                }

                # The dependency we ended up with may already be running.
                next if $build->busy;

                # Start with a fresh log file for this build.
                my $logfile = getcwd() . "/logs/" . $build->id;
                mkdir(dirname $logfile);
                unlink($logfile);

                # Lock the build.  `locker' is our own PID for now;
                # unlockDeadBuilds expects it to be replaced later and
                # treats a stale $$ as a dead build after a minute.
                $build->update(
                    { busy => 1
                    , locker => $$
                    , logfile => $logfile
                    , starttime => time()
                    });

                push @buildsStarted, $build;

                last if ++$started >= $extraAllowed;
            }

            if ($started > 0) {
                print STDERR "system type `", $systemName,
                    "': $nrActive active, $max allowed, started $started builds\n";
            }
        }
    });

    # Actually start the builds we just selected.  We need to do this
    # outside the transaction in case it aborts or something.
    foreach my $build (@buildsStarted) {
        my $id = $build->id;
        print "starting build $id (", $build->project->name, ":", $build->jobset->name, ':', $build->job->name, ") on ", $build->system, "\n";

        eval {
            my $logfile = $build->logfile;
            my $child = fork();
            die "cannot fork: $!\n" unless defined $child;

            if ($child == 0) {
                # Child: send stdout and stderr to the build's log
                # file, then become `hydra-build'.
                eval {
                    open(my $log, '>', $logfile) or die "cannot create logfile $logfile: $!";
                    POSIX::dup2(fileno($log), 1) or die "cannot redirect stdout to $logfile: $!";
                    POSIX::dup2(fileno($log), 2) or die "cannot redirect stderr to $logfile: $!";
                    # exec only returns on failure; die so that the
                    # reason ends up in the warning below.
                    exec("hydra-build", $id) or die "cannot exec hydra-build: $!";
                };
                warn "cannot start build $id: $@";
                # Leave without running the parent's cleanup handlers.
                POSIX::_exit(1);
            }
        };

        if ($@) {
            # The process could not be created: release the build so
            # that it can be picked up again, clearing `locker' the
            # same way unlockDeadBuilds does.
            warn $@;
            txn_do($db, sub {
                $build->update({ busy => 0, locker => "" });
            });
        }
    }
}
# With the single argument `--unlock', only release the builds whose
# building process has died, then exit.
if (scalar(@ARGV) == 1 && $ARGV[0] eq "--unlock") {
    unlockDeadBuilds();
    exit 0;
}

# Main loop: every five seconds reap finished children, release dead
# builds and start new ones.  Errors are reported but never end the
# loop.
while (1) {
    eval {
        # Clean up zombies.
        1 while waitpid(-1, WNOHANG()) > 0;

        unlockDeadBuilds();

        checkBuilds();
    };
    warn $@ if $@;

    # print "sleeping...\n";
    sleep(5);
}