#! /var/run/current-system/sw/bin/perl -w

use strict;
use Cwd;
use File::Basename;
use POSIX qw(dup2 :sys_wait_h);
use Hydra::Schema;
use Hydra::Helper::Nix;
use Hydra::Model::DB;
use IO::Handle;
use Nix::Store;


# All relative paths below (e.g. the "logs" directory) are resolved against
# the Hydra data directory.
chdir Hydra::Model::DB::getHydraPath or die;
my $db = Hydra::Model::DB->new();

STDOUT->autoflush();

#$SIG{CHLD} = 'IGNORE';


# Unlock builds whose building process has died.
#
# Scans every unfinished build that is marked busy and clears the busy flag
# (and the busy flag of its build steps) when the process recorded in the
# `locker' column can no longer be signalled.
sub unlockDeadBuilds {
    txn_do($db, sub {
        my @builds = $db->resultset('Builds')->search({finished => 0, busy => 1});
        foreach my $build (@builds) {
            my $pid = $build->locker;
            my $unlock = 0;
            if ($pid == $$) {
                # Work around sqlite locking timeouts: if the child
                # barfed because of a locked DB before updating the
                # `locker' field, then `locker' is still set to $$.
                # So if after a minute it hasn't been updated,
                # unlock the build.  !!! need a better fix for those
                # locking timeouts.
                if ($build->starttime + 60 < time) {
                    $unlock = 1;
                }
            }
            elsif (kill(0, $pid) != 1) { # see if we can signal the process
                $unlock = 1;
            }
            if ($unlock) {
                print "build ", $build->id, " pid $pid died, unlocking\n";
                $build->update({ busy => 0, locker => "" });
                # NOTE(review): status 4 presumably means "aborted" -- confirm
                # against the schema.
                $build->buildsteps->search({ busy => 1 })->update({ busy => 0, status => 4, stoptime => time });
            }
        }
    });
}


# Find a queued build that the given build depends on.
#
# Takes a Builds row.  Lists the closure of its derivation with
# `nix-store -qR' and looks for another build in the queue (not finished, not
# busy, enabled) whose derivation is part of that closure.
#
# Returns one such Builds row, or nothing when there is none.
sub findBuildDependencyInQueue {
    my ($build) = @_;
    my $drvpath = $build->drvpath;

    # List-form pipe open: the derivation path is passed as a single argument
    # and never interpreted by a shell.  As with the former backticks, a
    # failing command simply yields an empty closure.
    my @paths;
    if (open(my $closure, '-|', 'nix-store', '-qR', $drvpath)) {
        chomp(@paths = <$closure>);
        close $closure;
    }
    @paths = reverse @paths;

    # Only other derivations are candidates.
    my @drvs = grep { /\.drv$/ && $_ ne $drvpath } @paths;
    return unless scalar @drvs > 0;

    my ($depBuild) = $db->resultset('Builds')->search(
        { drvpath => [ @drvs ], finished => 0, busy => 0, enabled => 1, disabled => 0 },
        { join => ['project'], rows => 1 });
    return $depBuild;
}


# Select runnable builds from the queue and start them.
#
# For every system type the number of builds that may run concurrently is the
# sum of `maxJobs' over all machines supporting that system type.  Builds are
# marked busy inside a transaction; the `hydra-build' processes are forked
# afterwards, outside the transaction.
sub checkBuilds {
    print "looking for runnable builds...\n";

    my @buildsStarted;

    my $machines = getMachines();

    # Maximum number of concurrent builds, keyed by system type name.
    my %maxConcurrent = ();

    foreach my $machineName (keys %{$machines}) {
        my $machine     = $machines->{$machineName};
        my $systemTypes = $machine->{'systemTypes'};

        # `systemTypes' is expected to be an array reference; iterate over
        # its elements rather than over the reference itself.  A plain
        # scalar is still accepted as a single system type.
        my @types = ref($systemTypes) eq 'ARRAY' ? @{$systemTypes}
                  : defined $systemTypes         ? ($systemTypes)
                  :                                ();

        foreach my $system (@types) {
            $maxConcurrent{$system} = ($machine->{'maxJobs'} or 0) + ($maxConcurrent{$system} or 0);
        }
    }

    txn_do($db, sub {

        # The transaction body may be executed more than once (e.g. if it
        # is retried), so forget anything selected by an aborted attempt.
        @buildsStarted = ();
        my %selected;

        # Get the system types for the runnable builds.
        my @systemTypes = $db->resultset('Builds')->search(
            { finished => 0, busy => 0, enabled => 1, disabled => 0 },
            { join => ['project'], select => ['system'], as => ['system'], distinct => 1 });

        # For each system type, select up to the maximum number of
        # concurrent build for that system type.  Choose the highest
        # priority builds first, then the oldest builds.
        foreach my $system (@systemTypes) {
            # $system is a result row; the limits are keyed by its name.
            my $systemType = $system->system;
            my $maxAllowed = $maxConcurrent{$systemType} || 0;

            # How many builds are already currently executing for this
            # system type?
            my $nrActive = $db->resultset('Builds')->search(
                {finished => 0, busy => 1, system => $systemType})->count;

            my $extraAllowed = $maxAllowed - $nrActive;
            $extraAllowed = 0 if $extraAllowed < 0;

            # Select the highest-priority builds to start.
            my @builds = $extraAllowed == 0 ? () : $db->resultset('Builds')->search(
                { finished => 0, busy => 0, system => $systemType, enabled => 1, disabled => 0 },
                { join => ['project'], order_by => ["priority DESC", "timestamp"], rows => $extraAllowed });

            print "system type `", $systemType,
                  "': $nrActive active, $maxAllowed allowed, ",
                  "starting ", scalar(@builds), " builds\n";

            foreach my $candidate (@builds) {
                # Prefer building a queued dependency first.
                my $depbuild = findBuildDependencyInQueue($candidate);
                my $build = defined $depbuild ? $depbuild : $candidate;

                # A build may be picked both as a dependency of an earlier
                # candidate and as a candidate itself; start it only once.
                next if $selected{$build->id}++;

                my $logfile = getcwd . "/logs/" . $build->id;
                mkdir(dirname $logfile);
                unlink($logfile);
                $build->update(
                    { busy => 1
                    , locker => $$
                    , logfile => $logfile
                    , starttime => time()
                    });
                push @buildsStarted, $build;
            }
        }
    });

    # Actually start the builds we just selected.  We need to do this
    # outside the transaction in case it aborts or something.
    foreach my $build (@buildsStarted) {
        my $id = $build->id;
        print "starting build $id (", $build->project->name, ":", $build->jobset->name, ':', $build->job->name, ") on ", $build->system, "\n";
        eval {
            my $logfile = $build->logfile;
            my $child = fork();
            die "cannot fork: $!\n" unless defined $child;
            if ($child == 0) {
                # Child: send stdout/stderr to the log file and become
                # hydra-build.  Never return into the parent's code.
                eval {
                    open(my $log, '>', $logfile) or die "cannot create logfile $logfile: $!\n";
                    POSIX::dup2(fileno($log), 1) or die "cannot redirect stdout: $!\n";
                    POSIX::dup2(fileno($log), 2) or die "cannot redirect stderr: $!\n";
                    exec("hydra-build", $id) or die "cannot exec hydra-build: $!\n";
                };
                warn "cannot start build $id: $@";
                POSIX::_exit(1);
            }
        };
        if ($@) {
            # The fork failed; release the build so it can be retried.
            warn $@;
            txn_do($db, sub {
                $build->busy(0);
                $build->locker($$);
                $build->update;
            });
        }
    }
}


# `--unlock' only releases builds held by dead processes and exits.
if (scalar(@ARGV) == 1 && $ARGV[0] eq "--unlock") {
    unlockDeadBuilds();
    exit 0;
}


# Main loop: reap finished children, release stale locks, start new builds.
while (1) {
    eval {
        # Clean up zombies.
        while ((waitpid(-1, WNOHANG)) > 0) { };

        unlockDeadBuilds();
        checkBuilds();
    };
    warn $@ if $@;

    print "sleeping...\n";
    sleep(5);
}