#! /var/run/current-system/sw/bin/perl -w

# Queue runner: periodically unlocks builds whose builder process has
# died and starts queued builds, subject to a per-system-type
# concurrency limit and per-jobset scheduling shares.
#
# Usage:
#   <this script>            run forever, polling every 5 seconds
#   <this script> --unlock   unlock dead builds once, then exit

use strict;
use Cwd;
use File::Basename;
use POSIX qw(dup2 :sys_wait_h);
use Hydra::Schema;
use Hydra::Helper::Nix;
use Hydra::Model::DB;
use IO::Handle;
use Nix::Store;

chdir Hydra::Model::DB::getHydraPath or die;
my $db = Hydra::Model::DB->new();

STDOUT->autoflush();

# Time at which the main loop last completed an iteration; undef until
# the first iteration has finished.
my $lastTime;

#$SIG{CHLD} = 'IGNORE';


# Unlock builds whose building process has died.
#
# Every unfinished, busy build is inspected.  A build is unlocked when
#   * its locker is our own pid and either no loop iteration has
#     completed yet ($lastTime undefined) or the build started more
#     than 600 seconds before $lastTime; or
#   * its locker is another pid that cannot be signalled.
# Unlocking clears busy/locker on the build and marks its busy build
# steps as stopped now with status 4 (presumably "aborted" -- confirm
# against the schema).
sub unlockDeadBuilds {
    txn_do($db, sub {
        my @builds = $db->resultset('Builds')->search({finished => 0, busy => 1});
        foreach my $build (@builds) {
            my $pid = $build->locker;
            my $unlock = 0;
            if ($pid == $$) {
                # NOTE(review): presumably the spawned builder re-locks
                # the build under its own pid, so a build still locked
                # by us after this long is considered stale -- confirm.
                if (!defined $lastTime || $build->starttime < $lastTime - 600) {
                    $unlock = 1;
                }
            } elsif (kill(0, $pid) != 1) { # see if we can signal the process
                $unlock = 1;
            }
            if ($unlock) {
                print "build ", $build->id, " pid $pid died, unlocking\n";
                $build->update({ busy => 0, locker => ""});
                $build->buildsteps->search({ busy => 1 })->update({ busy => 0, status => 4, stoptime => time });
            }
        }
    });
}


# Given a build, return an arbitrary queued build on which this build
# depends; or undef if no such build exists.
#
# $buildsByDrv maps derivation paths of unfinished builds to build ids.
sub findBuildDependencyInQueue {
    my ($buildsByDrv, $build) = @_;
    return unless isValidPath($build->drvpath);

    # Every other .drv in the closure of this build's derivation.
    my @deps = grep { /\.drv$/ && $_ ne $build->drvpath } computeFSClosure(0, 0, $build->drvpath);
    return unless scalar @deps > 0;

    foreach my $d (@deps) {
        my $depId = $buildsByDrv->{$d};
        next unless defined $depId;
        return $db->resultset('Builds')->find($depId);
    }
    return;
}


# Select runnable builds (inside one transaction), mark them busy, and
# then fork a "hydra-build <id>" process for each selected build.
sub checkBuilds {
    # print "looking for runnable builds...\n";

    my @buildsStarted;

    # Sum the maxJobs of all machines per system type; used as the
    # concurrency limit for system types without a SystemTypes row.
    my $machines = getMachines;

    my %maxConcurrent;

    foreach my $machineName (keys %{$machines}) {
        foreach my $system (@{ $machines->{$machineName}{'systemTypes'} }) {
            $maxConcurrent{$system} =
                ($machines->{$machineName}{'maxJobs'} || 0) + ($maxConcurrent{$system} || 0);
        }
    }

    txn_do($db, sub {

        # Cache scheduled builds by derivation path to speed up
        # findBuildDependencyInQueue.
        my $buildsByDrv = {};
        $buildsByDrv->{$_->drvpath} = $_->id
            foreach $db->resultset('Builds')->search({ finished => 0 }, { join => ['project'] });

        # Get the system types for the runnable builds.
        my @systemTypes = $db->resultset('Builds')->search(
            { finished => 0, busy => 0 },
            { join => ['project'], select => ['system'], as => ['system'], distinct => 1 });

        # Get the total number of scheduling shares.
        my $totalShares = getTotalShares($db);

        # For each system type, select up to the maximum number of
        # concurrent build for that system type.
        foreach my $system (@systemTypes) {
            # How many builds are already currently executing for this
            # system type?
            my $nrActive = $db->resultset('Builds')->search(
                {finished => 0, busy => 1, system => $system->system})->count;

            # An explicit SystemTypes row wins; otherwise fall back to
            # the summed machine maxJobs, or 2 if no machine lists
            # this system type.
            my ($systemTypeInfo) = $db->resultset('SystemTypes')->search({system => $system->system});
            my $max = defined $systemTypeInfo
                ? $systemTypeInfo->maxconcurrent
                : ($maxConcurrent{$system->system} // 2);

            my $extraAllowed = $max - $nrActive;
            next if $extraAllowed <= 0;

            print STDERR "starting at most $extraAllowed builds for system ${\$system->system}\n";

            SLOT: while ($extraAllowed-- > 0) {

                my @runnableJobsets = $db->resultset('Builds')->search(
                    { finished => 0, busy => 0, system => $system->system },
                    { select => ['project', 'jobset'], distinct => 1 });

                # Nothing is runnable for this system type.  Retrying
                # for the remaining slots would repeat the same query
                # with the same result, so stop here.
                last if @runnableJobsets == 0;

                my $windowSize = 24 * 3600;
                my $totalWindowSize = $windowSize * $max;

                # For each runnable jobset, compute how far it is below
                # (positive delta) or above (negative delta) its share
                # of build time within the window.
                my @res;

                foreach my $row (@runnableJobsets) {
                    my $jobset = $db->resultset('Jobsets')->find($row->get_column('project'), $row->get_column('jobset')) or die;

                    my $duration = $jobset->builds->search(
                        { },
                        { where => \ ("(finished = 0 or (me.stoptime >= " . (time() - $windowSize) . "))")
                        , join => 'buildsteps'
                        , select => \ "sum(coalesce(buildsteps.stoptime, ${\time}) - buildsteps.starttime)"
                        , as => "sum" })->single->get_column("sum") // 0;

                    # Add a 30s penalty for each started build.  This
                    # is to account for jobsets that have running
                    # builds but no build steps yet.
                    $duration += $jobset->builds->search({ finished => 0, busy => 1 })->count * 30;

                    my $share = $jobset->schedulingshares;
                    my $delta = ($share / $totalShares) - ($duration / $totalWindowSize);

                    #printf STDERR "%s:%s: %d s, %.3f%%, allowance = %.3f%%\n", $jobset->get_column('project'), $jobset->name, $duration, $duration / $totalWindowSize, $delta;

                    push @res, { jobset => $jobset, delta => $delta };
                }

                # Try jobsets in order of largest remaining allowance.
                foreach my $r (sort { $b->{delta} <=> $a->{delta} } @res) {
                    my $jobset = $r->{jobset};
                    #print STDERR "selected ", $jobset->get_column('project'), ':', $jobset->name, "\n";

                    # Select the highest-priority build for this jobset.
                    my @builds = $jobset->builds->search(
                        { finished => 0, busy => 0, system => $system->system },
                        { order_by => ["priority DESC", "id"] });

                    foreach my $build (@builds) {

                        # Find a dependency of $build that has no queued
                        # dependencies itself.  This isn't strictly necessary,
                        # but it ensures that Nix builds are done as part of
                        # their corresponding Hydra builds, rather than as a
                        # dependency of some other Hydra build.
                        while (my $dep = findBuildDependencyInQueue($buildsByDrv, $build)) {
                            $build = $dep;
                        }
                        # The dependency we ended up at may already be running.
                        next if $build->busy;

                        printf STDERR "starting build %d (%s:%s:%s) on %s (jobset allowance = %.3f%%)\n",
                            $build->id, $build->project->name, $build->jobset->name, $build->job->name, $build->system, $r->{delta};

                        my $logfile = getcwd . "/logs/" . $build->id;

                        mkdir(dirname $logfile);
                        unlink($logfile);
                        $build->update(
                            { busy => 1
                            , locker => $$
                            , logfile => $logfile
                            , starttime => time()
                            });
                        push @buildsStarted, $build;
                        next SLOT;
                    }
                }

                last; # nothing found, give up on this system type
            }
        }
    });

    # Actually start the builds we just selected.  We need to do this
    # outside the transaction in case it aborts or something.
    foreach my $build (@buildsStarted) {
        my $id = $build->id;
        eval {
            my $logfile = $build->logfile;
            my $child = fork();
            die "cannot fork: $!\n" unless defined $child;
            if ($child == 0) {
                # Child: send stdout/stderr to the log file and become
                # hydra-build.  The child must never return into the
                # parent's loop, hence the unconditional _exit below.
                eval {
                    open(my $log, '>', $logfile) or die "cannot create logfile $logfile: $!";
                    POSIX::dup2(fileno($log), 1) or die "cannot redirect stdout to $logfile: $!";
                    POSIX::dup2(fileno($log), 2) or die "cannot redirect stderr to $logfile: $!";
                    # exec only returns on failure; turn that into an
                    # error so the warning below carries the reason.
                    exec("hydra-build", $id) or die "cannot exec hydra-build: $!\n";
                };
                warn "cannot start build $id: $@";
                POSIX::_exit(1);
            }
        };
        if ($@) {
            # Forking failed in the parent: release the build again.
            warn $@;
            txn_do($db, sub {
                $build->busy(0);
                $build->locker($$);
                $build->update;
            });
        }
    }
}


if (scalar(@ARGV) == 1 && $ARGV[0] eq "--unlock") {
    unlockDeadBuilds();
    exit 0;
}


while (1) {
    eval {
        # Clean up zombies.
        while (waitpid(-1, WNOHANG) > 0) { }

        unlockDeadBuilds();

        checkBuilds();

        $lastTime = time();
    };
    warn $@ if $@;

    # print "sleeping...\n";
    sleep(5);
}