From e32ee3d5b9b756c2c49ad0f2c44eb2de5c0e31d8 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 22 Jun 2015 15:43:15 +0200 Subject: [PATCH] Remove hydra-build and the old hydra-queue-runner --- doc/manual/installation.xml | 9 +- src/script/Makefile.am | 2 - src/script/hydra-build | 385 ---------------------------- src/script/hydra-eval-guile-jobs.in | 2 +- src/script/hydra-queue-runner | 279 -------------------- 5 files changed, 3 insertions(+), 674 deletions(-) delete mode 100755 src/script/hydra-build delete mode 100755 src/script/hydra-queue-runner diff --git a/doc/manual/installation.xml b/doc/manual/installation.xml index fb7ef5ce..64af9374 100644 --- a/doc/manual/installation.xml +++ b/doc/manual/installation.xml @@ -100,13 +100,8 @@ nix-env -i hydra - Command completion should reveal a number of command-line tools from Hydra: - - -hydra-build hydra-init hydra-update-gc-roots -hydra-eval-jobs hydra-queue-runner -hydra-evaluator hydra-server - + Command completion should reveal a number of command-line tools + from Hydra, such as hydra-queue-runner. diff --git a/src/script/Makefile.am b/src/script/Makefile.am index c05c9a12..cfdeea8c 100644 --- a/src/script/Makefile.am +++ b/src/script/Makefile.am @@ -4,9 +4,7 @@ EXTRA_DIST = \ distributable_scripts = \ hydra-init \ - hydra-build \ hydra-evaluator \ - hydra-queue-runner \ hydra-server \ hydra-update-gc-roots \ hydra-s3-backup-collect-garbage \ diff --git a/src/script/hydra-build b/src/script/hydra-build deleted file mode 100755 index 7013c3a4..00000000 --- a/src/script/hydra-build +++ /dev/null @@ -1,385 +0,0 @@ -#! /var/run/current-system/sw/bin/perl - -use strict; -use List::MoreUtils qw(all); -use File::Basename; -use File::stat; -use Nix::Store; -use Hydra::Plugin; -use Hydra::Schema; -use Hydra::Helper::Nix; -use Hydra::Helper::PluginHooks; -use Hydra::Model::DB; -use Hydra::Helper::AddBuilds; -use Set::Scalar; - -STDOUT->autoflush(); - -my $db = Hydra::Model::DB->new(); - -my $config = getHydraConfig(); - -my @plugins = Hydra::Plugin->instantiate(db => $db, config => $config); - - -sub addBuildStepOutputs { - my ($step) = @_; - my $drv = derivationFromPath($step->drvpath); - $step->buildstepoutputs->create({ name => $_, path => $drv->{outputs}->{$_} }) - foreach keys %{$drv->{outputs}}; -} - - -sub nextFreeStepNr { - my ($build) = @_; - my $max = $build->buildsteps->find( - {}, {select => {max => 'stepnr + 1'}, as => ['max']}); - return (defined $max && defined $max->get_column('max')) ? $max->get_column('max') : 1; -} - - -sub failDependents { - my ($drvPath, $status, $errorMsg, $dependents, $startTime, $stopTime, $machine, $propagatedFrom) = @_; - - # Get the referrer closure of $drvPath. - my $dependentDrvs = Set::Scalar->new(computeFSClosure(1, 0, $drvPath)); - - my $time = time(); - - txn_do($db, sub { - - my @dependentBuilds = $db->resultset('Builds')->search( - { finished => 0, busy => 0 }, - { columns => ["id", "project", "jobset", "job", "drvpath", "finished", "busy"] }); - - for my $d (@dependentBuilds) { - next unless $dependentDrvs->has($d->drvpath); - print STDERR "failing dependent build ", $d->id, " of ", $d->project->name, ":", $d->jobset->name, ":", $d->job->name, "\n"; - $d->update( - { finished => 1 - , logfile => '' - , iscachedbuild => 0 - , buildstatus => $drvPath eq $d->drvpath ? 1 : 2 - , starttime => $time - , stoptime => $time - , errormsg => undef - }); - - my $step = $d->buildsteps->create( - { stepnr => nextFreeStepNr($d) - , type => 0 # = build - , drvpath => $drvPath - , busy => 0 - , status => $status - , starttime => $startTime - , stoptime => $stopTime - , errormsg => $errorMsg - , machine => $machine - , propagatedfrom => $propagatedFrom->id - }); - addBuildStepOutputs($step); - - push @$dependents, $d; - } - - }); -} - - -sub doBuild { - my ($build) = @_; - - my %outputs; - $outputs{$_->name} = $_->path foreach $build->buildoutputs->all; - - my $drvPath = $build->drvpath; - my $maxsilent = $build->maxsilent; - my $timeout = $build->timeout; - - my $isCachedBuild = 1; - my $outputCreated = 1; # i.e., the Nix build succeeded (but it could be a positive failure) - my $startTime = time(); - my $stopTime = undef; - - my $buildStatus = 0; # = succeeded - - my $errormsg = undef; - - my $dependents = []; - - if (!isValidPath($drvPath)) { - $buildStatus = 3; - $errormsg = "derivation was garbage-collected prior to build"; - goto done; - } - - unless (all { isValidPath($_) } values(%outputs)) { - $isCachedBuild = 0; - - # Do the build. - my $thisBuildFailed = 0; - my $someBuildFailed = 0; - - # Run Nix to perform the build, and monitor the stderr output - # to get notifications about specific build steps, the - # associated log files, etc. - my $cmd = "nix-store --realise $drvPath " . - "--timeout $timeout " . - "--max-silent-time $maxsilent " . - "--option build-max-log-size 67108864 " . - "--option print-missing false " . - "--keep-going --fallback " . - "--no-build-output --log-type flat --print-build-trace " . - "--add-root " . gcRootFor($outputs{out} // $outputs{(sort keys %outputs)[0]}) . " 2>&1"; - - my $buildStepNr = nextFreeStepNr($build); - my %buildSteps; - - open OUT, "$cmd |" or die; - - while () { - $errormsg .= $_; - - unless (/^@\s+/) { - print STDERR "$_"; - next; - } - - if (/^@\s+build-started\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)$/) { - my $drvPathStep = $1; - txn_do($db, sub { - my $step = $build->buildsteps->create( - { stepnr => ($buildSteps{$drvPathStep} = $buildStepNr++) - , type => 0 # = build - , drvpath => $drvPathStep - , system => $3 - , busy => 1 - , starttime => time - }); - addBuildStepOutputs($step); - }); - } - - elsif (/^@\s+build-remote\s+(\S+)\s+(\S+)$/) { - my $drvPathStep = $1; - my $machine = $2; - txn_do($db, sub { - my $step = $build->buildsteps->find({stepnr => $buildSteps{$drvPathStep}}) or die; - $step->update({machine => $machine}); - }); - } - - elsif (/^@\s+build-remote-start\s+(\S+)\s+/) { - my $drvPathStep = $1; - txn_do($db, sub { - my $step = $build->buildsteps->find({stepnr => $buildSteps{$drvPathStep}}) or die; - $step->update({starttime => time}); - }); - } - - elsif (/^@\s+build-remote-done\s+(\S+)\s+/) { - my $drvPathStep = $1; - txn_do($db, sub { - my $step = $build->buildsteps->find({stepnr => $buildSteps{$drvPathStep}}) or die; - $step->update({stoptime => time}); - }); - } - - elsif (/^@\s+build-succeeded\s+(\S+)\s+(\S+)$/) { - my $drvPathStep = $1; - txn_do($db, sub { - my $step = $build->buildsteps->find({stepnr => $buildSteps{$drvPathStep}}) or die; - $step->update({busy => 0, status => 0, stoptime => time}); - $step->update({stoptime => time}) unless defined $step->update; - }); - } - - elsif (/^@\s+build-failed\s+(\S+)\s+(\S+)\s+(\S+)\s+(.*)$/) { - my $drvPathStep = $1; - $someBuildFailed = 1; - $thisBuildFailed = 1 if $drvPath eq $drvPathStep; - my $errorMsg; - my $status = 1; - if ($3 eq "cached") { - $status = 8; - } elsif ($3 eq "timeout") { - $status = 7; - } else { - $errorMsg = $4; - } - my $now = time; - my $stepStartTime = $now; - my $stepStopTime = $now; - my $machine = ""; - txn_do($db, sub { - if ($buildSteps{$drvPathStep}) { - my $step = $build->buildsteps->find({stepnr => $buildSteps{$drvPathStep}}) or die; - $stepStartTime = $step->starttime; - $stepStopTime = $now; - $machine = $step->machine; - $step->update({busy => 0, status => $status, errormsg => $errorMsg, stoptime => $now}); - } - # Don't write a record if this derivation already - # failed previously. This can happen if this is a - # restarted build. - elsif (scalar $build->buildsteps->search({drvpath => $drvPathStep, type => 0, busy => 0, status => 1}) == 0) { - my $step = $build->buildsteps->create( - { stepnr => ($buildSteps{$drvPathStep} = $buildStepNr++) - , type => 0 # = build - , drvpath => $drvPathStep - , busy => 0 - , status => $status - , starttime => $now - , stoptime => $now - , errormsg => $errorMsg - }); - addBuildStepOutputs($step); - } - }); - - # Immediately fail all builds that depend on this derivation. - failDependents($drvPathStep, $status, $errorMsg, $dependents, $stepStartTime, $stepStopTime, $machine, $build); - } - - elsif (/^@\s+substituter-started\s+(\S+)\s+(\S+)$/) { - my $path = $1; - txn_do($db, sub { - my $step = $build->buildsteps->create( - { stepnr => ($buildSteps{$path} = $buildStepNr++) - , type => 1 # = substitution - , busy => 1 - , starttime => time - }); - # "out" is kinda fake (substitutions don't have named outputs). - $step->buildstepoutputs->create({ name => "out", path => $path }); - }); - } - - elsif (/^@\s+substituter-succeeded\s+(\S+)$/) { - my $path = $1; - txn_do($db, sub { - my $step = $build->buildsteps->find({stepnr => $buildSteps{$path}}) or die; - $step->update({busy => 0, status => 0, stoptime => time}); - }); - } - - elsif (/^@\s+substituter-failed\s+(\S+)\s+(\S+)\s+(\S+)$/) { - my $path = $1; - txn_do($db, sub { - my $step = $build->buildsteps->find({stepnr => $buildSteps{$path}}) or die; - $step->update({busy => 0, status => 1, errormsg => $3, stoptime => time}); - }); - } - - else { - print STDERR "unknown Nix trace message: $_"; - } - } - - close OUT; - - my $res = $?; - - $stopTime = time(); - - if ($res != 0) { - if ($thisBuildFailed) { $buildStatus = 1; } - elsif ($someBuildFailed) { $buildStatus = 2; } - else { $buildStatus = 3; } - } - - # Only store the output of running Nix if we have a miscellaneous error. - $errormsg = undef unless $buildStatus == 3; - } - - done: - - txn_do($db, sub { - if ($buildStatus == 0) { - - my $size = 0; - my $closureSize = 0; - my $releaseName; - - my @closure = computeFSClosure(0, 0, values %outputs); - foreach my $path (@closure) { - my ($deriver, $hash, $time, $narSize, $refs) = queryPathInfo($path, 0); - $closureSize += $narSize; - $size += $narSize if grep { $path eq $_ } values(%outputs); - } - - foreach my $path (values %outputs) { - $buildStatus = 6 if $buildStatus == 0 && -f "$path/nix-support/failed"; - $releaseName //= getReleaseName($path); - } - - $build->update( - { releasename => $releaseName - , size => $size - , closuresize => $closureSize - }); - - addBuildProducts($db, $build); - } - - # Mark any remaining active build steps as aborted. - $build->buildsteps->search({ busy => 1 })->update({ busy => 0, status => 4, stoptime => time }); - - $build->update( - { finished => 1 - , busy => 0 - , locker => '' - , logfile => '' - , iscachedbuild => $isCachedBuild - , buildstatus => $buildStatus - , starttime => $startTime - , stoptime => $stopTime // time() - , errormsg => $errormsg - }); - - }); - - notifyBuildFinished(\@plugins, $build, $dependents); -} - - -my $buildId = $ARGV[0] or die "syntax: $0 BUILD-ID\n"; -print STDERR "performing build $buildId\n"; - -if ($ENV{'HYDRA_MAIL_TEST'}) { - my $build = $db->resultset('Builds')->find($buildId); - notifyBuildFinished(\@plugins, $build, []); - exit 0; -} - -# Lock the build. If necessary, steal the lock from the parent -# process (runner.pl). This is so that if the runner dies, the -# children (i.e. the build.pl instances) can continue to run and won't -# have the lock taken away. -my $build; -txn_do($db, sub { - $build = $db->resultset('Builds')->find($buildId); - die "build $buildId doesn't exist\n" unless defined $build; - die "build $buildId already done\n" if $build->finished; - if ($build->busy != 0 && $build->locker != getppid) { - die "build $buildId is already being built"; - } - $build->update({busy => 1, locker => $$}); - $build->buildsteps->search({busy => 1})->delete; - $build->buildproducts->delete; -}); - -die unless $build; - -# Do the build. If it throws an error, unlock the build so that it -# can be retried. -eval { - doBuild $build; - print "done\n"; -}; -if ($@) { - warn $@; - txn_do($db, sub { - $build->update({busy => 0, locker => $$}); - }); -} diff --git a/src/script/hydra-eval-guile-jobs.in b/src/script/hydra-eval-guile-jobs.in index 70550db7..8c5df125 100644 --- a/src/script/hydra-eval-guile-jobs.in +++ b/src/script/hydra-eval-guile-jobs.in @@ -104,7 +104,7 @@ symbol/thunk pairs." (when gc-roots-dir ;; Register DRV as a GC root so that it's not collected by - ;; the time 'hydra-build' attempts to build it. + ;; the time 'hydra-queue-runner' attempts to build it. (register-gc-root drv gc-roots-dir)) ;; XXX: Add tags? diff --git a/src/script/hydra-queue-runner b/src/script/hydra-queue-runner deleted file mode 100755 index 4caae3c8..00000000 --- a/src/script/hydra-queue-runner +++ /dev/null @@ -1,279 +0,0 @@ -#! /var/run/current-system/sw/bin/perl - -use strict; -use Cwd; -use File::Basename; -use POSIX qw(dup2 :sys_wait_h); -use Hydra::Schema; -use Hydra::Helper::Nix; -use Hydra::Model::DB; -use IO::Handle; -use Nix::Store; -use Set::Scalar; - -chdir Hydra::Model::DB::getHydraPath or die; -my $db = Hydra::Model::DB->new(); - -STDOUT->autoflush(); - -my $lastTime; - -#$SIG{CHLD} = 'IGNORE'; - - -sub unlockDeadBuilds { - # Unlock builds whose building process has died. - txn_do($db, sub { - my @builds = $db->resultset('Builds')->search({finished => 0, busy => 1}); - foreach my $build (@builds) { - my $pid = $build->locker; - my $unlock = 0; - if ($pid == $$) { - if (!defined $lastTime || $build->starttime < $lastTime - 300) { - $unlock = 1; - } - } elsif (kill(0, $pid) != 1) { # see if we can signal the process - $unlock = 1; - } - if ($unlock) { - print "build ", $build->id, " pid $pid died, unlocking\n"; - $build->update({ busy => 0, locker => "" }); - $build->buildsteps->search({ busy => 1 })->update({ busy => 0, status => 4, stoptime => time }); - } - } - }); -} - - -# Given a build, return an arbitrary queued build on which this build -# depends; or undef if no such build exists. -sub findBuildDependencyInQueue { - my ($buildsByDrv, $build) = @_; - return undef unless isValidPath($build->drvpath); - my @deps = grep { /\.drv$/ && $_ ne $build->drvpath } computeFSClosure(0, 0, $build->drvpath); - return unless scalar @deps > 0; - foreach my $d (@deps) { - my $bs = $buildsByDrv->{$d}; - next unless defined $bs; - return $db->resultset('Builds')->find((@$bs)[0]); - } - return undef; -} - - -sub blockBuilds { - my ($buildsByDrv, $blockedBuilds, $build) = @_; - my @rdeps = grep { /\.drv$/ && $_ ne $build->drvpath } computeFSClosure(1, 0, $build->drvpath); - foreach my $drv (@rdeps) { - my $bs = $buildsByDrv->{$drv}; - next if !defined $bs; - $blockedBuilds->insert($_) foreach @$bs; - } -} - - -sub checkBuilds { - # print "looking for runnable builds...\n"; - - my @buildsStarted; - - my $machines = getMachines; - - my %maxConcurrent; - - foreach my $machineName (keys %{$machines}) { - foreach my $system (@{${$machines}{$machineName}{'systemTypes'}}) { - $maxConcurrent{$system} = (${$machines}{$machineName}{'maxJobs'} or 0) + ($maxConcurrent{$system} or 0) - } - } - - txn_do($db, sub { - - # Cache scheduled builds by derivation path to speed up - # findBuildDependencyInQueue. - my $buildsByDrv = {}; - push @{$buildsByDrv->{$_->drvpath}}, $_->id - foreach $db->resultset('Builds')->search({ finished => 0 }); - - # Builds in the queue of which a dependency is already building. - my $blockedBuilds = Set::Scalar->new(); - blockBuilds($buildsByDrv, $blockedBuilds, $_) - foreach $db->resultset('Builds')->search({ finished => 0, busy => 1 }); - - # Get the system types for the runnable builds. - my @systemTypes = $db->resultset('Builds')->search( - { finished => 0, busy => 0 }, - { join => ['project'], select => ['system'], as => ['system'], distinct => 1 }); - - # Get the total number of scheduling shares. - my $totalShares = getTotalShares($db) || 1; - - # For each system type, select up to the maximum number of - # concurrent build for that system type. - foreach my $system (@systemTypes) { - # How many builds are already currently executing for this - # system type? - my $nrActive = $db->resultset('Builds')->search( - {finished => 0, busy => 1, system => $system->system})->count; - - (my $systemTypeInfo) = $db->resultset('SystemTypes')->search({system => $system->system}); - my $max = defined $systemTypeInfo ? $systemTypeInfo->maxconcurrent : $maxConcurrent{$system->system} // 2; - - my $extraAllowed = $max - $nrActive; - next if $extraAllowed <= 0; - - print STDERR "starting at most $extraAllowed builds for system ${\$system->system}\n"; - - my $timeSpentPerJobset; - - j: while ($extraAllowed-- > 0) { - - my @runnableJobsets = $db->resultset('Builds')->search( - { finished => 0, busy => 0, system => $system->system }, - { select => ['project', 'jobset'], distinct => 1 }); - - next if @runnableJobsets == 0; - - my $windowSize = 24 * 3600; - my $costPerBuild = 30; - my $totalWindowSize = $windowSize * $max; - - my @res; - - foreach my $b (@runnableJobsets) { - my $jobset = $db->resultset('Jobsets')->find($b->get_column('project'), $b->get_column('jobset')) or die; - - my $timeSpent = $timeSpentPerJobset->{$b->get_column('project')}->{$b->get_column('jobset')}; - - if (!defined $timeSpent) { - $timeSpent = $jobset->builds->search( - { }, - { where => \ ("(finished = 0)") - , join => 'buildsteps' - , select => \ "sum(coalesce(buildsteps.stoptime, ${\time}) - buildsteps.starttime)" - , as => "sum" })->single->get_column("sum") // 0; - - $timeSpent += $jobset->builds->search( - { }, - { where => \ ("(me.stoptime >= " . (time() - $windowSize) . ")") - , join => 'buildsteps' - , select => \ "sum(coalesce(buildsteps.stoptime, ${\time}) - buildsteps.starttime)" - , as => "sum" })->single->get_column("sum") // 0; - - # Add a 30s penalty for each started build. This - # is to account for jobsets that have running - # builds but no build steps yet. - $timeSpent += $jobset->builds->search({ finished => 0, busy => 1 })->count * $costPerBuild; - - $timeSpentPerJobset->{$b->get_column('project')}->{$b->get_column('jobset')} = $timeSpent; - } - - my $share = $jobset->schedulingshares || 1; # prevent division by zero - my $used = $timeSpent / ($totalWindowSize * ($share / $totalShares)); - - #printf STDERR "%s:%s: %d s, total used = %.2f%%, share used = %.2f%%\n", $jobset->get_column('project'), $jobset->name, $timeSpent, $timeSpent / $totalWindowSize * 100, $used * 100; - - push @res, { jobset => $jobset, used => $used }; - } - - foreach my $r (sort { $a->{used} <=> $b->{used} } @res) { - my $jobset = $r->{jobset}; - #print STDERR "selected ", $jobset->get_column('project'), ':', $jobset->name, "\n"; - - # Select the highest-priority build for this jobset. - my @builds = $jobset->builds->search( - { finished => 0, busy => 0, system => $system->system }, - { order_by => ["priority DESC", "id"] }); - - foreach my $build (@builds) { - next if $blockedBuilds->has($build->id); - - # Find a dependency of $build that has no queued - # dependencies itself. This isn't strictly necessary, - # but it ensures that Nix builds are done as part of - # their corresponding Hydra builds, rather than as a - # dependency of some other Hydra build. - while (my $dep = findBuildDependencyInQueue($buildsByDrv, $build)) { - $build = $dep; - } - next if $build->busy; - - printf STDERR "starting build %d (%s:%s:%s) on %s; jobset at %.2f%% of its share\n", - $build->id, $build->project->name, $build->jobset->name, $build->job->name, $build->system, $r->{used} * 100; - - my $logfile = getcwd . "/logs/" . $build->id; - mkdir(dirname $logfile); - unlink($logfile); - $build->update( - { busy => 1 - , locker => $$ - , logfile => $logfile - }); - push @buildsStarted, $build; - - $timeSpentPerJobset->{$jobset->get_column('project')}->{$jobset->name} += $costPerBuild; - - blockBuilds($buildsByDrv, $blockedBuilds, $build); - - next j; - } - } - - last; # nothing found, give up on this system type - } - } - - $lastTime = time(); - - $_->update({ starttime => time() }) foreach @buildsStarted; - }); - - # Actually start the builds we just selected. We need to do this - # outside the transaction in case it aborts or something. - foreach my $build (@buildsStarted) { - my $id = $build->id; - eval { - my $logfile = $build->logfile; - my $child = fork(); - die unless defined $child; - if ($child == 0) { - eval { - open LOG, ">$logfile" or die "cannot create logfile $logfile"; - POSIX::dup2(fileno(LOG), 1) or die; - POSIX::dup2(fileno(LOG), 2) or die; - exec("hydra-build", $id); - }; - warn "cannot start build $id: $@"; - POSIX::_exit(1); - } - }; - if ($@) { - warn $@; - txn_do($db, sub { - $build->update({ busy => 0, locker => $$ }); - }); - } - } -} - - -if (scalar(@ARGV) == 1 && $ARGV[0] eq "--unlock") { - unlockDeadBuilds; - exit 0; -} - - -while (1) { - eval { - # Clean up zombies. - while ((waitpid(-1, &WNOHANG)) > 0) { }; - - unlockDeadBuilds; - - checkBuilds; - }; - warn $@ if $@; - - # print "sleeping...\n"; - sleep(5); -}