hydra-queue-runner: Improved scheduling

Each jobset now has a "scheduling share" that determines how much of
the build farm's time it is entitled to.  For instance, if a jobset
has 100 shares and the total number of shares of all jobsets is 1000,
it's entitled to 10% of the build farm's time.  When there is a free
build slot for a given system type, the queue runner will select the
jobset that is furthest below its scheduling share over a certain time
window (currently, the last day).  Within that jobset, it will pick
the build with the highest priority.

So meta.schedulingPriority now only determines the order of builds
within a jobset, not between jobsets.  This makes it much easier to
prioritise one jobset over another (e.g. nixpkgs:trunk over
nixpkgs:stdenv).
This commit is contained in:
Eelco Dolstra 2013-09-21 14:47:52 +00:00
parent 7efe793ee6
commit 4ed877360b
10 changed files with 164 additions and 66 deletions

View file

@ -50,7 +50,9 @@ sub jobset_GET {
$c->stash->{evals} = getEvals($self, $c, scalar $c->stash->{jobset}->jobsetevals, 0, 10);
($c->stash->{latestEval}) = $c->stash->{jobset}->jobsetevals->search({}, { rows => 1, order_by => ["id desc"] });
$c->stash->{latestEval} = $c->stash->{jobset}->jobsetevals->search({}, { rows => 1, order_by => ["id desc"] })->single;
$c->stash->{totalShares} = getTotalShares($c->model('DB')->schema);
$self->status_ok(
$c,
@ -161,22 +163,22 @@ sub jobs_tab : Chained('jobsetChain') PathPart('jobs-tab') Args(0) {
my @builds = $eval->builds->search(
{ job => { ilike => $filter } },
{ columns => ['id', 'job', 'finished', 'buildstatus'] });
foreach my $b (@builds) {
my $jobName = $b->get_column('job');
$evals->{$eval->id}->{$jobName} =
{ id => $b->id, finished => $b->finished, buildstatus => $b->buildstatus };
$jobs{$jobName} = 1;
$nrBuilds++;
}
last if $nrBuilds >= 10000;
foreach my $b (@builds) {
my $jobName = $b->get_column('job');
$evals->{$eval->id}->{$jobName} =
{ id => $b->id, finished => $b->finished, buildstatus => $b->buildstatus };
$jobs{$jobName} = 1;
$nrBuilds++;
}
last if $nrBuilds >= 10000;
}
if ($c->request->params->{showInactive}) {
$c->stash->{showInactive} = 1;
foreach my $job ($c->stash->{jobset}->jobs->search({ name => { ilike => $filter } })) {
next if defined $jobs{$job->name};
$c->stash->{inactiveJobs}->{$job->name} = $jobs{$job->name} = 1;
}
$c->stash->{showInactive} = 1;
foreach my $job ($c->stash->{jobset}->jobs->search({ name => { ilike => $filter } })) {
next if defined $jobs{$job->name};
$c->stash->{inactiveJobs}->{$job->name} = $jobs{$job->name} = 1;
}
}
$c->stash->{evals} = $evals;
@ -209,6 +211,7 @@ sub edit : Chained('jobsetChain') PathPart Args(0) {
$c->stash->{template} = 'edit-jobset.tt';
$c->stash->{edit} = 1;
$c->stash->{totalShares} = getTotalShares($c->model('DB')->schema);
}
@ -287,6 +290,7 @@ sub updateJobset {
, keepnr => int(trim($c->stash->{params}->{keepnr}))
, checkinterval => int(trim($c->stash->{params}->{checkinterval}))
, triggertime => $enabled ? $jobset->triggertime // time() : undef
, schedulingshares => int($c->stash->{params}->{schedulingshares})
});
# Process the inputs of this jobset.

View file

@ -201,6 +201,7 @@ sub create_jobset : Chained('projectChain') PathPart('create-jobset') Args(0) {
$c->stash->{template} = 'edit-jobset.tt';
$c->stash->{create} = 1;
$c->stash->{edit} = 1;
$c->stash->{totalShares} = getTotalShares($c->model('DB')->schema);
}

View file

@ -20,7 +20,8 @@ our @EXPORT = qw(
getMainOutput
getEvals getMachines
pathIsInsidePrefix
captureStdoutStderr run grab);
captureStdoutStderr run grab
getTotalShares);
sub getHydraHome {
@ -533,4 +534,12 @@ sub grab {
}
# Return the total number of scheduling shares held by all enabled
# jobsets that belong to enabled projects.
#
# Parameters:
#   $db - schema object on which resultset('Jobsets') can be called.
#
# Returns:
#   The sum of the `schedulingshares` column over the matching jobsets.
#   When no jobset matches, SQL SUM() yields NULL (undef in Perl); that
#   case is normalised to 0 so callers always receive a number.  Callers
#   that divide by this value must still guard against 0.
sub getTotalShares {
    my ($db) = @_;

    my $sum = $db->resultset('Jobsets')->search(
        { 'project.enabled' => 1, 'me.enabled' => 1 },
        # Qualify the summed column with the `me` alias, like the
        # condition above, so the join cannot make it ambiguous.
        { join => 'project', select => { sum => 'me.schedulingshares' }, as => 'sum' }
    )->single->get_column('sum');

    return $sum // 0;
}
1;

View file

@ -15,6 +15,18 @@ use warnings;
use base 'DBIx::Class::Core';
=head1 COMPONENTS LOADED
=over 4
=item * L<Hydra::Component::ToJSON>
=back
=cut
__PACKAGE__->load_components("+Hydra::Component::ToJSON");
=head1 TABLE: C<CachedDarcsInputs>
=cut
@ -28,11 +40,6 @@ __PACKAGE__->table("CachedDarcsInputs");
data_type: 'text'
is_nullable: 0
=head2 branch
data_type: 'text'
is_nullable: 0
=head2 revision
data_type: 'text'
@ -48,6 +55,11 @@ __PACKAGE__->table("CachedDarcsInputs");
data_type: 'text'
is_nullable: 0
=head2 revcount
data_type: 'integer'
is_nullable: 0
=cut
__PACKAGE__->add_columns(
@ -55,12 +67,12 @@ __PACKAGE__->add_columns(
{ data_type => "text", is_nullable => 0 },
"revision",
{ data_type => "text", is_nullable => 0 },
"revcount",
{ data_type => "integer", is_nullable => 0 },
"sha256hash",
{ data_type => "text", is_nullable => 0 },
"storepath",
{ data_type => "text", is_nullable => 0 },
"revcount",
{ data_type => "integer", is_nullable => 0 },
);
=head1 PRIMARY KEY
@ -69,8 +81,6 @@ __PACKAGE__->add_columns(
=item * L</uri>
=item * L</branch>
=item * L</revision>
=back
@ -80,7 +90,9 @@ __PACKAGE__->add_columns(
__PACKAGE__->set_primary_key("uri", "revision");
# Created by DBIx::Class::Schema::Loader v0.07014 @ 2011-12-05 14:15:43
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:fx3yosWMmJ+MnvL/dSWtFA
# Created by DBIx::Class::Schema::Loader v0.07033 @ 2013-09-20 11:08:50
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:Yl1slt3SAizijgu0KUTn0A
# You can replace this text with custom code or comments, and it will be preserved on regeneration
1;

View file

@ -118,6 +118,12 @@ __PACKAGE__->table("Jobsets");
default_value: 300
is_nullable: 0
=head2 schedulingshares
data_type: 'integer'
default_value: 100
is_nullable: 0
=cut
__PACKAGE__->add_columns(
@ -151,6 +157,8 @@ __PACKAGE__->add_columns(
{ data_type => "integer", default_value => 3, is_nullable => 0 },
"checkinterval",
{ data_type => "integer", default_value => 300, is_nullable => 0 },
"schedulingshares",
{ data_type => "integer", default_value => 100, is_nullable => 0 },
);
=head1 PRIMARY KEY
@ -272,7 +280,7 @@ __PACKAGE__->belongs_to(
);
# Created by DBIx::Class::Schema::Loader v0.07033 @ 2013-06-13 01:54:50
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:tsGR8MhZRIUeNwpcVczMUw
# Created by DBIx::Class::Schema::Loader v0.07033 @ 2013-09-20 12:15:23
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:pD6tGW0Ob3fuA1p0uQnBWw
1;

View file

@ -1,5 +1,6 @@
[% WRAPPER layout.tt title=(create ? "Create jobset in project $project.name" : "Editing jobset $project.name:$jobset.name") %]
[% PROCESS common.tt %]
[% USE format %]
[% BLOCK renderJobsetInputAlt %]
<button type="button" class="btn btn-warning" onclick='$(this).parents(".inputalt").remove()'><i class="icon-trash icon-white"></i></button>
@ -94,6 +95,18 @@
</div>
</div>
<div class="control-group">
<label class="control-label">Scheduling shares</label>
<div class="controls">
<div class="input-append">
<input type="number" class="span3" name="schedulingshares" [% HTML.attributes(value => jobset.schedulingshares) %]/>
</div>
[% IF totalShares %]
<span class="help-inline">([% f = format("%.2f"); f(jobset.schedulingshares / totalShares * 100) %]% out of [% totalShares %] shares)</span>
[% END %]
</div>
</div>
<div class="control-group">
<div class="controls">
<label class="checkbox">

View file

@ -1,5 +1,6 @@
[% WRAPPER layout.tt title="Jobset $project.name:$jobset.name" %]
[% PROCESS common.tt %]
[% USE format %]
[% BLOCK renderJobsetInput %]
@ -121,6 +122,10 @@
<th>Check interval:</th>
<td>[% jobset.checkinterval || "<em>disabled</em>" %]</td>
</tr>
<tr>
<th>Scheduling shares:</th>
<td>[% jobset.schedulingshares %] [% IF totalShares %] ([% f = format("%.2f"); f(jobset.schedulingshares / totalShares * 100) %]% out of [% totalShares %] shares)[% END %]</td>
</tr>
<tr>
<th>Enable email notification:</th>
<td>[% jobset.enableemail ? "Yes" : "No" %]</td>

View file

@ -28,7 +28,7 @@ sub unlockDeadBuilds {
my $pid = $build->locker;
my $unlock = 0;
if ($pid == $$) {
if (!defined $lastTime || $build->starttime < $lastTime - 300) {
if (!defined $lastTime || $build->starttime < $lastTime - 600) {
$unlock = 1;
}
} elsif (kill(0, $pid) != 1) { # see if we can signal the process
@ -70,27 +70,29 @@ sub checkBuilds {
my %maxConcurrent;
foreach my $machineName (keys %{$machines}) {
foreach my $system (${$machines}{$machineName}{'systemTypes'}) {
foreach my $system (@{${$machines}{$machineName}{'systemTypes'}}) {
$maxConcurrent{$system} = (${$machines}{$machineName}{'maxJobs'} or 0) + ($maxConcurrent{$system} or 0)
}
}
txn_do($db, sub {
# Cache scheduled by derivation path to speed up
# Cache scheduled builds by derivation path to speed up
# findBuildDependencyInQueue.
my $buildsByDrv = {};
$buildsByDrv->{$_->drvpath} = $_->id
foreach $db->resultset('Builds')->search({ finished => 0, enabled => 1 }, { join => ['project'] });
foreach $db->resultset('Builds')->search({ finished => 0 }, { join => ['project'] });
# Get the system types for the runnable builds.
my @systemTypes = $db->resultset('Builds')->search(
{ finished => 0, busy => 0, enabled => 1 },
{ finished => 0, busy => 0 },
{ join => ['project'], select => ['system'], as => ['system'], distinct => 1 });
# Get the total number of scheduling shares.
my $totalShares = getTotalShares($db);
# For each system type, select up to the maximum number of
# concurrent build for that system type. Choose the highest
# priority builds first, then the oldest builds.
# concurrent build for that system type.
foreach my $system (@systemTypes) {
# How many builds are already currently executing for this
# system type?
@ -101,42 +103,84 @@ sub checkBuilds {
my $max = defined $systemTypeInfo ? $systemTypeInfo->maxconcurrent : $maxConcurrent{$system->system} // 2;
my $extraAllowed = $max - $nrActive;
$extraAllowed = 0 if $extraAllowed < 0;
next if $extraAllowed <= 0;
# Select the highest-priority builds to start.
my @builds = $extraAllowed == 0 ? () : $db->resultset('Builds')->search(
{ finished => 0, busy => 0, system => $system->system, enabled => 1 },
{ join => ['project'], order_by => ["priority DESC", "id"] });
print STDERR "starting at most $extraAllowed builds for system ${\$system->system}\n";
my $started = 0;
foreach my $build (@builds) {
# Find a dependency of $build that has no queued
# dependencies itself. This isn't strictly necessary,
# but it ensures that Nix builds are done as part of
# their corresponding Hydra builds, rather than as a
# dependency of some other Hydra build.
while (my $dep = findBuildDependencyInQueue($buildsByDrv, $build)) {
$build = $dep;
j: while ($extraAllowed-- > 0) {
my @runnableJobsets = $db->resultset('Builds')->search(
{ finished => 0, busy => 0, system => $system->system },
{ select => ['project', 'jobset'], distinct => 1 });
next if @runnableJobsets == 0;
my $windowSize = 24 * 3600;
my $totalWindowSize = $windowSize * $max;
my @res;
foreach my $b (@runnableJobsets) {
my $jobset = $db->resultset('Jobsets')->find($b->get_column('project'), $b->get_column('jobset')) or die;
my $duration = $jobset->builds->search(
{ },
{ where => \ ("(finished = 0 or (me.stoptime >= " . (time() - $windowSize) . "))")
, join => 'buildsteps'
, select => \ "sum(coalesce(buildsteps.stoptime, ${\time}) - buildsteps.starttime)"
, as => "sum" })->single->get_column("sum") // 0;
# Add a 30s penalty for each started build. This
# is to account for jobsets that have running
# builds but no build steps yet.
$duration += $jobset->builds->search({ finished => 0, busy => 1 })->count * 30;
my $share = $jobset->schedulingshares;
my $delta = ($share / $totalShares) - ($duration / $totalWindowSize);
#printf STDERR "%s:%s: %d s, %.3f%%, allowance = %.3f%%\n", $jobset->get_column('project'), $jobset->name, $duration, $duration / $totalWindowSize, $delta;
push @res, { jobset => $jobset, delta => $delta };
}
next if $build->busy;
my $logfile = getcwd . "/logs/" . $build->id;
mkdir(dirname $logfile);
unlink($logfile);
$build->update(
{ busy => 1
, locker => $$
, logfile => $logfile
, starttime => time()
});
push @buildsStarted, $build;
foreach my $r (sort { $b->{delta} <=> $a->{delta} } @res) {
my $jobset = $r->{jobset};
#print STDERR "selected ", $jobset->get_column('project'), ':', $jobset->name, "\n";
last if ++$started >= $extraAllowed;
}
# Select the highest-priority build for this jobset.
my @builds = $jobset->builds->search(
{ finished => 0, busy => 0, system => $system->system },
{ order_by => ["priority DESC", "id"] });
if ($started > 0) {
print STDERR "system type `", $system->system,
"': $nrActive active, $max allowed, started $started builds\n";
foreach my $build (@builds) {
# Find a dependency of $build that has no queued
# dependencies itself. This isn't strictly necessary,
# but it ensures that Nix builds are done as part of
# their corresponding Hydra builds, rather than as a
# dependency of some other Hydra build.
while (my $dep = findBuildDependencyInQueue($buildsByDrv, $build)) {
$build = $dep;
}
next if $build->busy;
printf STDERR "starting build %d (%s:%s:%s) on %s (jobset allowance = %.3f%%)\n",
$build->id, $build->project->name, $build->jobset->name, $build->job->name, $build->system, $r->{delta};
my $logfile = getcwd . "/logs/" . $build->id;
mkdir(dirname $logfile);
unlink($logfile);
$build->update(
{ busy => 1
, locker => $$
, logfile => $logfile
, starttime => time()
});
push @buildsStarted, $build;
next j;
}
}
last; # nothing found, give up on this system type
}
}
});
@ -145,7 +189,6 @@ sub checkBuilds {
# outside the transaction in case it aborts or something.
foreach my $build (@buildsStarted) {
my $id = $build->id;
print "starting build $id (", $build->project->name, ":", $build->jobset->name, ':', $build->job->name, ") on ", $build->system, "\n";
eval {
my $logfile = $build->logfile;
my $child = fork();

View file

@ -61,6 +61,7 @@ create table Jobsets (
emailOverride text not null,
keepnr integer not null default 3,
checkInterval integer not null default 300, -- minimum time in seconds between polls (0 = disable polling)
schedulingShares integer not null default 100,
primary key (project, name),
foreign key (project) references Projects(name) on delete cascade on update cascade
#ifdef SQLITE

2
src/sql/upgrade-21.sql Normal file
View file

@ -0,0 +1,2 @@
-- Schema upgrade 21: add the per-jobset scheduling share column.
-- The column is NOT NULL with a default of 100, so existing jobsets
-- receive 100 shares; this matches the schedulingShares definition in
-- the `create table Jobsets` statement of the main schema.
alter table Jobsets
add column schedulingShares integer not null default 100;