hydra-queue-runner: Improved scheduling

Each jobset now has a "scheduling share" that determines how much of
the build farm's time it is entitled to.  For instance, if a jobset
has 100 shares and the total number of shares of all jobsets is 1000,
it's entitled to 10% of the build farm's time.  When there is a free
build slot for a given system type, the queue runner will select the
jobset that is furthest below its scheduling share over a certain time
window (currently, the last day).  Within that jobset, it will pick
the build with the highest priority.

So meta.schedulingPriority now only determines the order of builds
within a jobset, not between jobsets.  This makes it much easier to
prioritise one jobset over another (e.g. nixpkgs:trunk over
nixpkgs:stdenv).
This commit is contained in:
Eelco Dolstra 2013-09-21 14:47:52 +00:00
parent 7efe793ee6
commit 4ed877360b
10 changed files with 164 additions and 66 deletions

View file

@ -50,7 +50,9 @@ sub jobset_GET {
$c->stash->{evals} = getEvals($self, $c, scalar $c->stash->{jobset}->jobsetevals, 0, 10); $c->stash->{evals} = getEvals($self, $c, scalar $c->stash->{jobset}->jobsetevals, 0, 10);
($c->stash->{latestEval}) = $c->stash->{jobset}->jobsetevals->search({}, { rows => 1, order_by => ["id desc"] }); $c->stash->{latestEval} = $c->stash->{jobset}->jobsetevals->search({}, { rows => 1, order_by => ["id desc"] })->single;
$c->stash->{totalShares} = getTotalShares($c->model('DB')->schema);
$self->status_ok( $self->status_ok(
$c, $c,
@ -161,22 +163,22 @@ sub jobs_tab : Chained('jobsetChain') PathPart('jobs-tab') Args(0) {
my @builds = $eval->builds->search( my @builds = $eval->builds->search(
{ job => { ilike => $filter } }, { job => { ilike => $filter } },
{ columns => ['id', 'job', 'finished', 'buildstatus'] }); { columns => ['id', 'job', 'finished', 'buildstatus'] });
foreach my $b (@builds) { foreach my $b (@builds) {
my $jobName = $b->get_column('job'); my $jobName = $b->get_column('job');
$evals->{$eval->id}->{$jobName} = $evals->{$eval->id}->{$jobName} =
{ id => $b->id, finished => $b->finished, buildstatus => $b->buildstatus }; { id => $b->id, finished => $b->finished, buildstatus => $b->buildstatus };
$jobs{$jobName} = 1; $jobs{$jobName} = 1;
$nrBuilds++; $nrBuilds++;
} }
last if $nrBuilds >= 10000; last if $nrBuilds >= 10000;
} }
if ($c->request->params->{showInactive}) { if ($c->request->params->{showInactive}) {
$c->stash->{showInactive} = 1; $c->stash->{showInactive} = 1;
foreach my $job ($c->stash->{jobset}->jobs->search({ name => { ilike => $filter } })) { foreach my $job ($c->stash->{jobset}->jobs->search({ name => { ilike => $filter } })) {
next if defined $jobs{$job->name}; next if defined $jobs{$job->name};
$c->stash->{inactiveJobs}->{$job->name} = $jobs{$job->name} = 1; $c->stash->{inactiveJobs}->{$job->name} = $jobs{$job->name} = 1;
} }
} }
$c->stash->{evals} = $evals; $c->stash->{evals} = $evals;
@ -209,6 +211,7 @@ sub edit : Chained('jobsetChain') PathPart Args(0) {
$c->stash->{template} = 'edit-jobset.tt'; $c->stash->{template} = 'edit-jobset.tt';
$c->stash->{edit} = 1; $c->stash->{edit} = 1;
$c->stash->{totalShares} = getTotalShares($c->model('DB')->schema);
} }
@ -287,6 +290,7 @@ sub updateJobset {
, keepnr => int(trim($c->stash->{params}->{keepnr})) , keepnr => int(trim($c->stash->{params}->{keepnr}))
, checkinterval => int(trim($c->stash->{params}->{checkinterval})) , checkinterval => int(trim($c->stash->{params}->{checkinterval}))
, triggertime => $enabled ? $jobset->triggertime // time() : undef , triggertime => $enabled ? $jobset->triggertime // time() : undef
, schedulingshares => int($c->stash->{params}->{schedulingshares})
}); });
# Process the inputs of this jobset. # Process the inputs of this jobset.

View file

@ -201,6 +201,7 @@ sub create_jobset : Chained('projectChain') PathPart('create-jobset') Args(0) {
$c->stash->{template} = 'edit-jobset.tt'; $c->stash->{template} = 'edit-jobset.tt';
$c->stash->{create} = 1; $c->stash->{create} = 1;
$c->stash->{edit} = 1; $c->stash->{edit} = 1;
$c->stash->{totalShares} = getTotalShares($c->model('DB')->schema);
} }

View file

@ -20,7 +20,8 @@ our @EXPORT = qw(
getMainOutput getMainOutput
getEvals getMachines getEvals getMachines
pathIsInsidePrefix pathIsInsidePrefix
captureStdoutStderr run grab); captureStdoutStderr run grab
getTotalShares);
sub getHydraHome { sub getHydraHome {
@ -533,4 +534,12 @@ sub grab {
} }
# Return the total number of scheduling shares held by all enabled
# jobsets that belong to enabled projects.
#
# Parameters:
#   $db - object whose resultset('Jobsets') is queried (callers pass the
#         database schema).
#
# Returns the sum of the `schedulingshares` column over the matching
# jobsets.  SQL's SUM yields NULL when no row matches, so the result is
# normalised to 0 instead of undef; callers that divide by the total
# must still guard against a zero value.
sub getTotalShares {
    my ($db) = @_;

    my $row = $db->resultset('Jobsets')->search(
        { 'project.enabled' => 1, 'me.enabled' => 1 },
        { join => 'project', select => { sum => 'schedulingshares' }, as => 'sum' })->single;

    # An aggregate query normally yields exactly one row; be defensive anyway.
    return 0 unless defined $row;

    return $row->get_column('sum') // 0;
}
1; 1;

View file

@ -15,6 +15,18 @@ use warnings;
use base 'DBIx::Class::Core'; use base 'DBIx::Class::Core';
=head1 COMPONENTS LOADED
=over 4
=item * L<Hydra::Component::ToJSON>
=back
=cut
__PACKAGE__->load_components("+Hydra::Component::ToJSON");
=head1 TABLE: C<CachedDarcsInputs> =head1 TABLE: C<CachedDarcsInputs>
=cut =cut
@ -28,11 +40,6 @@ __PACKAGE__->table("CachedDarcsInputs");
data_type: 'text' data_type: 'text'
is_nullable: 0 is_nullable: 0
=head2 branch
data_type: 'text'
is_nullable: 0
=head2 revision =head2 revision
data_type: 'text' data_type: 'text'
@ -48,6 +55,11 @@ __PACKAGE__->table("CachedDarcsInputs");
data_type: 'text' data_type: 'text'
is_nullable: 0 is_nullable: 0
=head2 revcount
data_type: 'integer'
is_nullable: 0
=cut =cut
__PACKAGE__->add_columns( __PACKAGE__->add_columns(
@ -55,12 +67,12 @@ __PACKAGE__->add_columns(
{ data_type => "text", is_nullable => 0 }, { data_type => "text", is_nullable => 0 },
"revision", "revision",
{ data_type => "text", is_nullable => 0 }, { data_type => "text", is_nullable => 0 },
"revcount",
{ data_type => "integer", is_nullable => 0 },
"sha256hash", "sha256hash",
{ data_type => "text", is_nullable => 0 }, { data_type => "text", is_nullable => 0 },
"storepath", "storepath",
{ data_type => "text", is_nullable => 0 }, { data_type => "text", is_nullable => 0 },
"revcount",
{ data_type => "integer", is_nullable => 0 },
); );
=head1 PRIMARY KEY =head1 PRIMARY KEY
@ -69,8 +81,6 @@ __PACKAGE__->add_columns(
=item * L</uri> =item * L</uri>
=item * L</branch>
=item * L</revision> =item * L</revision>
=back =back
@ -80,7 +90,9 @@ __PACKAGE__->add_columns(
__PACKAGE__->set_primary_key("uri", "revision"); __PACKAGE__->set_primary_key("uri", "revision");
# Created by DBIx::Class::Schema::Loader v0.07014 @ 2011-12-05 14:15:43 # Created by DBIx::Class::Schema::Loader v0.07033 @ 2013-09-20 11:08:50
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:fx3yosWMmJ+MnvL/dSWtFA # DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:Yl1slt3SAizijgu0KUTn0A
# You can replace this text with custom code or comments, and it will be preserved on regeneration
1; 1;

View file

@ -118,6 +118,12 @@ __PACKAGE__->table("Jobsets");
default_value: 300 default_value: 300
is_nullable: 0 is_nullable: 0
=head2 schedulingshares
data_type: 'integer'
default_value: 100
is_nullable: 0
=cut =cut
__PACKAGE__->add_columns( __PACKAGE__->add_columns(
@ -151,6 +157,8 @@ __PACKAGE__->add_columns(
{ data_type => "integer", default_value => 3, is_nullable => 0 }, { data_type => "integer", default_value => 3, is_nullable => 0 },
"checkinterval", "checkinterval",
{ data_type => "integer", default_value => 300, is_nullable => 0 }, { data_type => "integer", default_value => 300, is_nullable => 0 },
"schedulingshares",
{ data_type => "integer", default_value => 100, is_nullable => 0 },
); );
=head1 PRIMARY KEY =head1 PRIMARY KEY
@ -272,7 +280,7 @@ __PACKAGE__->belongs_to(
); );
# Created by DBIx::Class::Schema::Loader v0.07033 @ 2013-06-13 01:54:50 # Created by DBIx::Class::Schema::Loader v0.07033 @ 2013-09-20 12:15:23
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:tsGR8MhZRIUeNwpcVczMUw # DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:pD6tGW0Ob3fuA1p0uQnBWw
1; 1;

View file

@ -1,5 +1,6 @@
[% WRAPPER layout.tt title=(create ? "Create jobset in project $project.name" : "Editing jobset $project.name:$jobset.name") %] [% WRAPPER layout.tt title=(create ? "Create jobset in project $project.name" : "Editing jobset $project.name:$jobset.name") %]
[% PROCESS common.tt %] [% PROCESS common.tt %]
[% USE format %]
[% BLOCK renderJobsetInputAlt %] [% BLOCK renderJobsetInputAlt %]
<button type="button" class="btn btn-warning" onclick='$(this).parents(".inputalt").remove()'><i class="icon-trash icon-white"></i></button> <button type="button" class="btn btn-warning" onclick='$(this).parents(".inputalt").remove()'><i class="icon-trash icon-white"></i></button>
@ -94,6 +95,18 @@
</div> </div>
</div> </div>
<div class="control-group">
<label class="control-label">Scheduling shares</label>
<div class="controls">
<div class="input-append">
<input type="number" class="span3" name="schedulingshares" [% HTML.attributes(value => jobset.schedulingshares) %]/>
</div>
[% IF totalShares %]
<span class="help-inline">([% f = format("%.2f"); f(jobset.schedulingshares / totalShares * 100) %]% out of [% totalShares %] shares)</span>
[% END %]
</div>
</div>
<div class="control-group"> <div class="control-group">
<div class="controls"> <div class="controls">
<label class="checkbox"> <label class="checkbox">

View file

@ -1,5 +1,6 @@
[% WRAPPER layout.tt title="Jobset $project.name:$jobset.name" %] [% WRAPPER layout.tt title="Jobset $project.name:$jobset.name" %]
[% PROCESS common.tt %] [% PROCESS common.tt %]
[% USE format %]
[% BLOCK renderJobsetInput %] [% BLOCK renderJobsetInput %]
@ -121,6 +122,10 @@
<th>Check interval:</th> <th>Check interval:</th>
<td>[% jobset.checkinterval || "<em>disabled</em>" %]</td> <td>[% jobset.checkinterval || "<em>disabled</em>" %]</td>
</tr> </tr>
<tr>
<th>Scheduling shares:</th>
<td>[% jobset.schedulingshares %] [% IF totalShares %] ([% f = format("%.2f"); f(jobset.schedulingshares / totalShares * 100) %]% out of [% totalShares %] shares)[% END %]</td>
</tr>
<tr> <tr>
<th>Enable email notification:</th> <th>Enable email notification:</th>
<td>[% jobset.enableemail ? "Yes" : "No" %]</td> <td>[% jobset.enableemail ? "Yes" : "No" %]</td>

View file

@ -28,7 +28,7 @@ sub unlockDeadBuilds {
my $pid = $build->locker; my $pid = $build->locker;
my $unlock = 0; my $unlock = 0;
if ($pid == $$) { if ($pid == $$) {
if (!defined $lastTime || $build->starttime < $lastTime - 300) { if (!defined $lastTime || $build->starttime < $lastTime - 600) {
$unlock = 1; $unlock = 1;
} }
} elsif (kill(0, $pid) != 1) { # see if we can signal the process } elsif (kill(0, $pid) != 1) { # see if we can signal the process
@ -70,27 +70,29 @@ sub checkBuilds {
my %maxConcurrent; my %maxConcurrent;
foreach my $machineName (keys %{$machines}) { foreach my $machineName (keys %{$machines}) {
foreach my $system (${$machines}{$machineName}{'systemTypes'}) { foreach my $system (@{${$machines}{$machineName}{'systemTypes'}}) {
$maxConcurrent{$system} = (${$machines}{$machineName}{'maxJobs'} or 0) + ($maxConcurrent{$system} or 0) $maxConcurrent{$system} = (${$machines}{$machineName}{'maxJobs'} or 0) + ($maxConcurrent{$system} or 0)
} }
} }
txn_do($db, sub { txn_do($db, sub {
# Cache scheduled by derivation path to speed up # Cache scheduled builds by derivation path to speed up
# findBuildDependencyInQueue. # findBuildDependencyInQueue.
my $buildsByDrv = {}; my $buildsByDrv = {};
$buildsByDrv->{$_->drvpath} = $_->id $buildsByDrv->{$_->drvpath} = $_->id
foreach $db->resultset('Builds')->search({ finished => 0, enabled => 1 }, { join => ['project'] }); foreach $db->resultset('Builds')->search({ finished => 0 }, { join => ['project'] });
# Get the system types for the runnable builds. # Get the system types for the runnable builds.
my @systemTypes = $db->resultset('Builds')->search( my @systemTypes = $db->resultset('Builds')->search(
{ finished => 0, busy => 0, enabled => 1 }, { finished => 0, busy => 0 },
{ join => ['project'], select => ['system'], as => ['system'], distinct => 1 }); { join => ['project'], select => ['system'], as => ['system'], distinct => 1 });
# Get the total number of scheduling shares.
my $totalShares = getTotalShares($db);
# For each system type, select up to the maximum number of # For each system type, select up to the maximum number of
# concurrent build for that system type. Choose the highest # concurrent build for that system type.
# priority builds first, then the oldest builds.
foreach my $system (@systemTypes) { foreach my $system (@systemTypes) {
# How many builds are already currently executing for this # How many builds are already currently executing for this
# system type? # system type?
@ -101,42 +103,84 @@ sub checkBuilds {
my $max = defined $systemTypeInfo ? $systemTypeInfo->maxconcurrent : $maxConcurrent{$system->system} // 2; my $max = defined $systemTypeInfo ? $systemTypeInfo->maxconcurrent : $maxConcurrent{$system->system} // 2;
my $extraAllowed = $max - $nrActive; my $extraAllowed = $max - $nrActive;
$extraAllowed = 0 if $extraAllowed < 0; next if $extraAllowed <= 0;
# Select the highest-priority builds to start. print STDERR "starting at most $extraAllowed builds for system ${\$system->system}\n";
my @builds = $extraAllowed == 0 ? () : $db->resultset('Builds')->search(
{ finished => 0, busy => 0, system => $system->system, enabled => 1 },
{ join => ['project'], order_by => ["priority DESC", "id"] });
my $started = 0; j: while ($extraAllowed-- > 0) {
foreach my $build (@builds) {
# Find a dependency of $build that has no queued my @runnableJobsets = $db->resultset('Builds')->search(
# dependencies itself. This isn't strictly necessary, { finished => 0, busy => 0, system => $system->system },
# but it ensures that Nix builds are done as part of { select => ['project', 'jobset'], distinct => 1 });
# their corresponding Hydra builds, rather than as a
# dependency of some other Hydra build. next if @runnableJobsets == 0;
while (my $dep = findBuildDependencyInQueue($buildsByDrv, $build)) {
$build = $dep; my $windowSize = 24 * 3600;
my $totalWindowSize = $windowSize * $max;
my @res;
foreach my $b (@runnableJobsets) {
my $jobset = $db->resultset('Jobsets')->find($b->get_column('project'), $b->get_column('jobset')) or die;
my $duration = $jobset->builds->search(
{ },
{ where => \ ("(finished = 0 or (me.stoptime >= " . (time() - $windowSize) . "))")
, join => 'buildsteps'
, select => \ "sum(coalesce(buildsteps.stoptime, ${\time}) - buildsteps.starttime)"
, as => "sum" })->single->get_column("sum") // 0;
# Add a 30s penalty for each started build. This
# is to account for jobsets that have running
# builds but no build steps yet.
$duration += $jobset->builds->search({ finished => 0, busy => 1 })->count * 30;
my $share = $jobset->schedulingshares;
my $delta = ($share / $totalShares) - ($duration / $totalWindowSize);
#printf STDERR "%s:%s: %d s, %.3f%%, allowance = %.3f%%\n", $jobset->get_column('project'), $jobset->name, $duration, $duration / $totalWindowSize, $delta;
push @res, { jobset => $jobset, delta => $delta };
} }
next if $build->busy;
my $logfile = getcwd . "/logs/" . $build->id; foreach my $r (sort { $b->{delta} <=> $a->{delta} } @res) {
mkdir(dirname $logfile); my $jobset = $r->{jobset};
unlink($logfile); #print STDERR "selected ", $jobset->get_column('project'), ':', $jobset->name, "\n";
$build->update(
{ busy => 1
, locker => $$
, logfile => $logfile
, starttime => time()
});
push @buildsStarted, $build;
last if ++$started >= $extraAllowed; # Select the highest-priority build for this jobset.
} my @builds = $jobset->builds->search(
{ finished => 0, busy => 0, system => $system->system },
{ order_by => ["priority DESC", "id"] });
if ($started > 0) { foreach my $build (@builds) {
print STDERR "system type `", $system->system, # Find a dependency of $build that has no queued
"': $nrActive active, $max allowed, started $started builds\n"; # dependencies itself. This isn't strictly necessary,
# but it ensures that Nix builds are done as part of
# their corresponding Hydra builds, rather than as a
# dependency of some other Hydra build.
while (my $dep = findBuildDependencyInQueue($buildsByDrv, $build)) {
$build = $dep;
}
next if $build->busy;
printf STDERR "starting build %d (%s:%s:%s) on %s (jobset allowance = %.3f%%)\n",
$build->id, $build->project->name, $build->jobset->name, $build->job->name, $build->system, $r->{delta};
my $logfile = getcwd . "/logs/" . $build->id;
mkdir(dirname $logfile);
unlink($logfile);
$build->update(
{ busy => 1
, locker => $$
, logfile => $logfile
, starttime => time()
});
push @buildsStarted, $build;
next j;
}
}
last; # nothing found, give up on this system type
} }
} }
}); });
@ -145,7 +189,6 @@ sub checkBuilds {
# outside the transaction in case it aborts or something. # outside the transaction in case it aborts or something.
foreach my $build (@buildsStarted) { foreach my $build (@buildsStarted) {
my $id = $build->id; my $id = $build->id;
print "starting build $id (", $build->project->name, ":", $build->jobset->name, ':', $build->job->name, ") on ", $build->system, "\n";
eval { eval {
my $logfile = $build->logfile; my $logfile = $build->logfile;
my $child = fork(); my $child = fork();

View file

@ -61,6 +61,7 @@ create table Jobsets (
emailOverride text not null, emailOverride text not null,
keepnr integer not null default 3, keepnr integer not null default 3,
checkInterval integer not null default 300, -- minimum time in seconds between polls (0 = disable polling) checkInterval integer not null default 300, -- minimum time in seconds between polls (0 = disable polling)
schedulingShares integer not null default 100,
primary key (project, name), primary key (project, name),
foreign key (project) references Projects(name) on delete cascade on update cascade foreign key (project) references Projects(name) on delete cascade on update cascade
#ifdef SQLITE #ifdef SQLITE

2
src/sql/upgrade-21.sql Normal file
View file

@ -0,0 +1,2 @@
-- Schema upgrade 21: add the per-jobset scheduling share count used by
-- the queue runner.  The column is non-null with a default of 100, so
-- existing jobsets receive 100 shares.
alter table Jobsets
add column schedulingShares integer not null default 100;