forked from lix-project/lix
* build-remote.pl: Pick machines in a round-robin order, rather than
giving jobs to the first machine until it hits its job limit, then the second machine and so on. This should improve utilisation of the Hydra build farm a lot. Also take an optional speed factor into account to cause fast machines to be preferred over slower machines with a similar load.
This commit is contained in:
parent
57e0d73c77
commit
86408b3f47
1 changed files with 67 additions and 23 deletions
|
@ -47,24 +47,21 @@ decline if !defined $conf || ! -e $conf;
|
||||||
my $canBuildLocally = $amWilling && ($localSystem eq $neededSystem);
|
my $canBuildLocally = $amWilling && ($localSystem eq $neededSystem);
|
||||||
|
|
||||||
|
|
||||||
# Otherwise find a willing remote machine.
|
|
||||||
my @machines;
|
|
||||||
my %curJobs;
|
|
||||||
|
|
||||||
|
|
||||||
# Read the list of machines.
|
# Read the list of machines.
|
||||||
|
my @machines;
|
||||||
open CONF, "< $conf" or die;
|
open CONF, "< $conf" or die;
|
||||||
|
|
||||||
while (<CONF>) {
|
while (<CONF>) {
|
||||||
chomp;
|
chomp;
|
||||||
s/\#.*$//g;
|
s/\#.*$//g;
|
||||||
next if /^\s*$/;
|
next if /^\s*$/;
|
||||||
/^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s*$/ or die;
|
/^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\d+)(\s+([0-9\.]+))?\s*$/ or die;
|
||||||
push @machines,
|
push @machines,
|
||||||
{ hostName => $1
|
{ hostName => $1
|
||||||
, systemTypes => [split(/,/, $2)]
|
, systemTypes => [split(/,/, $2)]
|
||||||
, sshKeys => $3
|
, sshKeys => $3
|
||||||
, maxJobs => $4
|
, maxJobs => $4
|
||||||
|
, speedFactor => 1.0 * ($6 || 1)
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,38 +74,53 @@ open MAINLOCK, ">>$mainLock" or die;
|
||||||
flock(MAINLOCK, LOCK_EX) or die;
|
flock(MAINLOCK, LOCK_EX) or die;
|
||||||
|
|
||||||
|
|
||||||
# Find a suitable system.
|
sub openSlotLock {
|
||||||
|
my ($machine, $slot) = @_;
|
||||||
|
my $slotLockFn = "$currentLoad/" . (join '+', @{$machine->{systemTypes}}) . "-" . $machine->{hostName} . "-$slot";
|
||||||
|
my $slotLock = new IO::Handle;
|
||||||
|
open $slotLock, ">>$slotLockFn" or die;
|
||||||
|
return $slotLock;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Find all machine that can execute this build, i.e., that support
|
||||||
|
# builds for the given platform and are not at their job limit.
|
||||||
my $rightType = 0;
|
my $rightType = 0;
|
||||||
my $machine;
|
my @available = ();
|
||||||
my $slotLock;
|
|
||||||
LOOP: foreach my $cur (@machines) {
|
LOOP: foreach my $cur (@machines) {
|
||||||
print STDERR @{$cur->{systemTypes}}, "\n";
|
|
||||||
if (grep { $neededSystem eq $_ } @{$cur->{systemTypes}}) {
|
if (grep { $neededSystem eq $_ } @{$cur->{systemTypes}}) {
|
||||||
$rightType = 1;
|
$rightType = 1;
|
||||||
|
|
||||||
# We have a machine of the right type. Try to get a lock on
|
# We have a machine of the right type. Determine the load on
|
||||||
# one of the machine's lock files.
|
# the machine.
|
||||||
my $slot = 0;
|
my $slot = 0;
|
||||||
|
my $load = 0;
|
||||||
|
my $free;
|
||||||
while ($slot < $cur->{maxJobs}) {
|
while ($slot < $cur->{maxJobs}) {
|
||||||
my $slotLockFn = "$currentLoad/" . (join '+', @{$cur->{systemTypes}}) . "-" . $cur->{hostName} . "-$slot";
|
my $slotLock = openSlotLock($cur, $slot);
|
||||||
$slotLock = new IO::Handle;
|
|
||||||
open $slotLock, ">>$slotLockFn" or die;
|
|
||||||
if (flock($slotLock, LOCK_EX | LOCK_NB)) {
|
if (flock($slotLock, LOCK_EX | LOCK_NB)) {
|
||||||
utime undef, undef, $slotLock;
|
$free = $slot unless defined $free;
|
||||||
$machine = $cur;
|
flock($slotLock, LOCK_UN) or die;
|
||||||
last LOOP;
|
} else {
|
||||||
|
$load++;
|
||||||
}
|
}
|
||||||
close $slotLock;
|
close $slotLock;
|
||||||
$slot++;
|
$slot++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
push @available, { machine => $cur, load => $load, free => $free }
|
||||||
|
if $load < $cur->{maxJobs};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
close MAINLOCK;
|
if (defined $ENV{NIX_DEBUG_HOOK}) {
|
||||||
|
print STDERR "load on " . $_->{machine}->{hostName} . " = " . $_->{load} . "\n"
|
||||||
|
foreach @available;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# Didn't find one? Then decline or postpone.
|
# Didn't find any available machine? Then decline or postpone.
|
||||||
if (!defined $machine) {
|
if (scalar @available == 0) {
|
||||||
# Postpone if we have a machine of the right type, except if the
|
# Postpone if we have a machine of the right type, except if the
|
||||||
# local system can and wants to do the build.
|
# local system can and wants to do the build.
|
||||||
if ($rightType && !$canBuildLocally) {
|
if ($rightType && !$canBuildLocally) {
|
||||||
|
@ -119,8 +131,40 @@ if (!defined $machine) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Yes we did, accept.
|
|
||||||
|
# Prioritise the available machines as follows:
|
||||||
|
# - First by load divided by speed factor, rounded to the nearest
|
||||||
|
# integer. This causes fast machines to be preferred over slow
|
||||||
|
# machines with similar loads.
|
||||||
|
# - Then by speed factor.
|
||||||
|
# - Finally by load.
|
||||||
|
sub lf { my $x = shift; return int($x->{load} / $x->{machine}->{speedFactor} + 0.4999); }
|
||||||
|
@available = sort
|
||||||
|
{ lf($a) <=> lf($b)
|
||||||
|
|| $b->{machine}->{speedFactor} <=> $a->{machine}->{speedFactor}
|
||||||
|
|| $a->{load} <=> $b->{load}
|
||||||
|
} @available;
|
||||||
|
|
||||||
|
|
||||||
|
# Select the best available machine and lock a free slot.
|
||||||
|
my $selected = $available[0];
|
||||||
|
my $machine = $selected->{machine};
|
||||||
|
|
||||||
|
my $slotLock = openSlotLock($machine, $selected->{free});
|
||||||
|
flock($slotLock, LOCK_EX | LOCK_NB) or die;
|
||||||
|
|
||||||
|
close MAINLOCK;
|
||||||
|
|
||||||
|
|
||||||
|
# Tell Nix we've accepted the build.
|
||||||
sendReply "accept";
|
sendReply "accept";
|
||||||
|
if (defined $ENV{NIX_DEBUG_HOOK}) {
|
||||||
|
my $hostName = $machine->{hostName};
|
||||||
|
my $sp = $machine->{speedFactor};
|
||||||
|
print STDERR "building `$drvPath' on `$hostName' - $sp - " . $selected->{free} . "\n";
|
||||||
|
sleep 10;
|
||||||
|
exit 0;
|
||||||
|
}
|
||||||
my $x = <STDIN>;
|
my $x = <STDIN>;
|
||||||
chomp $x;
|
chomp $x;
|
||||||
|
|
||||||
|
@ -129,7 +173,7 @@ if ($x ne "okay") {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# Do the actual job.
|
# Do the actual build.
|
||||||
my $hostName = $machine->{hostName};
|
my $hostName = $machine->{hostName};
|
||||||
print STDERR "building `$drvPath' on `$hostName'\n";
|
print STDERR "building `$drvPath' on `$hostName'\n";
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue