Keep track of failed paths in the Hydra database
I.e. don't use Nix's failed paths feature anymore. Easier to keep everything in one place.
This commit is contained in:
parent
c68036f8b0
commit
6d738a31bf
|
@ -780,7 +780,7 @@ void State::builder(Step::ptr step, MachineReservation::ptr reservation)
|
|||
auto store = openStore(); // FIXME: pool
|
||||
doBuildStep(store, step, reservation->machine);
|
||||
} catch (std::exception & e) {
|
||||
printMsg(lvlError, format("build thread for ‘%1%’: %2%") % step->drvPath % e.what());
|
||||
printMsg(lvlError, format("error building ‘%1%’: %2%") % step->drvPath % e.what());
|
||||
// FIXME: put step back in runnable and retry
|
||||
}
|
||||
|
||||
|
@ -828,35 +828,55 @@ void State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
printMsg(lvlInfo, format("performing build step ‘%1%’ (needed by %2% builds)") % step->drvPath % dependents.size());
|
||||
}
|
||||
|
||||
/* Create a build step record indicating that we started
|
||||
building. Also, mark the selected build as busy. */
|
||||
auto conn(dbPool.get());
|
||||
|
||||
RemoteResult result;
|
||||
BuildResult res;
|
||||
int stepNr = 0;
|
||||
|
||||
result.startTime = time(0);
|
||||
int stepNr;
|
||||
|
||||
/* If any of the outputs have previously failed, then don't
|
||||
retry. */
|
||||
bool cachedFailure = false;
|
||||
{
|
||||
pqxx::work txn(*conn);
|
||||
stepNr = createBuildStep(txn, result.startTime, build, step, machine->sshName, bssBusy);
|
||||
txn.parameterized("update Builds set busy = 1 where id = $1")(build->id).exec();
|
||||
txn.commit();
|
||||
for (auto & path : outputPaths(step->drv))
|
||||
if (!txn.parameterized("select 1 from FailedPaths where path = $1")(path).exec().empty()) {
|
||||
cachedFailure = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
buildRemote(store, machine->sshName, machine->sshKey, step->drvPath, step->drv, logDir, result);
|
||||
} catch (Error & e) {
|
||||
result.status = RemoteResult::rrMiscFailure;
|
||||
result.errorMsg = e.msg();
|
||||
printMsg(lvlError, format("ERROR: %1%") % e.msg());
|
||||
abort();
|
||||
if (cachedFailure)
|
||||
result.status = RemoteResult::rrPermanentFailure;
|
||||
else {
|
||||
|
||||
/* Create a build step record indicating that we started
|
||||
building. Also, mark the selected build as busy. */
|
||||
{
|
||||
pqxx::work txn(*conn);
|
||||
stepNr = createBuildStep(txn, result.startTime, build, step, machine->sshName, bssBusy);
|
||||
txn.parameterized("update Builds set busy = 1 where id = $1")(build->id).exec();
|
||||
txn.commit();
|
||||
}
|
||||
|
||||
try {
|
||||
buildRemote(store, machine->sshName, machine->sshKey, step->drvPath, step->drv, logDir, result);
|
||||
} catch (Error & e) {
|
||||
result.status = RemoteResult::rrMiscFailure;
|
||||
result.errorMsg = e.msg();
|
||||
printMsg(lvlError, format("ERROR: %1%") % e.msg());
|
||||
abort();
|
||||
}
|
||||
|
||||
if (result.status == RemoteResult::rrSuccess) res = getBuildResult(store, step->drv);
|
||||
|
||||
// FIXME: handle failed-with-output
|
||||
}
|
||||
|
||||
if (!result.stopTime) result.stopTime = time(0);
|
||||
|
||||
BuildResult res;
|
||||
if (result.status == RemoteResult::rrSuccess) res = getBuildResult(store, step->drv);
|
||||
|
||||
// FIXME: handle failed-with-output
|
||||
|
||||
/* Remove this step. After this, incoming builds that depend on
|
||||
drvPath will either see that the output paths exist, or will
|
||||
create a new build step for drvPath. The latter is fine - it
|
||||
|
@ -894,26 +914,42 @@ void State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
|||
markSucceededBuild(txn, build2, res, false, result.startTime, result.stopTime);
|
||||
|
||||
} else {
|
||||
/* Create failed build steps for every build that depends
|
||||
on this. */
|
||||
for (auto build2 : dependents) {
|
||||
if (build == build2) continue;
|
||||
createBuildStep(txn, result.stopTime, build2, step, machine->sshName, bssFailed, result.errorMsg, build->id);
|
||||
}
|
||||
/* Failure case. */
|
||||
|
||||
finishBuildStep(txn, result.startTime, result.stopTime, build->id, stepNr, machine->sshName, bssFailed, result.errorMsg);
|
||||
/* For regular failures, we don't care about the error
|
||||
message. */
|
||||
if (result.status != RemoteResult::rrMiscFailure) result.errorMsg = "";
|
||||
|
||||
if (!cachedFailure) {
|
||||
|
||||
/* Create failed build steps for every build that depends
|
||||
on this. */
|
||||
for (auto build2 : dependents) {
|
||||
if (build == build2) continue;
|
||||
createBuildStep(txn, result.stopTime, build2, step, machine->sshName, bssFailed, result.errorMsg, build->id);
|
||||
}
|
||||
|
||||
finishBuildStep(txn, result.startTime, result.stopTime, build->id, stepNr, machine->sshName, bssFailed, result.errorMsg);
|
||||
}
|
||||
|
||||
/* Mark all builds that depend on this derivation as failed. */
|
||||
for (auto build2 : dependents) {
|
||||
printMsg(lvlError, format("marking build %1% as failed") % build2->id);
|
||||
txn.parameterized
|
||||
("update Builds set finished = 1, busy = 0, isCachedBuild = 0, buildStatus = $2, startTime = $3, stopTime = $4 where id = $1")
|
||||
("update Builds set finished = 1, busy = 0, buildStatus = $2, startTime = $3, stopTime = $4, isCachedBuild = $5 where id = $1")
|
||||
(build2->id)
|
||||
((int) (build2->drvPath == step->drvPath ? bsFailed : bsDepFailed))
|
||||
(result.startTime)
|
||||
(result.stopTime).exec();
|
||||
(result.stopTime)
|
||||
(cachedFailure ? 1 : 0).exec();
|
||||
build2->finishedInDB = true; // FIXME: txn might fail
|
||||
}
|
||||
|
||||
/* Remember failed paths in the database so that they
|
||||
won't be built again. */
|
||||
if (!cachedFailure && result.status == RemoteResult::rrPermanentFailure)
|
||||
for (auto & path : outputPaths(step->drv))
|
||||
txn.parameterized("insert into FailedPaths values ($1)")(path).exec();
|
||||
}
|
||||
|
||||
txn.commit();
|
||||
|
|
|
@ -45,7 +45,7 @@ sub clear_queue_non_current : Chained('admin') PathPart('clear-queue-non-current
|
|||
|
||||
sub clearfailedcache : Chained('admin') PathPart('clear-failed-cache') Args(0) {
|
||||
my ($self, $c) = @_;
|
||||
my $r = `nix-store --clear-failed-paths '*'`;
|
||||
$c->model('DB::FailedPaths')->delete;
|
||||
$c->res->redirect($c->request->referer // "/");
|
||||
}
|
||||
|
||||
|
|
|
@ -465,9 +465,10 @@ sub restartBuilds($$) {
|
|||
# !!! Should do this in a trigger.
|
||||
$db->resultset('JobsetEvals')->search({ build => \@buildIds }, { join => 'buildIds' })->update({ nrsucceeded => undef });
|
||||
|
||||
# Clear Nix's negative failure cache.
|
||||
# Clear the failed paths cache.
|
||||
# FIXME: Add this to the API.
|
||||
system("nix-store", "--clear-failed-paths", @paths);
|
||||
# FIXME: clear the dependencies?
|
||||
$db->resultset('FailedPaths')->search({ path => [ @paths ]})->delete;
|
||||
});
|
||||
|
||||
return scalar(@buildIds);
|
||||
|
|
65
src/lib/Hydra/Schema/FailedPaths.pm
Normal file
65
src/lib/Hydra/Schema/FailedPaths.pm
Normal file
|
@ -0,0 +1,65 @@
|
|||
use utf8;
|
||||
package Hydra::Schema::FailedPaths;
|
||||
|
||||
# Created by DBIx::Class::Schema::Loader
|
||||
# DO NOT MODIFY THE FIRST PART OF THIS FILE
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Hydra::Schema::FailedPaths
|
||||
|
||||
=cut
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use base 'DBIx::Class::Core';
|
||||
|
||||
=head1 COMPONENTS LOADED
|
||||
|
||||
=over 4
|
||||
|
||||
=item * L<Hydra::Component::ToJSON>
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
|
||||
__PACKAGE__->load_components("+Hydra::Component::ToJSON");
|
||||
|
||||
=head1 TABLE: C<FailedPaths>
|
||||
|
||||
=cut
|
||||
|
||||
__PACKAGE__->table("FailedPaths");
|
||||
|
||||
=head1 ACCESSORS
|
||||
|
||||
=head2 path
|
||||
|
||||
data_type: 'text'
|
||||
is_nullable: 0
|
||||
|
||||
=cut
|
||||
|
||||
__PACKAGE__->add_columns("path", { data_type => "text", is_nullable => 0 });
|
||||
|
||||
=head1 PRIMARY KEY
|
||||
|
||||
=over 4
|
||||
|
||||
=item * L</path>
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
|
||||
__PACKAGE__->set_primary_key("path");
|
||||
|
||||
|
||||
# Created by DBIx::Class::Schema::Loader v0.07033 @ 2015-06-10 14:48:16
|
||||
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:WFgjfjH+szE6Ntcicmaflw
|
||||
|
||||
|
||||
# You can replace this text with custom code or comments, and it will be preserved on regeneration
|
||||
1;
|
|
@ -511,6 +511,22 @@ create table StarredJobs (
|
|||
);
|
||||
|
||||
|
||||
-- The output paths that have permanently failed.
|
||||
create table FailedPaths (
|
||||
path text primary key not null
|
||||
);
|
||||
|
||||
#ifdef POSTGRESQL
|
||||
|
||||
-- Needed because Postgres doesn't have "ignore duplicate" or upsert
|
||||
-- yet.
|
||||
create rule IdempotentInsert as on insert to FailedPaths
|
||||
where exists (select 1 from FailedPaths where path = new.path)
|
||||
do instead nothing;
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
-- Cache of the number of finished builds.
|
||||
create table NrBuilds (
|
||||
what text primary key not null,
|
||||
|
|
Loading…
Reference in a new issue