forked from lix-project/hydra
Keep track of failed paths in the Hydra database
I.e., stop using Nix's failed paths feature; it is easier to keep everything in one place.
This commit is contained in:
parent
c68036f8b0
commit
6d738a31bf
|
@ -780,7 +780,7 @@ void State::builder(Step::ptr step, MachineReservation::ptr reservation)
|
||||||
auto store = openStore(); // FIXME: pool
|
auto store = openStore(); // FIXME: pool
|
||||||
doBuildStep(store, step, reservation->machine);
|
doBuildStep(store, step, reservation->machine);
|
||||||
} catch (std::exception & e) {
|
} catch (std::exception & e) {
|
||||||
printMsg(lvlError, format("build thread for ‘%1%’: %2%") % step->drvPath % e.what());
|
printMsg(lvlError, format("error building ‘%1%’: %2%") % step->drvPath % e.what());
|
||||||
// FIXME: put step back in runnable and retry
|
// FIXME: put step back in runnable and retry
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -828,35 +828,55 @@ void State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
printMsg(lvlInfo, format("performing build step ‘%1%’ (needed by %2% builds)") % step->drvPath % dependents.size());
|
printMsg(lvlInfo, format("performing build step ‘%1%’ (needed by %2% builds)") % step->drvPath % dependents.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Create a build step record indicating that we started
|
|
||||||
building. Also, mark the selected build as busy. */
|
|
||||||
auto conn(dbPool.get());
|
auto conn(dbPool.get());
|
||||||
|
|
||||||
RemoteResult result;
|
RemoteResult result;
|
||||||
|
BuildResult res;
|
||||||
|
int stepNr = 0;
|
||||||
|
|
||||||
result.startTime = time(0);
|
result.startTime = time(0);
|
||||||
int stepNr;
|
|
||||||
|
/* If any of the outputs have previously failed, then don't
|
||||||
|
retry. */
|
||||||
|
bool cachedFailure = false;
|
||||||
{
|
{
|
||||||
pqxx::work txn(*conn);
|
pqxx::work txn(*conn);
|
||||||
stepNr = createBuildStep(txn, result.startTime, build, step, machine->sshName, bssBusy);
|
for (auto & path : outputPaths(step->drv))
|
||||||
txn.parameterized("update Builds set busy = 1 where id = $1")(build->id).exec();
|
if (!txn.parameterized("select 1 from FailedPaths where path = $1")(path).exec().empty()) {
|
||||||
txn.commit();
|
cachedFailure = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
if (cachedFailure)
|
||||||
buildRemote(store, machine->sshName, machine->sshKey, step->drvPath, step->drv, logDir, result);
|
result.status = RemoteResult::rrPermanentFailure;
|
||||||
} catch (Error & e) {
|
else {
|
||||||
result.status = RemoteResult::rrMiscFailure;
|
|
||||||
result.errorMsg = e.msg();
|
/* Create a build step record indicating that we started
|
||||||
printMsg(lvlError, format("ERROR: %1%") % e.msg());
|
building. Also, mark the selected build as busy. */
|
||||||
abort();
|
{
|
||||||
|
pqxx::work txn(*conn);
|
||||||
|
stepNr = createBuildStep(txn, result.startTime, build, step, machine->sshName, bssBusy);
|
||||||
|
txn.parameterized("update Builds set busy = 1 where id = $1")(build->id).exec();
|
||||||
|
txn.commit();
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
buildRemote(store, machine->sshName, machine->sshKey, step->drvPath, step->drv, logDir, result);
|
||||||
|
} catch (Error & e) {
|
||||||
|
result.status = RemoteResult::rrMiscFailure;
|
||||||
|
result.errorMsg = e.msg();
|
||||||
|
printMsg(lvlError, format("ERROR: %1%") % e.msg());
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.status == RemoteResult::rrSuccess) res = getBuildResult(store, step->drv);
|
||||||
|
|
||||||
|
// FIXME: handle failed-with-output
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!result.stopTime) result.stopTime = time(0);
|
if (!result.stopTime) result.stopTime = time(0);
|
||||||
|
|
||||||
BuildResult res;
|
|
||||||
if (result.status == RemoteResult::rrSuccess) res = getBuildResult(store, step->drv);
|
|
||||||
|
|
||||||
// FIXME: handle failed-with-output
|
|
||||||
|
|
||||||
/* Remove this step. After this, incoming builds that depend on
|
/* Remove this step. After this, incoming builds that depend on
|
||||||
drvPath will either see that the output paths exist, or will
|
drvPath will either see that the output paths exist, or will
|
||||||
create a new build step for drvPath. The latter is fine - it
|
create a new build step for drvPath. The latter is fine - it
|
||||||
|
@ -894,26 +914,42 @@ void State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
markSucceededBuild(txn, build2, res, false, result.startTime, result.stopTime);
|
markSucceededBuild(txn, build2, res, false, result.startTime, result.stopTime);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
/* Create failed build steps for every build that depends
|
/* Failure case. */
|
||||||
on this. */
|
|
||||||
for (auto build2 : dependents) {
|
|
||||||
if (build == build2) continue;
|
|
||||||
createBuildStep(txn, result.stopTime, build2, step, machine->sshName, bssFailed, result.errorMsg, build->id);
|
|
||||||
}
|
|
||||||
|
|
||||||
finishBuildStep(txn, result.startTime, result.stopTime, build->id, stepNr, machine->sshName, bssFailed, result.errorMsg);
|
/* For regular failures, we don't care about the error
|
||||||
|
message. */
|
||||||
|
if (result.status != RemoteResult::rrMiscFailure) result.errorMsg = "";
|
||||||
|
|
||||||
|
if (!cachedFailure) {
|
||||||
|
|
||||||
|
/* Create failed build steps for every build that depends
|
||||||
|
on this. */
|
||||||
|
for (auto build2 : dependents) {
|
||||||
|
if (build == build2) continue;
|
||||||
|
createBuildStep(txn, result.stopTime, build2, step, machine->sshName, bssFailed, result.errorMsg, build->id);
|
||||||
|
}
|
||||||
|
|
||||||
|
finishBuildStep(txn, result.startTime, result.stopTime, build->id, stepNr, machine->sshName, bssFailed, result.errorMsg);
|
||||||
|
}
|
||||||
|
|
||||||
/* Mark all builds that depend on this derivation as failed. */
|
/* Mark all builds that depend on this derivation as failed. */
|
||||||
for (auto build2 : dependents) {
|
for (auto build2 : dependents) {
|
||||||
printMsg(lvlError, format("marking build %1% as failed") % build2->id);
|
printMsg(lvlError, format("marking build %1% as failed") % build2->id);
|
||||||
txn.parameterized
|
txn.parameterized
|
||||||
("update Builds set finished = 1, busy = 0, isCachedBuild = 0, buildStatus = $2, startTime = $3, stopTime = $4 where id = $1")
|
("update Builds set finished = 1, busy = 0, buildStatus = $2, startTime = $3, stopTime = $4, isCachedBuild = $5 where id = $1")
|
||||||
(build2->id)
|
(build2->id)
|
||||||
((int) (build2->drvPath == step->drvPath ? bsFailed : bsDepFailed))
|
((int) (build2->drvPath == step->drvPath ? bsFailed : bsDepFailed))
|
||||||
(result.startTime)
|
(result.startTime)
|
||||||
(result.stopTime).exec();
|
(result.stopTime)
|
||||||
|
(cachedFailure ? 1 : 0).exec();
|
||||||
build2->finishedInDB = true; // FIXME: txn might fail
|
build2->finishedInDB = true; // FIXME: txn might fail
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Remember failed paths in the database so that they
|
||||||
|
won't be built again. */
|
||||||
|
if (!cachedFailure && result.status == RemoteResult::rrPermanentFailure)
|
||||||
|
for (auto & path : outputPaths(step->drv))
|
||||||
|
txn.parameterized("insert into FailedPaths values ($1)")(path).exec();
|
||||||
}
|
}
|
||||||
|
|
||||||
txn.commit();
|
txn.commit();
|
||||||
|
|
|
@ -45,7 +45,7 @@ sub clear_queue_non_current : Chained('admin') PathPart('clear-queue-non-current
|
||||||
|
|
||||||
sub clearfailedcache : Chained('admin') PathPart('clear-failed-cache') Args(0) {
|
sub clearfailedcache : Chained('admin') PathPart('clear-failed-cache') Args(0) {
|
||||||
my ($self, $c) = @_;
|
my ($self, $c) = @_;
|
||||||
my $r = `nix-store --clear-failed-paths '*'`;
|
$c->model('DB::FailedPaths')->delete;
|
||||||
$c->res->redirect($c->request->referer // "/");
|
$c->res->redirect($c->request->referer // "/");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -465,9 +465,10 @@ sub restartBuilds($$) {
|
||||||
# !!! Should do this in a trigger.
|
# !!! Should do this in a trigger.
|
||||||
$db->resultset('JobsetEvals')->search({ build => \@buildIds }, { join => 'buildIds' })->update({ nrsucceeded => undef });
|
$db->resultset('JobsetEvals')->search({ build => \@buildIds }, { join => 'buildIds' })->update({ nrsucceeded => undef });
|
||||||
|
|
||||||
# Clear Nix's negative failure cache.
|
# Clear the failed paths cache.
|
||||||
# FIXME: Add this to the API.
|
# FIXME: Add this to the API.
|
||||||
system("nix-store", "--clear-failed-paths", @paths);
|
# FIXME: clear the dependencies?
|
||||||
|
$db->resultset('FailedPaths')->search({ path => [ @paths ]})->delete;
|
||||||
});
|
});
|
||||||
|
|
||||||
return scalar(@buildIds);
|
return scalar(@buildIds);
|
||||||
|
|
65
src/lib/Hydra/Schema/FailedPaths.pm
Normal file
65
src/lib/Hydra/Schema/FailedPaths.pm
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
use utf8;
|
||||||
|
package Hydra::Schema::FailedPaths;
|
||||||
|
|
||||||
|
# Created by DBIx::Class::Schema::Loader
|
||||||
|
# DO NOT MODIFY THE FIRST PART OF THIS FILE
|
||||||
|
|
||||||
|
=head1 NAME
|
||||||
|
|
||||||
|
Hydra::Schema::FailedPaths
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
use base 'DBIx::Class::Core';
|
||||||
|
|
||||||
|
=head1 COMPONENTS LOADED
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item * L<Hydra::Component::ToJSON>
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
__PACKAGE__->load_components("+Hydra::Component::ToJSON");
|
||||||
|
|
||||||
|
=head1 TABLE: C<FailedPaths>
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
__PACKAGE__->table("FailedPaths");
|
||||||
|
|
||||||
|
=head1 ACCESSORS
|
||||||
|
|
||||||
|
=head2 path
|
||||||
|
|
||||||
|
data_type: 'text'
|
||||||
|
is_nullable: 0
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
__PACKAGE__->add_columns("path", { data_type => "text", is_nullable => 0 });
|
||||||
|
|
||||||
|
=head1 PRIMARY KEY
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item * L</path>
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
__PACKAGE__->set_primary_key("path");
|
||||||
|
|
||||||
|
|
||||||
|
# Created by DBIx::Class::Schema::Loader v0.07033 @ 2015-06-10 14:48:16
|
||||||
|
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:WFgjfjH+szE6Ntcicmaflw
|
||||||
|
|
||||||
|
|
||||||
|
# You can replace this text with custom code or comments, and it will be preserved on regeneration
|
||||||
|
1;
|
|
@ -511,6 +511,22 @@ create table StarredJobs (
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
-- The output paths that have permanently failed.
|
||||||
|
create table FailedPaths (
|
||||||
|
path text primary key not null
|
||||||
|
);
|
||||||
|
|
||||||
|
#ifdef POSTGRESQL
|
||||||
|
|
||||||
|
-- Needed because Postgres doesn't have "ignore duplicate" or upsert
|
||||||
|
-- yet.
|
||||||
|
create rule IdempotentInsert as on insert to FailedPaths
|
||||||
|
where exists (select 1 from FailedPaths where path = new.path)
|
||||||
|
do instead nothing;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
-- Cache of the number of finished builds.
|
-- Cache of the number of finished builds.
|
||||||
create table NrBuilds (
|
create table NrBuilds (
|
||||||
what text primary key not null,
|
what text primary key not null,
|
||||||
|
|
Loading…
Reference in a new issue