Allow determinism checking for entire jobsets
Setting xxx-jobset-repeats = patchelf:master:2 (the format is project:jobset:repeats) in the Hydra configuration will cause Hydra to perform every build step in the specified jobset 2 additional times (i.e. 3 times in total). Non-determinism is not fatal unless the derivation has the attribute "isDeterministic = true"; otherwise we just note the lack of determinism in the Hydra database. This will allow us to get stats about the (lack of) reproducibility of all of Nixpkgs.
This commit is contained in:
parent
8bb36e79bd
commit
f6081668dc
9 changed files with 98 additions and 36 deletions
|
@ -116,7 +116,7 @@ static void copyClosureTo(ref<Store> destStore,
|
||||||
|
|
||||||
void State::buildRemote(ref<Store> destStore,
|
void State::buildRemote(ref<Store> destStore,
|
||||||
Machine::ptr machine, Step::ptr step,
|
Machine::ptr machine, Step::ptr step,
|
||||||
unsigned int maxSilentTime, unsigned int buildTimeout,
|
unsigned int maxSilentTime, unsigned int buildTimeout, unsigned int repeats,
|
||||||
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep)
|
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep)
|
||||||
{
|
{
|
||||||
assert(BuildResult::TimedOut == 8);
|
assert(BuildResult::TimedOut == 8);
|
||||||
|
@ -263,9 +263,10 @@ void State::buildRemote(ref<Store> destStore,
|
||||||
to << maxSilentTime << buildTimeout;
|
to << maxSilentTime << buildTimeout;
|
||||||
if (GET_PROTOCOL_MINOR(remoteVersion) >= 2)
|
if (GET_PROTOCOL_MINOR(remoteVersion) >= 2)
|
||||||
to << 64 * 1024 * 1024; // == maxLogSize
|
to << 64 * 1024 * 1024; // == maxLogSize
|
||||||
if (GET_PROTOCOL_MINOR(remoteVersion) >= 3)
|
if (GET_PROTOCOL_MINOR(remoteVersion) >= 3) {
|
||||||
// FIXME: make the number of repeats configurable.
|
to << repeats // == build-repeat
|
||||||
to << (step->isDeterministic ? 1 : 0);
|
<< step->isDeterministic; // == enforce-determinism
|
||||||
|
}
|
||||||
to.flush();
|
to.flush();
|
||||||
|
|
||||||
result.startTime = time(0);
|
result.startTime = time(0);
|
||||||
|
@ -295,6 +296,10 @@ void State::buildRemote(ref<Store> destStore,
|
||||||
result.stepStatus = bsSuccess;
|
result.stepStatus = bsSuccess;
|
||||||
} else {
|
} else {
|
||||||
result.errorMsg = readString(from);
|
result.errorMsg = readString(from);
|
||||||
|
if (GET_PROTOCOL_MINOR(remoteVersion) >= 3) {
|
||||||
|
result.timesBuilt = readInt(from);
|
||||||
|
result.isNonDeterministic = readInt(from);
|
||||||
|
}
|
||||||
switch ((BuildResult::Status) res) {
|
switch ((BuildResult::Status) res) {
|
||||||
case BuildResult::Built:
|
case BuildResult::Built:
|
||||||
result.stepStatus = bsSuccess;
|
result.stepStatus = bsSuccess;
|
||||||
|
|
|
@ -86,6 +86,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
BuildID buildId;
|
BuildID buildId;
|
||||||
Path buildDrvPath;
|
Path buildDrvPath;
|
||||||
unsigned int maxSilentTime, buildTimeout;
|
unsigned int maxSilentTime, buildTimeout;
|
||||||
|
unsigned int repeats = step->isDeterministic ? 1 : 0;
|
||||||
|
|
||||||
{
|
{
|
||||||
std::set<Build::ptr> dependents;
|
std::set<Build::ptr> dependents;
|
||||||
|
@ -113,6 +114,11 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
build = build2;
|
build = build2;
|
||||||
enqueueNotificationItem({NotificationItem::Type::BuildStarted, build->id});
|
enqueueNotificationItem({NotificationItem::Type::BuildStarted, build->id});
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
auto i = jobsetRepeats.find(std::make_pair(build2->projectName, build2->jobsetName));
|
||||||
|
if (i != jobsetRepeats.end())
|
||||||
|
repeats = std::max(repeats, i->second);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (!build) build = *dependents.begin();
|
if (!build) build = *dependents.begin();
|
||||||
|
|
||||||
|
@ -121,8 +127,8 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
maxSilentTime = build->maxSilentTime;
|
maxSilentTime = build->maxSilentTime;
|
||||||
buildTimeout = build->buildTimeout;
|
buildTimeout = build->buildTimeout;
|
||||||
|
|
||||||
printMsg(lvlInfo, format("performing step ‘%1%’ on ‘%2%’ (needed by build %3% and %4% others)")
|
printInfo("performing step ‘%s’ %d times on ‘%s’ (needed by build %d and %d others)",
|
||||||
% step->drvPath % machine->sshName % buildId % (dependents.size() - 1));
|
step->drvPath, repeats + 1, machine->sshName, buildId, (dependents.size() - 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool quit = buildId == buildOne && step->drvPath == buildDrvPath;
|
bool quit = buildId == buildOne && step->drvPath == buildDrvPath;
|
||||||
|
@ -162,7 +168,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
/* Do the build. */
|
/* Do the build. */
|
||||||
try {
|
try {
|
||||||
/* FIXME: referring builds may have conflicting timeouts. */
|
/* FIXME: referring builds may have conflicting timeouts. */
|
||||||
buildRemote(destStore, machine, step, maxSilentTime, buildTimeout, result, activeStep);
|
buildRemote(destStore, machine, step, maxSilentTime, buildTimeout, repeats, result, activeStep);
|
||||||
} catch (NoTokens & e) {
|
} catch (NoTokens & e) {
|
||||||
result.stepStatus = bsNarSizeLimitExceeded;
|
result.stepStatus = bsNarSizeLimitExceeded;
|
||||||
} catch (Error & e) {
|
} catch (Error & e) {
|
||||||
|
@ -224,8 +230,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
auto mc = startDbUpdate();
|
auto mc = startDbUpdate();
|
||||||
{
|
{
|
||||||
pqxx::work txn(*conn);
|
pqxx::work txn(*conn);
|
||||||
finishBuildStep(txn, result.startTime, result.stopTime, result.overhead, buildId,
|
finishBuildStep(txn, result, buildId, stepNr, machine->sshName);
|
||||||
stepNr, machine->sshName, result.stepStatus, result.errorMsg);
|
|
||||||
txn.commit();
|
txn.commit();
|
||||||
}
|
}
|
||||||
stepFinished = true;
|
stepFinished = true;
|
||||||
|
@ -279,8 +284,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
|
|
||||||
pqxx::work txn(*conn);
|
pqxx::work txn(*conn);
|
||||||
|
|
||||||
finishBuildStep(txn, result.startTime, result.stopTime, result.overhead,
|
finishBuildStep(txn, result, buildId, stepNr, machine->sshName);
|
||||||
buildId, stepNr, machine->sshName, bsSuccess);
|
|
||||||
|
|
||||||
for (auto & b : direct) {
|
for (auto & b : direct) {
|
||||||
printMsg(lvlInfo, format("marking build %1% as succeeded") % b->id);
|
printMsg(lvlInfo, format("marking build %1% as succeeded") % b->id);
|
||||||
|
@ -386,8 +390,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
|
|
||||||
if (result.stepStatus != bsCachedFailure && !stepFinished) {
|
if (result.stepStatus != bsCachedFailure && !stepFinished) {
|
||||||
assert(stepNr);
|
assert(stepNr);
|
||||||
finishBuildStep(txn, result.startTime, result.stopTime, result.overhead,
|
finishBuildStep(txn, result, buildId, stepNr, machine->sshName);
|
||||||
buildId, stepNr, machine->sshName, result.stepStatus, result.errorMsg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Mark all builds that depend on this derivation as failed. */
|
/* Mark all builds that depend on this derivation as failed. */
|
||||||
|
|
|
@ -264,20 +264,21 @@ unsigned int State::createBuildStep(pqxx::work & txn, time_t startTime, BuildID
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void State::finishBuildStep(pqxx::work & txn, time_t startTime, time_t stopTime, unsigned int overhead,
|
void State::finishBuildStep(pqxx::work & txn, const RemoteResult & result,
|
||||||
BuildID buildId, unsigned int stepNr, const std::string & machine, BuildStatus status,
|
BuildID buildId, unsigned int stepNr, const std::string & machine)
|
||||||
const std::string & errorMsg, BuildID propagatedFrom)
|
|
||||||
{
|
{
|
||||||
assert(startTime);
|
assert(result.startTime);
|
||||||
assert(stopTime);
|
assert(result.stopTime);
|
||||||
txn.parameterized
|
txn.parameterized
|
||||||
("update BuildSteps set busy = 0, status = $1, propagatedFrom = $4, errorMsg = $5, startTime = $6, stopTime = $7, machine = $8, overhead = $9 where build = $2 and stepnr = $3")
|
("update BuildSteps set busy = 0, status = $1, errorMsg = $4, startTime = $5, stopTime = $6, machine = $7, overhead = $8, timesBuilt = $9, isNonDeterministic = $10 where build = $2 and stepnr = $3")
|
||||||
((int) status)(buildId)(stepNr)
|
((int) result.stepStatus)(buildId)(stepNr)
|
||||||
(propagatedFrom, propagatedFrom != 0)
|
(result.errorMsg, result.errorMsg != "")
|
||||||
(errorMsg, errorMsg != "")
|
(result.startTime)(result.stopTime)
|
||||||
(startTime)(stopTime)
|
|
||||||
(machine, machine != "")
|
(machine, machine != "")
|
||||||
(overhead, overhead != 0).exec();
|
(result.overhead, result.overhead != 0)
|
||||||
|
(result.timesBuilt, result.timesBuilt > 0)
|
||||||
|
(result.isNonDeterministic, result.timesBuilt > 1)
|
||||||
|
.exec();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -809,6 +810,13 @@ void State::run(BuildID buildOne)
|
||||||
|
|
||||||
useSubstitutes = isTrue(hydraConfig["use-substitutes"]);
|
useSubstitutes = isTrue(hydraConfig["use-substitutes"]);
|
||||||
|
|
||||||
|
// FIXME: hacky mechanism for configuring determinism checks.
|
||||||
|
for (auto & s : tokenizeString<Strings>(hydraConfig["xxx-jobset-repeats"])) {
|
||||||
|
auto s2 = tokenizeString<std::vector<std::string>>(s, ":");
|
||||||
|
if (s2.size() != 3) throw Error("bad value in xxx-jobset-repeats");
|
||||||
|
jobsetRepeats.emplace(std::make_pair(s2[0], s2[1]), std::stoi(s2[2]));
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
auto conn(dbPool.get());
|
auto conn(dbPool.get());
|
||||||
clearBusy(*conn, 0);
|
clearBusy(*conn, 0);
|
||||||
|
|
|
@ -48,6 +48,9 @@ struct RemoteResult
|
||||||
bool canCache = false; // for bsFailed
|
bool canCache = false; // for bsFailed
|
||||||
std::string errorMsg; // for bsAborted
|
std::string errorMsg; // for bsAborted
|
||||||
|
|
||||||
|
unsigned int timesBuilt = 0;
|
||||||
|
bool isNonDeterministic = false;
|
||||||
|
|
||||||
time_t startTime = 0, stopTime = 0;
|
time_t startTime = 0, stopTime = 0;
|
||||||
unsigned int overhead = 0;
|
unsigned int overhead = 0;
|
||||||
nix::Path logFile;
|
nix::Path logFile;
|
||||||
|
@ -414,6 +417,10 @@ private:
|
||||||
from showing up as busy until the queue runner is restarted. */
|
from showing up as busy until the queue runner is restarted. */
|
||||||
nix::Sync<std::set<std::pair<BuildID, int>>> orphanedSteps;
|
nix::Sync<std::set<std::pair<BuildID, int>>> orphanedSteps;
|
||||||
|
|
||||||
|
/* How often the build steps of a jobset should be repeated in
|
||||||
|
order to detect non-determinism. */
|
||||||
|
std::map<std::pair<std::string, std::string>, unsigned int> jobsetRepeats;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
State();
|
State();
|
||||||
|
|
||||||
|
@ -437,10 +444,8 @@ private:
|
||||||
const std::string & machine, BuildStatus status, const std::string & errorMsg = "",
|
const std::string & machine, BuildStatus status, const std::string & errorMsg = "",
|
||||||
BuildID propagatedFrom = 0);
|
BuildID propagatedFrom = 0);
|
||||||
|
|
||||||
void finishBuildStep(pqxx::work & txn, time_t startTime, time_t stopTime,
|
void finishBuildStep(pqxx::work & txn, const RemoteResult & result, BuildID buildId, unsigned int stepNr,
|
||||||
unsigned int overhead, BuildID buildId, unsigned int stepNr,
|
const std::string & machine);
|
||||||
const std::string & machine, BuildStatus status, const std::string & errorMsg = "",
|
|
||||||
BuildID propagatedFrom = 0);
|
|
||||||
|
|
||||||
int createSubstitutionStep(pqxx::work & txn, time_t startTime, time_t stopTime,
|
int createSubstitutionStep(pqxx::work & txn, time_t startTime, time_t stopTime,
|
||||||
Build::ptr build, const nix::Path & drvPath, const std::string & outputName, const nix::Path & storePath);
|
Build::ptr build, const nix::Path & drvPath, const std::string & outputName, const nix::Path & storePath);
|
||||||
|
@ -492,6 +497,7 @@ private:
|
||||||
void buildRemote(nix::ref<nix::Store> destStore,
|
void buildRemote(nix::ref<nix::Store> destStore,
|
||||||
Machine::ptr machine, Step::ptr step,
|
Machine::ptr machine, Step::ptr step,
|
||||||
unsigned int maxSilentTime, unsigned int buildTimeout,
|
unsigned int maxSilentTime, unsigned int buildTimeout,
|
||||||
|
unsigned int repeats,
|
||||||
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep);
|
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep);
|
||||||
|
|
||||||
void markSucceededBuild(pqxx::work & txn, Build::ptr build,
|
void markSucceededBuild(pqxx::work & txn, Build::ptr build,
|
||||||
|
|
|
@ -103,6 +103,16 @@ __PACKAGE__->table("BuildSteps");
|
||||||
data_type: 'integer'
|
data_type: 'integer'
|
||||||
is_nullable: 1
|
is_nullable: 1
|
||||||
|
|
||||||
|
=head2 timesbuilt
|
||||||
|
|
||||||
|
data_type: 'integer'
|
||||||
|
is_nullable: 1
|
||||||
|
|
||||||
|
=head2 isnondeterministic
|
||||||
|
|
||||||
|
data_type: 'boolean'
|
||||||
|
is_nullable: 1
|
||||||
|
|
||||||
=cut
|
=cut
|
||||||
|
|
||||||
__PACKAGE__->add_columns(
|
__PACKAGE__->add_columns(
|
||||||
|
@ -132,6 +142,10 @@ __PACKAGE__->add_columns(
|
||||||
{ data_type => "integer", is_foreign_key => 1, is_nullable => 1 },
|
{ data_type => "integer", is_foreign_key => 1, is_nullable => 1 },
|
||||||
"overhead",
|
"overhead",
|
||||||
{ data_type => "integer", is_nullable => 1 },
|
{ data_type => "integer", is_nullable => 1 },
|
||||||
|
"timesbuilt",
|
||||||
|
{ data_type => "integer", is_nullable => 1 },
|
||||||
|
"isnondeterministic",
|
||||||
|
{ data_type => "boolean", is_nullable => 1 },
|
||||||
);
|
);
|
||||||
|
|
||||||
=head1 PRIMARY KEY
|
=head1 PRIMARY KEY
|
||||||
|
@ -201,8 +215,8 @@ __PACKAGE__->belongs_to(
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
# Created by DBIx::Class::Schema::Loader v0.07043 @ 2016-02-16 18:04:52
|
# Created by DBIx::Class::Schema::Loader v0.07045 @ 2016-12-07 13:48:19
|
||||||
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:TRALbEoaF/OIOyERYCyxkw
|
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:3FYkqSUfgWmiqZzmX8J4TA
|
||||||
|
|
||||||
my %hint = (
|
my %hint = (
|
||||||
columns => [
|
columns => [
|
||||||
|
|
|
@ -50,13 +50,20 @@ FOR step IN steps; IF step.busy; busy = 1; END; END;
|
||||||
END %]
|
END %]
|
||||||
</td>
|
</td>
|
||||||
<td>[% IF step.busy == 1 || ((step.machine || step.starttime) && (step.status == 0 || step.status == 1 || step.status == 3 || step.status == 4 || step.status == 7)); INCLUDE renderMachineName machine=step.machine; ELSE; "<em>n/a</em>"; END %]</td>
|
<td>[% IF step.busy == 1 || ((step.machine || step.starttime) && (step.status == 0 || step.status == 1 || step.status == 3 || step.status == 4 || step.status == 7)); INCLUDE renderMachineName machine=step.machine; ELSE; "<em>n/a</em>"; END %]</td>
|
||||||
<td>
|
<td class="step-status">
|
||||||
[% IF step.busy == 1 %]
|
[% IF step.busy == 1 %]
|
||||||
<strong>Building</strong>
|
<strong>Building</strong>
|
||||||
[% ELSIF step.status == 0 %]
|
[% ELSIF step.status == 0 %]
|
||||||
Succeeded
|
[% IF step.isnondeterministic %]
|
||||||
|
<span class="warn">Succeeded with non-determistic result</span>
|
||||||
|
[% ELSE %]
|
||||||
|
Succeeded
|
||||||
|
[% END %]
|
||||||
|
[% IF step.timesbuilt > 1 %]
|
||||||
|
([% step.timesbuilt %] times)
|
||||||
|
[% END %]
|
||||||
[% ELSIF step.status == 3 %]
|
[% ELSIF step.status == 3 %]
|
||||||
<span class="error"><strong>Aborted</strong>[% IF step.errormsg %]: [% HTML.escape(step.errormsg); END %]</span>
|
<span class="error">Aborted</span>[% IF step.errormsg %]: <em>[% HTML.escape(step.errormsg) %]</em>[% END %]
|
||||||
[% ELSIF step.status == 4 %]
|
[% ELSIF step.status == 4 %]
|
||||||
<span class="error">Cancelled</span>
|
<span class="error">Cancelled</span>
|
||||||
[% ELSIF step.status == 7 %]
|
[% ELSIF step.status == 7 %]
|
||||||
|
@ -70,9 +77,9 @@ FOR step IN steps; IF step.busy; busy = 1; END; END;
|
||||||
[% ELSIF step.status == 11 %]
|
[% ELSIF step.status == 11 %]
|
||||||
<span class="error">Output limit exceeded</span>
|
<span class="error">Output limit exceeded</span>
|
||||||
[% ELSIF step.status == 12 %]
|
[% ELSIF step.status == 12 %]
|
||||||
<span class="error">Non-deterministic build</span>
|
<span class="error">Non-determinism detected</span> [% IF step.timesbuilt %] after [% step.timesbuilt %] times[% END %]
|
||||||
[% ELSIF step.errormsg %]
|
[% ELSIF step.errormsg %]
|
||||||
<span class="error">Failed: [% HTML.escape(step.errormsg) %]</span>
|
<span class="error">Failed</span>: <em>[% HTML.escape(step.errormsg) %]</em>
|
||||||
[% ELSE %]
|
[% ELSE %]
|
||||||
<span class="error">Failed</span>
|
<span class="error">Failed</span>
|
||||||
[% END %]
|
[% END %]
|
||||||
|
@ -137,7 +144,7 @@ FOR step IN steps; IF step.busy; busy = 1; END; END;
|
||||||
<table>
|
<table>
|
||||||
<tr>
|
<tr>
|
||||||
<td>
|
<td>
|
||||||
[% INCLUDE renderBuildStatusIcon size=128, build=build %]
|
[% INCLUDE renderBuildStatusIcon size=128 build=build %]
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
<table class="info-table">
|
<table class="info-table">
|
||||||
|
|
|
@ -132,3 +132,13 @@ div.flot-tooltip {
|
||||||
opacity: 0.80;
|
opacity: 0.80;
|
||||||
z-index: 100;
|
z-index: 100;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
td.step-status span.error {
|
||||||
|
color: red;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
td.step-status span.warn {
|
||||||
|
color: #aaaa00;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
|
@ -289,6 +289,12 @@ create table BuildSteps (
|
||||||
-- Time in milliseconds spend copying stuff from/to build machines.
|
-- Time in milliseconds spend copying stuff from/to build machines.
|
||||||
overhead integer,
|
overhead integer,
|
||||||
|
|
||||||
|
-- How many times this build step was done (for checking determinism).
|
||||||
|
timesBuilt integer,
|
||||||
|
|
||||||
|
-- Whether this build step produced different results when repeated.
|
||||||
|
isNonDeterministic boolean,
|
||||||
|
|
||||||
primary key (build, stepnr),
|
primary key (build, stepnr),
|
||||||
foreign key (build) references Builds(id) on delete cascade,
|
foreign key (build) references Builds(id) on delete cascade,
|
||||||
foreign key (propagatedFrom) references Builds(id) on delete cascade
|
foreign key (propagatedFrom) references Builds(id) on delete cascade
|
||||||
|
|
3
src/sql/upgrade-52.sql
Normal file
3
src/sql/upgrade-52.sql
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
alter table BuildSteps
|
||||||
|
add column timesBuilt integer,
|
||||||
|
add column isNonDeterministic boolean;
|
Loading…
Reference in a new issue