Make the output size limit configurable
The maximum output size per build step (computed as the sum of the NAR sizes of all of its outputs) can be set via hydra.conf, e.g. `max-output-size = 1000000000`. The default is 2 GiB. Also refactored the build error / status handling a bit.
This commit is contained in:
parent
dc790c5f7e
commit
4151be7e69
|
@ -261,21 +261,74 @@ void State::buildRemote(ref<Store> destStore,
|
||||||
if (sendDerivation) {
|
if (sendDerivation) {
|
||||||
if (res) {
|
if (res) {
|
||||||
result.errorMsg = (format("%1% on ‘%2%’") % readString(from) % machine->sshName).str();
|
result.errorMsg = (format("%1% on ‘%2%’") % readString(from) % machine->sshName).str();
|
||||||
if (res == 100) result.status = BuildResult::PermanentFailure;
|
if (res == 100) {
|
||||||
else if (res == 101) result.status = BuildResult::TimedOut;
|
result.stepStatus = bsFailed;
|
||||||
else result.status = BuildResult::MiscFailure;
|
result.canCache = true;
|
||||||
|
}
|
||||||
|
else if (res == 101) {
|
||||||
|
result.stepStatus = bsTimedOut;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result.stepStatus = bsAborted;
|
||||||
|
result.canRetry = true;
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
result.status = BuildResult::Built;
|
result.stepStatus = bsSuccess;
|
||||||
} else {
|
} else {
|
||||||
result.status = (BuildResult::Status) res;
|
|
||||||
result.errorMsg = readString(from);
|
result.errorMsg = readString(from);
|
||||||
if (!result.success()) return;
|
switch ((BuildResult::Status) res) {
|
||||||
|
case BuildResult::Built:
|
||||||
|
result.stepStatus = bsSuccess;
|
||||||
|
break;
|
||||||
|
case BuildResult::Substituted:
|
||||||
|
case BuildResult::AlreadyValid:
|
||||||
|
result.stepStatus = bsSuccess;
|
||||||
|
result.isCached = true;
|
||||||
|
break;
|
||||||
|
case BuildResult::PermanentFailure:
|
||||||
|
result.stepStatus = bsFailed;
|
||||||
|
result.canCache = true;
|
||||||
|
result.errorMsg = "";
|
||||||
|
break;
|
||||||
|
case BuildResult::InputRejected:
|
||||||
|
case BuildResult::OutputRejected:
|
||||||
|
result.stepStatus = bsFailed;
|
||||||
|
result.canCache = true;
|
||||||
|
break;
|
||||||
|
case BuildResult::TransientFailure:
|
||||||
|
result.stepStatus = bsFailed;
|
||||||
|
result.canRetry = true;
|
||||||
|
result.errorMsg = "";
|
||||||
|
break;
|
||||||
|
case BuildResult::CachedFailure: // cached on the build machine
|
||||||
|
result.stepStatus = bsCachedFailure;
|
||||||
|
result.canCache = true;
|
||||||
|
result.errorMsg = "";
|
||||||
|
break;
|
||||||
|
case BuildResult::TimedOut:
|
||||||
|
result.stepStatus = bsTimedOut;
|
||||||
|
result.errorMsg = "";
|
||||||
|
break;
|
||||||
|
case BuildResult::MiscFailure:
|
||||||
|
result.stepStatus = bsAborted;
|
||||||
|
result.canRetry = true;
|
||||||
|
break;
|
||||||
|
case BuildResult::LogLimitExceeded:
|
||||||
|
result.stepStatus = bsLogLimitExceeded;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
result.stepStatus = bsAborted;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
if (result.stepStatus != bsSuccess) return;
|
||||||
|
}
|
||||||
|
|
||||||
|
result.errorMsg = "";
|
||||||
|
|
||||||
/* If the path was substituted or already valid, then we didn't
|
/* If the path was substituted or already valid, then we didn't
|
||||||
get a build log. */
|
get a build log. */
|
||||||
if (result.status == BuildResult::Substituted || result.status == BuildResult::AlreadyValid) {
|
if (result.isCached) {
|
||||||
printMsg(lvlInfo, format("outputs of ‘%1%’ substituted or already valid on ‘%2%’") % step->drvPath % machine->sshName);
|
printMsg(lvlInfo, format("outputs of ‘%1%’ substituted or already valid on ‘%2%’") % step->drvPath % machine->sshName);
|
||||||
unlink(result.logFile.c_str());
|
unlink(result.logFile.c_str());
|
||||||
result.logFile = "";
|
result.logFile = "";
|
||||||
|
@ -303,6 +356,11 @@ void State::buildRemote(ref<Store> destStore,
|
||||||
totalNarSize += readLongLong(from);
|
totalNarSize += readLongLong(from);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (totalNarSize > maxOutputSize) {
|
||||||
|
result.stepStatus = bsNarSizeLimitExceeded;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
printMsg(lvlDebug, format("copying outputs of ‘%s’ from ‘%s’ (%d bytes)")
|
printMsg(lvlDebug, format("copying outputs of ‘%s’ from ‘%s’ (%d bytes)")
|
||||||
% step->drvPath % machine->sshName % totalNarSize);
|
% step->drvPath % machine->sshName % totalNarSize);
|
||||||
|
|
||||||
|
|
|
@ -105,10 +105,8 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
||||||
|
|
||||||
/* If any of the outputs have previously failed, then don't bother
|
/* If any of the outputs have previously failed, then don't bother
|
||||||
building again. */
|
building again. */
|
||||||
bool cachedFailure = checkCachedFailure(step, *conn);
|
if (checkCachedFailure(step, *conn))
|
||||||
|
result.stepStatus = bsCachedFailure;
|
||||||
if (cachedFailure)
|
|
||||||
result.status = BuildResult::CachedFailure;
|
|
||||||
else {
|
else {
|
||||||
|
|
||||||
/* Create a build step record indicating that we started
|
/* Create a build step record indicating that we started
|
||||||
|
@ -124,12 +122,14 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
||||||
try {
|
try {
|
||||||
/* FIXME: referring builds may have conflicting timeouts. */
|
/* FIXME: referring builds may have conflicting timeouts. */
|
||||||
buildRemote(destStore, machine, step, build->maxSilentTime, build->buildTimeout, result);
|
buildRemote(destStore, machine, step, build->maxSilentTime, build->buildTimeout, result);
|
||||||
|
} catch (NoTokens & e) {
|
||||||
|
result.stepStatus = bsNarSizeLimitExceeded;
|
||||||
} catch (Error & e) {
|
} catch (Error & e) {
|
||||||
result.status = BuildResult::MiscFailure;
|
result.stepStatus = bsAborted;
|
||||||
result.errorMsg = e.msg();
|
result.errorMsg = e.msg();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.success())
|
if (result.stepStatus == bsSuccess)
|
||||||
res = getBuildOutput(destStore, ref<FSAccessor>(result.accessor), step->drv);
|
res = getBuildOutput(destStore, ref<FSAccessor>(result.accessor), step->drv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -159,7 +159,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
||||||
|
|
||||||
/* The step had a hopefully temporary failure (e.g. network
|
/* The step had a hopefully temporary failure (e.g. network
|
||||||
issue). Retry a number of times. */
|
issue). Retry a number of times. */
|
||||||
if (result.canRetry()) {
|
if (result.canRetry) {
|
||||||
printMsg(lvlError, format("possibly transient failure building ‘%1%’ on ‘%2%’: %3%")
|
printMsg(lvlError, format("possibly transient failure building ‘%1%’ on ‘%2%’: %3%")
|
||||||
% step->drvPath % machine->sshName % result.errorMsg);
|
% step->drvPath % machine->sshName % result.errorMsg);
|
||||||
bool retry;
|
bool retry;
|
||||||
|
@ -178,7 +178,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.success()) {
|
if (result.stepStatus == bsSuccess) {
|
||||||
|
|
||||||
/* Register success in the database for all Build objects that
|
/* Register success in the database for all Build objects that
|
||||||
have this step as the top-level step. Since the queue
|
have this step as the top-level step. Since the queue
|
||||||
|
@ -225,7 +225,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
||||||
build->id, stepNr, machine->sshName, bsSuccess);
|
build->id, stepNr, machine->sshName, bsSuccess);
|
||||||
|
|
||||||
for (auto & b : direct)
|
for (auto & b : direct)
|
||||||
markSucceededBuild(txn, b, res, build != b || result.status != BuildResult::Built,
|
markSucceededBuild(txn, b, res, build != b || result.isCached,
|
||||||
result.startTime, result.stopTime);
|
result.startTime, result.stopTime);
|
||||||
|
|
||||||
txn.commit();
|
txn.commit();
|
||||||
|
@ -309,38 +309,27 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
||||||
|
|
||||||
pqxx::work txn(*conn);
|
pqxx::work txn(*conn);
|
||||||
|
|
||||||
BuildStatus buildStatus =
|
|
||||||
result.status == BuildResult::TimedOut ? bsTimedOut :
|
|
||||||
result.status == BuildResult::LogLimitExceeded ? bsLogLimitExceeded :
|
|
||||||
result.canRetry() ? bsAborted :
|
|
||||||
bsFailed;
|
|
||||||
|
|
||||||
/* For standard failures, we don't care about the error
|
/* For standard failures, we don't care about the error
|
||||||
message. */
|
message. */
|
||||||
if (result.status == BuildResult::PermanentFailure ||
|
if (result.stepStatus != bsAborted)
|
||||||
result.status == BuildResult::TransientFailure ||
|
|
||||||
result.status == BuildResult::CachedFailure ||
|
|
||||||
result.status == BuildResult::TimedOut ||
|
|
||||||
result.status == BuildResult::LogLimitExceeded)
|
|
||||||
result.errorMsg = "";
|
result.errorMsg = "";
|
||||||
|
|
||||||
/* Create failed build steps for every build that depends
|
/* Create failed build steps for every build that
|
||||||
on this. For cached failures, only create a step for
|
depends on this, except when this step is cached
|
||||||
builds that don't have this step as top-level
|
and is the top-level of that build (since then it's
|
||||||
(otherwise the user won't be able to see what caused
|
redundant with the build's isCachedBuild field). */
|
||||||
the build to fail). */
|
|
||||||
for (auto & build2 : indirect) {
|
for (auto & build2 : indirect) {
|
||||||
if ((cachedFailure && build2->drvPath == step->drvPath) ||
|
if ((result.stepStatus == bsCachedFailure && build2->drvPath == step->drvPath) ||
|
||||||
(!cachedFailure && build == build2) ||
|
(result.stepStatus != bsCachedFailure && build == build2) ||
|
||||||
build2->finishedInDB)
|
build2->finishedInDB)
|
||||||
continue;
|
continue;
|
||||||
createBuildStep(txn, 0, build2, step, machine->sshName,
|
createBuildStep(txn, 0, build2, step, machine->sshName,
|
||||||
buildStatus, result.errorMsg, build == build2 ? 0 : build->id);
|
result.stepStatus, result.errorMsg, build == build2 ? 0 : build->id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!cachedFailure)
|
if (result.stepStatus != bsCachedFailure)
|
||||||
finishBuildStep(txn, result.startTime, result.stopTime, result.overhead,
|
finishBuildStep(txn, result.startTime, result.stopTime, result.overhead,
|
||||||
build->id, stepNr, machine->sshName, buildStatus, result.errorMsg);
|
build->id, stepNr, machine->sshName, result.stepStatus, result.errorMsg);
|
||||||
|
|
||||||
/* Mark all builds that depend on this derivation as failed. */
|
/* Mark all builds that depend on this derivation as failed. */
|
||||||
for (auto & build2 : indirect) {
|
for (auto & build2 : indirect) {
|
||||||
|
@ -349,16 +338,16 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
||||||
txn.parameterized
|
txn.parameterized
|
||||||
("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $4, isCachedBuild = $5 where id = $1 and finished = 0")
|
("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $4, isCachedBuild = $5 where id = $1 and finished = 0")
|
||||||
(build2->id)
|
(build2->id)
|
||||||
((int) (build2->drvPath != step->drvPath && buildStatus == bsFailed ? bsDepFailed : buildStatus))
|
((int) (build2->drvPath != step->drvPath && result.buildStatus() == bsFailed ? bsDepFailed : result.buildStatus()))
|
||||||
(result.startTime)
|
(result.startTime)
|
||||||
(result.stopTime)
|
(result.stopTime)
|
||||||
(cachedFailure ? 1 : 0).exec();
|
(result.stepStatus == bsCachedFailure ? 1 : 0).exec();
|
||||||
nrBuildsDone++;
|
nrBuildsDone++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Remember failed paths in the database so that they
|
/* Remember failed paths in the database so that they
|
||||||
won't be built again. */
|
won't be built again. */
|
||||||
if (!cachedFailure && result.status == BuildResult::PermanentFailure)
|
if (result.stepStatus != bsCachedFailure && result.canCache)
|
||||||
for (auto & path : step->drv.outputPaths())
|
for (auto & path : step->drv.outputPaths())
|
||||||
txn.parameterized("insert into FailedPaths values ($1)")(path).exec();
|
txn.parameterized("insert into FailedPaths values ($1)")(path).exec();
|
||||||
|
|
||||||
|
|
|
@ -41,6 +41,11 @@ State::State()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
std::string s = hydraConfig["max-output-size"];
|
||||||
|
if (s != "") string2Int(s, maxOutputSize);
|
||||||
|
}
|
||||||
|
|
||||||
logDir = canonPath(hydraData + "/build-logs");
|
logDir = canonPath(hydraData + "/build-logs");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,20 +33,27 @@ typedef enum {
|
||||||
bsCachedFailure = 8, // steps only
|
bsCachedFailure = 8, // steps only
|
||||||
bsUnsupported = 9,
|
bsUnsupported = 9,
|
||||||
bsLogLimitExceeded = 10,
|
bsLogLimitExceeded = 10,
|
||||||
|
bsNarSizeLimitExceeded = 11,
|
||||||
bsBusy = 100, // not stored
|
bsBusy = 100, // not stored
|
||||||
} BuildStatus;
|
} BuildStatus;
|
||||||
|
|
||||||
|
|
||||||
struct RemoteResult : nix::BuildResult
|
struct RemoteResult
|
||||||
{
|
{
|
||||||
|
BuildStatus stepStatus = bsAborted;
|
||||||
|
bool canRetry = false; // for bsAborted
|
||||||
|
bool isCached = false; // for bsSuccess
|
||||||
|
bool canCache = false; // for bsFailed
|
||||||
|
std::string errorMsg; // for bsAborted
|
||||||
|
|
||||||
time_t startTime = 0, stopTime = 0;
|
time_t startTime = 0, stopTime = 0;
|
||||||
unsigned int overhead = 0;
|
unsigned int overhead = 0;
|
||||||
nix::Path logFile;
|
nix::Path logFile;
|
||||||
std::shared_ptr<nix::FSAccessor> accessor;
|
std::shared_ptr<nix::FSAccessor> accessor;
|
||||||
|
|
||||||
bool canRetry()
|
BuildStatus buildStatus()
|
||||||
{
|
{
|
||||||
return status == TransientFailure || status == MiscFailure;
|
return stepStatus == bsCachedFailure ? bsFailed : stepStatus;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -350,6 +357,8 @@ private:
|
||||||
tokens are available. */
|
tokens are available. */
|
||||||
nix::TokenServer memoryTokens;
|
nix::TokenServer memoryTokens;
|
||||||
|
|
||||||
|
size_t maxOutputSize = 2ULL << 30;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
State();
|
State();
|
||||||
|
|
||||||
|
|
|
@ -65,6 +65,8 @@ FOR step IN steps; IF step.busy; busy = 1; END; END;
|
||||||
<span class="error">Unsupported system type</span>
|
<span class="error">Unsupported system type</span>
|
||||||
[% ELSIF step.status == 10 %]
|
[% ELSIF step.status == 10 %]
|
||||||
<span class="error">Log limit exceeded</span>
|
<span class="error">Log limit exceeded</span>
|
||||||
|
[% ELSIF step.status == 11 %]
|
||||||
|
<span class="error">Output limit exceeded</span>
|
||||||
[% ELSIF step.errormsg %]
|
[% ELSIF step.errormsg %]
|
||||||
<span class="error">Failed: [% HTML.escape(step.errormsg) %]</span>
|
<span class="error">Failed: [% HTML.escape(step.errormsg) %]</span>
|
||||||
[% ELSE %]
|
[% ELSE %]
|
||||||
|
|
|
@ -207,6 +207,8 @@ BLOCK renderBuildStatusIcon;
|
||||||
<img src="[% c.uri_for("/static/images/warning_${size}.png") %]" alt="Timed out" class="build-status" />
|
<img src="[% c.uri_for("/static/images/warning_${size}.png") %]" alt="Timed out" class="build-status" />
|
||||||
[% ELSIF buildstatus == 10 %]
|
[% ELSIF buildstatus == 10 %]
|
||||||
<img src="[% c.uri_for("/static/images/warning_${size}.png") %]" alt="Log limit exceeded" class="build-status" />
|
<img src="[% c.uri_for("/static/images/warning_${size}.png") %]" alt="Log limit exceeded" class="build-status" />
|
||||||
|
[% ELSIF buildstatus == 11 %]
|
||||||
|
<img src="[% c.uri_for("/static/images/warning_${size}.png") %]" alt="Output size limit exceeded" class="build-status" />
|
||||||
[% ELSE %]
|
[% ELSE %]
|
||||||
<img src="[% c.uri_for("/static/images/error_${size}.png") %]" alt="Failed" class="build-status" />
|
<img src="[% c.uri_for("/static/images/error_${size}.png") %]" alt="Failed" class="build-status" />
|
||||||
[% END;
|
[% END;
|
||||||
|
@ -236,6 +238,8 @@ BLOCK renderStatus;
|
||||||
<span class="error">Unsupported system type</span>
|
<span class="error">Unsupported system type</span>
|
||||||
[% ELSIF buildstatus == 10 %]
|
[% ELSIF buildstatus == 10 %]
|
||||||
<span class="error">Log limit exceeded</span>
|
<span class="error">Log limit exceeded</span>
|
||||||
|
[% ELSIF buildstatus == 11 %]
|
||||||
|
<span class="error">Output limit exceeded</span>
|
||||||
[% ELSE %]
|
[% ELSE %]
|
||||||
<span class="error">Aborted</span>
|
<span class="error">Aborted</span>
|
||||||
(Hydra failure; see <a href="#nix-error">below</a>)
|
(Hydra failure; see <a href="#nix-error">below</a>)
|
||||||
|
|
|
@ -192,6 +192,7 @@ create table Builds (
|
||||||
-- 8 = cached failure [steps only; builds use isCachedBuild]
|
-- 8 = cached failure [steps only; builds use isCachedBuild]
|
||||||
-- 9 = unsupported system type
|
-- 9 = unsupported system type
|
||||||
-- 10 = log limit exceeded
|
-- 10 = log limit exceeded
|
||||||
|
-- 11 = NAR size limit exceeded
|
||||||
buildStatus integer,
|
buildStatus integer,
|
||||||
|
|
||||||
size bigint,
|
size bigint,
|
||||||
|
|
Loading…
Reference in a new issue