Merge pull request #6284 from obsidiansystems/generlized-derivation-type

Generalize DerivationType in preparation for impure derivations
This commit is contained in:
Théophane Hufschmitt 2022-03-18 16:36:32 +01:00 committed by GitHub
commit 8ad485ea89
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 159 additions and 114 deletions

View file

@ -300,7 +300,7 @@ connected:
std::set<Realisation> missingRealisations;
StorePathSet missingPaths;
if (settings.isExperimentalFeatureEnabled(Xp::CaDerivations) && !derivationHasKnownOutputPaths(drv.type())) {
if (settings.isExperimentalFeatureEnabled(Xp::CaDerivations) && !drv.type().hasKnownOutputPaths()) {
for (auto & outputName : wantedOutputs) {
auto thisOutputHash = outputHashes.at(outputName);
auto thisOutputId = DrvOutput{ thisOutputHash, outputName };

View file

@ -1194,32 +1194,26 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * *
for (auto & i : outputs) {
drv.env[i] = "";
drv.outputs.insert_or_assign(i,
DerivationOutput::InputAddressed {
.path = StorePath::dummy,
});
DerivationOutput::Deferred { });
}
// Regular, non-CA derivation should always return a single hash and not
// hash per output.
auto hashModulo = hashDerivationModulo(*state.store, Derivation(drv), true);
auto hashModulo = hashDerivationModulo(*state.store, drv, true);
std::visit(overloaded {
[&](const DrvHash & drvHash) {
auto & h = drvHash.hash;
switch (drvHash.kind) {
case DrvHash::Kind::Deferred:
for (auto & i : outputs) {
drv.outputs.insert_or_assign(i,
DerivationOutput::Deferred { });
}
/* Outputs already deferred, nothing to do */
break;
case DrvHash::Kind::Regular:
for (auto & i : outputs) {
auto outPath = state.store->makeOutputPath(i, h, drvName);
drv.env[i] = state.store->printStorePath(outPath);
drv.outputs.insert_or_assign(i,
DerivationOutput::InputAddressed {
.path = std::move(outPath),
});
for (auto & [outputName, output] : drv.outputs) {
auto outPath = state.store->makeOutputPath(outputName, h, drvName);
drv.env[outputName] = state.store->printStorePath(outPath);
output = DerivationOutput::InputAddressed {
.path = std::move(outPath),
};
}
break;
}
@ -1241,12 +1235,9 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * *
/* Optimisation, but required in read-only mode! because in that
case we don't actually write store derivations, so we can't
read them later.
However, we don't bother doing this for floating CA derivations because
their "hash modulo" is indeterminate until built. */
if (drv.type() != DerivationType::CAFloating) {
auto h = hashDerivationModulo(*state.store, Derivation(drv), false);
read them later. */
{
auto h = hashDerivationModulo(*state.store, drv, false);
drvHashes.lock()->insert_or_assign(drvPath, h);
}

View file

@ -204,7 +204,7 @@ void DerivationGoal::haveDerivation()
{
trace("have derivation");
if (drv->type() == DerivationType::CAFloating)
if (!drv->type().hasKnownOutputPaths())
settings.requireExperimentalFeature(Xp::CaDerivations);
retrySubstitution = false;
@ -440,9 +440,28 @@ void DerivationGoal::inputsRealised()
if (useDerivation) {
auto & fullDrv = *dynamic_cast<Derivation *>(drv.get());
if (settings.isExperimentalFeatureEnabled(Xp::CaDerivations) &&
((!fullDrv.inputDrvs.empty() && derivationIsCA(fullDrv.type()))
|| fullDrv.type() == DerivationType::DeferredInputAddressed)) {
auto drvType = fullDrv.type();
bool resolveDrv = std::visit(overloaded {
[&](const DerivationType::InputAddressed & ia) {
/* must resolve if deferred. */
return ia.deferred;
},
[&](const DerivationType::ContentAddressed & ca) {
return !fullDrv.inputDrvs.empty() && (
ca.fixed
/* Can optionally resolve if fixed, which is good
for avoiding unnecessary rebuilds. */
? settings.isExperimentalFeatureEnabled(Xp::CaDerivations)
/* Must resolve if floating and there are any inputs
drvs. */
: true);
},
}, drvType.raw());
if (resolveDrv)
{
settings.requireExperimentalFeature(Xp::CaDerivations);
/* We are be able to resolve this derivation based on the
now-known results of dependencies. If so, we become a stub goal
aliasing that resolved derivation goal */
@ -501,7 +520,7 @@ void DerivationGoal::inputsRealised()
/* Don't repeat fixed-output derivations since they're already
verified by their output hash.*/
nrRounds = derivationIsFixed(derivationType) ? 1 : settings.buildRepeat + 1;
nrRounds = derivationType.isFixed() ? 1 : settings.buildRepeat + 1;
/* Okay, try to build. Note that here we don't wait for a build
slot to become available, since we don't need one if there is a
@ -908,7 +927,7 @@ void DerivationGoal::buildDone()
st =
dynamic_cast<NotDeterministic*>(&e) ? BuildResult::NotDeterministic :
statusOk(status) ? BuildResult::OutputRejected :
derivationIsImpure(derivationType) || diskFull ? BuildResult::TransientFailure :
derivationType.isImpure() || diskFull ? BuildResult::TransientFailure :
BuildResult::PermanentFailure;
}
@ -1221,7 +1240,7 @@ void DerivationGoal::flushLine()
std::map<std::string, std::optional<StorePath>> DerivationGoal::queryPartialDerivationOutputMap()
{
if (!useDerivation || drv->type() != DerivationType::CAFloating) {
if (!useDerivation || drv->type().hasKnownOutputPaths()) {
std::map<std::string, std::optional<StorePath>> res;
for (auto & [name, output] : drv->outputs)
res.insert_or_assign(name, output.path(worker.store, drv->name, name));
@ -1233,7 +1252,7 @@ std::map<std::string, std::optional<StorePath>> DerivationGoal::queryPartialDeri
OutputPathMap DerivationGoal::queryDerivationOutputMap()
{
if (!useDerivation || drv->type() != DerivationType::CAFloating) {
if (!useDerivation || drv->type().hasKnownOutputPaths()) {
OutputPathMap res;
for (auto & [name, output] : drv->outputsAndOptPaths(worker.store))
res.insert_or_assign(name, *output.second);

View file

@ -395,7 +395,7 @@ void LocalDerivationGoal::startBuilder()
else if (settings.sandboxMode == smDisabled)
useChroot = false;
else if (settings.sandboxMode == smRelaxed)
useChroot = !(derivationIsImpure(derivationType)) && !noChroot;
useChroot = !(derivationType.isImpure()) && !noChroot;
}
auto & localStore = getLocalStore();
@ -608,7 +608,7 @@ void LocalDerivationGoal::startBuilder()
"nogroup:x:65534:\n", sandboxGid()));
/* Create /etc/hosts with localhost entry. */
if (!(derivationIsImpure(derivationType)))
if (!(derivationType.isImpure()))
writeFile(chrootRootDir + "/etc/hosts", "127.0.0.1 localhost\n::1 localhost\n");
/* Make the closure of the inputs available in the chroot,
@ -796,7 +796,7 @@ void LocalDerivationGoal::startBuilder()
us.
*/
if (!(derivationIsImpure(derivationType)))
if (!(derivationType.isImpure()))
privateNetwork = true;
userNamespaceSync.create();
@ -1049,7 +1049,7 @@ void LocalDerivationGoal::initEnv()
derivation, tell the builder, so that for instance `fetchurl'
can skip checking the output. On older Nixes, this environment
variable won't be set, so `fetchurl' will do the check. */
if (derivationIsFixed(derivationType)) env["NIX_OUTPUT_CHECKED"] = "1";
if (derivationType.isFixed()) env["NIX_OUTPUT_CHECKED"] = "1";
/* *Only* if this is a fixed-output derivation, propagate the
values of the environment variables specified in the
@ -1060,7 +1060,7 @@ void LocalDerivationGoal::initEnv()
to the builder is generally impure, but the output of
fixed-output derivations is by definition pure (since we
already know the cryptographic hash of the output). */
if (derivationIsImpure(derivationType)) {
if (derivationType.isImpure()) {
for (auto & i : parsedDrv->getStringsAttr("impureEnvVars").value_or(Strings()))
env[i] = getEnv(i).value_or("");
}
@ -1674,7 +1674,7 @@ void LocalDerivationGoal::runChild()
/* Fixed-output derivations typically need to access the
network, so give them access to /etc/resolv.conf and so
on. */
if (derivationIsImpure(derivationType)) {
if (derivationType.isImpure()) {
// Only use nss functions to resolve hosts and
// services. Dont use it for anything else that may
// be configured for this system. This limits the
@ -1918,7 +1918,7 @@ void LocalDerivationGoal::runChild()
sandboxProfile += "(import \"sandbox-defaults.sb\")\n";
if (derivationIsImpure(derivationType))
if (derivationType.isImpure())
sandboxProfile += "(import \"sandbox-network.sb\")\n";
/* Add the output paths we'll use at build-time to the chroot */

View file

@ -560,6 +560,8 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
BuildMode buildMode = (BuildMode) readInt(from);
logger->startWork();
auto drvType = drv.type();
/* Content-addressed derivations are trustless because their output paths
are verified by their content alone, so any derivation is free to
try to produce such a path.
@ -592,12 +594,12 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
derivations, we throw out the precomputed output paths and just
store the hashes, so there aren't two competing sources of truth an
attacker could exploit. */
if (drv.type() == DerivationType::InputAddressed && !trusted)
if (!(drvType.isCA() || trusted))
throw Error("you are not privileged to build input-addressed derivations");
/* Make sure that the non-input-addressed derivations that got this far
are in fact content-addressed if we don't trust them. */
assert(derivationIsCA(drv.type()) || trusted);
assert(drvType.isCA() || trusted);
/* Recompute the derivation path when we cannot trust the original. */
if (!trusted) {
@ -606,7 +608,7 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
original not-necessarily-resolved derivation to verify the drv
derivation as adequate claim to the input-addressed output
paths. */
assert(derivationIsCA(drv.type()));
assert(drvType.isCA());
Derivation drv2;
static_cast<BasicDerivation &>(drv2) = drv;

View file

@ -36,47 +36,46 @@ StorePath DerivationOutput::CAFixed::path(const Store & store, std::string_view
}
bool derivationIsCA(DerivationType dt) {
switch (dt) {
case DerivationType::InputAddressed: return false;
case DerivationType::CAFixed: return true;
case DerivationType::CAFloating: return true;
case DerivationType::DeferredInputAddressed: return false;
};
// Since enums can have non-variant values, but making a `default:` would
// disable exhaustiveness warnings.
assert(false);
bool DerivationType::isCA() const {
/* Normally we do the full `std::visit` to make sure we have
exhaustively handled all variants, but so long as there is a
variant called `ContentAddressed`, it must be the only one for
which `isCA` is true for this to make sense!. */
return std::holds_alternative<ContentAddressed>(raw());
}
bool derivationIsFixed(DerivationType dt) {
switch (dt) {
case DerivationType::InputAddressed: return false;
case DerivationType::CAFixed: return true;
case DerivationType::CAFloating: return false;
case DerivationType::DeferredInputAddressed: return false;
};
assert(false);
bool DerivationType::isFixed() const {
return std::visit(overloaded {
[](const InputAddressed & ia) {
return false;
},
[](const ContentAddressed & ca) {
return ca.fixed;
},
}, raw());
}
bool derivationHasKnownOutputPaths(DerivationType dt) {
switch (dt) {
case DerivationType::InputAddressed: return true;
case DerivationType::CAFixed: return true;
case DerivationType::CAFloating: return false;
case DerivationType::DeferredInputAddressed: return false;
};
assert(false);
bool DerivationType::hasKnownOutputPaths() const {
return std::visit(overloaded {
[](const InputAddressed & ia) {
return !ia.deferred;
},
[](const ContentAddressed & ca) {
return ca.fixed;
},
}, raw());
}
bool derivationIsImpure(DerivationType dt) {
switch (dt) {
case DerivationType::InputAddressed: return false;
case DerivationType::CAFixed: return true;
case DerivationType::CAFloating: return false;
case DerivationType::DeferredInputAddressed: return false;
};
assert(false);
bool DerivationType::isImpure() const {
return std::visit(overloaded {
[](const InputAddressed & ia) {
return false;
},
[](const ContentAddressed & ca) {
return !ca.pure;
},
}, raw());
}
@ -439,18 +438,28 @@ DerivationType BasicDerivation::type() const
if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) {
throw Error("Must have at least one output");
} else if (! inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) {
return DerivationType::InputAddressed;
return DerivationType::InputAddressed {
.deferred = false,
};
} else if (inputAddressedOutputs.empty() && ! fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) {
if (fixedCAOutputs.size() > 1)
// FIXME: Experimental feature?
throw Error("Only one fixed output is allowed for now");
if (*fixedCAOutputs.begin() != "out")
throw Error("Single fixed output must be named \"out\"");
return DerivationType::CAFixed;
return DerivationType::ContentAddressed {
.pure = false,
.fixed = true,
};
} else if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && ! floatingCAOutputs.empty() && deferredIAOutputs.empty()) {
return DerivationType::CAFloating;
return DerivationType::ContentAddressed {
.pure = true,
.fixed = false,
};
} else if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && !deferredIAOutputs.empty()) {
return DerivationType::DeferredInputAddressed;
return DerivationType::InputAddressed {
.deferred = true,
};
} else {
throw Error("Can't mix derivation output types");
}
@ -502,10 +511,10 @@ static const DrvHashModulo pathDerivationModulo(Store & store, const StorePath &
*/
DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs)
{
auto kind = DrvHash::Kind::Regular;
auto type = drv.type();
/* Return a fixed hash for fixed-output derivations. */
switch (drv.type()) {
case DerivationType::CAFixed: {
if (type.isFixed()) {
std::map<std::string, Hash> outputHashes;
for (const auto & i : drv.outputs) {
auto & dof = std::get<DerivationOutput::CAFixed>(i.second.raw());
@ -517,14 +526,19 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m
}
return outputHashes;
}
case DerivationType::CAFloating:
kind = DrvHash::Kind::Deferred;
break;
case DerivationType::InputAddressed:
break;
case DerivationType::DeferredInputAddressed:
break;
}
auto kind = std::visit(overloaded {
[](const DerivationType::InputAddressed & ia) {
/* This might be a "pesimistically" deferred output, so we don't
"taint" the kind yet. */
return DrvHash::Kind::Regular;
},
[](const DerivationType::ContentAddressed & ca) {
return ca.fixed
? DrvHash::Kind::Regular
: DrvHash::Kind::Deferred;
},
}, drv.type().raw());
/* For other derivations, replace the inputs paths with recursive
calls to this function. */

View file

@ -85,30 +85,50 @@ typedef std::map<std::string, std::pair<DerivationOutput, std::optional<StorePat
output IDs we are interested in. */
typedef std::map<StorePath, StringSet> DerivationInputs;
enum struct DerivationType : uint8_t {
InputAddressed,
DeferredInputAddressed,
CAFixed,
CAFloating,
struct DerivationType_InputAddressed {
bool deferred;
};
/* Do the outputs of the derivation have paths calculated from their content,
or from the derivation itself? */
bool derivationIsCA(DerivationType);
struct DerivationType_ContentAddressed {
bool pure;
bool fixed;
};
/* Is the content of the outputs fixed a-priori via a hash? Never true for
non-CA derivations. */
bool derivationIsFixed(DerivationType);
typedef std::variant<
DerivationType_InputAddressed,
DerivationType_ContentAddressed
> _DerivationTypeRaw;
/* Is the derivation impure and needs to access non-deterministic resources, or
pure and can be sandboxed? Note that whether or not we actually sandbox the
derivation is controlled separately. Never true for non-CA derivations. */
bool derivationIsImpure(DerivationType);
struct DerivationType : _DerivationTypeRaw {
using Raw = _DerivationTypeRaw;
using Raw::Raw;
using InputAddressed = DerivationType_InputAddressed;
using ContentAddressed = DerivationType_ContentAddressed;
/* Does the derivation knows its own output paths?
* Only true when there's no floating-ca derivation involved in the closure.
*/
bool derivationHasKnownOutputPaths(DerivationType);
/* Do the outputs of the derivation have paths calculated from their content,
or from the derivation itself? */
bool isCA() const;
/* Is the content of the outputs fixed a-priori via a hash? Never true for
non-CA derivations. */
bool isFixed() const;
/* Is the derivation impure and needs to access non-deterministic resources, or
pure and can be sandboxed? Note that whether or not we actually sandbox the
derivation is controlled separately. Never true for non-CA derivations. */
bool isImpure() const;
/* Does the derivation knows its own output paths?
Only true when there's no floating-ca derivation involved in the
closure, or if fixed output.
*/
bool hasKnownOutputPaths() const;
inline const Raw & raw() const {
return static_cast<const Raw &>(*this);
}
};
struct BasicDerivation
{
@ -189,11 +209,11 @@ typedef std::map<std::string, Hash> CaOutputHashes;
struct DrvHash {
Hash hash;
enum struct Kind {
enum struct Kind: bool {
// Statically determined derivations.
// This hash will be directly used to compute the output paths
Regular,
// Floating-output derivations (and their dependencies).
// Floating-output derivations (and their reverse dependencies).
Deferred,
};

View file

@ -718,6 +718,7 @@ void LocalStore::checkDerivationOutputs(const StorePath & drvPath, const Derivat
/* Nothing to check */
},
[&](const DerivationOutput::Deferred &) {
/* Nothing to check */
},
}, i.second.raw());
}

View file

@ -93,7 +93,7 @@ StringSet ParsedDerivation::getRequiredSystemFeatures() const
StringSet res;
for (auto & i : getStringsAttr("requiredSystemFeatures").value_or(Strings()))
res.insert(i);
if (!derivationHasKnownOutputPaths(drv.type()))
if (!drv.type().hasKnownOutputPaths())
res.insert("ca-derivations");
return res;
}

View file

@ -201,9 +201,7 @@ static StorePath getDerivationEnvironment(ref<Store> store, ref<Store> evalStore
}
} else {
for (auto & output : drv.outputs) {
output.second = DerivationOutput::InputAddressed {
.path = StorePath::dummy,
};
output.second = DerivationOutput::Deferred { };
drv.env[output.first] = "";
}
auto h0 = hashDerivationModulo(*evalStore, drv, true);