From a544ed7684bdf5fa3c0b78d40913f5be3f73f5a7 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Fri, 18 Mar 2022 00:36:52 +0000 Subject: [PATCH] Generalize `DerivationType` in preparation for impure derivations --- src/build-remote/build-remote.cc | 2 +- src/libexpr/primops.cc | 7 +- src/libstore/build/derivation-goal.cc | 35 ++++-- src/libstore/build/local-derivation-goal.cc | 14 +-- src/libstore/daemon.cc | 8 +- src/libstore/derivations.cc | 112 +++++++++++--------- src/libstore/derivations.hh | 62 +++++++---- src/libstore/local-store.cc | 1 + src/libstore/parsed-derivations.cc | 2 +- 9 files changed, 148 insertions(+), 95 deletions(-) diff --git a/src/build-remote/build-remote.cc b/src/build-remote/build-remote.cc index 8c9133c17..ff8ba2724 100644 --- a/src/build-remote/build-remote.cc +++ b/src/build-remote/build-remote.cc @@ -300,7 +300,7 @@ connected: std::set missingRealisations; StorePathSet missingPaths; - if (settings.isExperimentalFeatureEnabled(Xp::CaDerivations) && !derivationHasKnownOutputPaths(drv.type())) { + if (settings.isExperimentalFeatureEnabled(Xp::CaDerivations) && !drv.type().hasKnownOutputPaths()) { for (auto & outputName : wantedOutputs) { auto thisOutputHash = outputHashes.at(outputName); auto thisOutputId = DrvOutput{ thisOutputHash, outputName }; diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 2dc33103d..bfa18f898 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -1235,11 +1235,8 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * * /* Optimisation, but required in read-only mode! because in that case we don't actually write store derivations, so we can't - read them later. - - However, we don't bother doing this for floating CA derivations because - their "hash modulo" is indeterminate until built. */ - if (drv.type() != DerivationType::CAFloating) { + read them later. */ + { auto h = hashDerivationModulo(*state.store, drv, false); drvHashes.lock()->insert_or_assign(drvPath, h); } diff --git a/src/libstore/build/derivation-goal.cc b/src/libstore/build/derivation-goal.cc index afed9bf16..40c445836 100644 --- a/src/libstore/build/derivation-goal.cc +++ b/src/libstore/build/derivation-goal.cc @@ -204,7 +204,7 @@ void DerivationGoal::haveDerivation() { trace("have derivation"); - if (drv->type() == DerivationType::CAFloating) + if (!drv->type().hasKnownOutputPaths()) settings.requireExperimentalFeature(Xp::CaDerivations); retrySubstitution = false; @@ -440,9 +440,28 @@ void DerivationGoal::inputsRealised() if (useDerivation) { auto & fullDrv = *dynamic_cast(drv.get()); - if (settings.isExperimentalFeatureEnabled(Xp::CaDerivations) && - ((!fullDrv.inputDrvs.empty() && derivationIsCA(fullDrv.type())) - || fullDrv.type() == DerivationType::DeferredInputAddressed)) { + auto drvType = fullDrv.type(); + bool resolveDrv = std::visit(overloaded { + [&](const DerivationType::InputAddressed & ia) { + /* must resolve if deferred. */ + return ia.deferred; + }, + [&](const DerivationType::ContentAddressed & ca) { + return !fullDrv.inputDrvs.empty() && ( + ca.fixed + /* Can optionally resolve if fixed, which is good + for avoiding unnecessary rebuilds. */ + ? settings.isExperimentalFeatureEnabled(Xp::CaDerivations) + /* Must resolve if floating and there are any inputs + drvs. */ + : true); + }, + }, drvType.raw()); + + if (resolveDrv) + { + settings.requireExperimentalFeature(Xp::CaDerivations); + /* We are be able to resolve this derivation based on the now-known results of dependencies. If so, we become a stub goal aliasing that resolved derivation goal */ @@ -501,7 +520,7 @@ void DerivationGoal::inputsRealised() /* Don't repeat fixed-output derivations since they're already verified by their output hash.*/ - nrRounds = derivationIsFixed(derivationType) ? 1 : settings.buildRepeat + 1; + nrRounds = derivationType.isFixed() ? 1 : settings.buildRepeat + 1; /* Okay, try to build. Note that here we don't wait for a build slot to become available, since we don't need one if there is a @@ -908,7 +927,7 @@ void DerivationGoal::buildDone() st = dynamic_cast(&e) ? BuildResult::NotDeterministic : statusOk(status) ? BuildResult::OutputRejected : - derivationIsImpure(derivationType) || diskFull ? BuildResult::TransientFailure : + derivationType.isImpure() || diskFull ? BuildResult::TransientFailure : BuildResult::PermanentFailure; } @@ -1221,7 +1240,7 @@ void DerivationGoal::flushLine() std::map> DerivationGoal::queryPartialDerivationOutputMap() { - if (!useDerivation || drv->type() != DerivationType::CAFloating) { + if (!useDerivation || drv->type().hasKnownOutputPaths()) { std::map> res; for (auto & [name, output] : drv->outputs) res.insert_or_assign(name, output.path(worker.store, drv->name, name)); @@ -1233,7 +1252,7 @@ std::map> DerivationGoal::queryPartialDeri OutputPathMap DerivationGoal::queryDerivationOutputMap() { - if (!useDerivation || drv->type() != DerivationType::CAFloating) { + if (!useDerivation || drv->type().hasKnownOutputPaths()) { OutputPathMap res; for (auto & [name, output] : drv->outputsAndOptPaths(worker.store)) res.insert_or_assign(name, *output.second); diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 8a301d4ac..75e7e6ca3 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -395,7 +395,7 @@ void LocalDerivationGoal::startBuilder() else if (settings.sandboxMode == smDisabled) useChroot = false; else if (settings.sandboxMode == smRelaxed) - useChroot = !(derivationIsImpure(derivationType)) && !noChroot; + useChroot = !(derivationType.isImpure()) && !noChroot; } auto & localStore = getLocalStore(); @@ -608,7 +608,7 @@ void LocalDerivationGoal::startBuilder() "nogroup:x:65534:\n", sandboxGid())); /* Create /etc/hosts with localhost entry. */ - if (!(derivationIsImpure(derivationType))) + if (!(derivationType.isImpure())) writeFile(chrootRootDir + "/etc/hosts", "127.0.0.1 localhost\n::1 localhost\n"); /* Make the closure of the inputs available in the chroot, @@ -796,7 +796,7 @@ void LocalDerivationGoal::startBuilder() us. */ - if (!(derivationIsImpure(derivationType))) + if (!(derivationType.isImpure())) privateNetwork = true; userNamespaceSync.create(); @@ -1049,7 +1049,7 @@ void LocalDerivationGoal::initEnv() derivation, tell the builder, so that for instance `fetchurl' can skip checking the output. On older Nixes, this environment variable won't be set, so `fetchurl' will do the check. */ - if (derivationIsFixed(derivationType)) env["NIX_OUTPUT_CHECKED"] = "1"; + if (derivationType.isFixed()) env["NIX_OUTPUT_CHECKED"] = "1"; /* *Only* if this is a fixed-output derivation, propagate the values of the environment variables specified in the @@ -1060,7 +1060,7 @@ void LocalDerivationGoal::initEnv() to the builder is generally impure, but the output of fixed-output derivations is by definition pure (since we already know the cryptographic hash of the output). */ - if (derivationIsImpure(derivationType)) { + if (derivationType.isImpure()) { for (auto & i : parsedDrv->getStringsAttr("impureEnvVars").value_or(Strings())) env[i] = getEnv(i).value_or(""); } @@ -1674,7 +1674,7 @@ void LocalDerivationGoal::runChild() /* Fixed-output derivations typically need to access the network, so give them access to /etc/resolv.conf and so on. */ - if (derivationIsImpure(derivationType)) { + if (derivationType.isImpure()) { // Only use nss functions to resolve hosts and // services. Don’t use it for anything else that may // be configured for this system. This limits the @@ -1918,7 +1918,7 @@ void LocalDerivationGoal::runChild() sandboxProfile += "(import \"sandbox-defaults.sb\")\n"; - if (derivationIsImpure(derivationType)) + if (derivationType.isImpure()) sandboxProfile += "(import \"sandbox-network.sb\")\n"; /* Add the output paths we'll use at build-time to the chroot */ diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index 9f21ecf36..de69b50ee 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -560,6 +560,8 @@ static void performOp(TunnelLogger * logger, ref store, BuildMode buildMode = (BuildMode) readInt(from); logger->startWork(); + auto drvType = drv.type(); + /* Content-addressed derivations are trustless because their output paths are verified by their content alone, so any derivation is free to try to produce such a path. @@ -592,12 +594,12 @@ static void performOp(TunnelLogger * logger, ref store, derivations, we throw out the precomputed output paths and just store the hashes, so there aren't two competing sources of truth an attacker could exploit. */ - if (drv.type() == DerivationType::InputAddressed && !trusted) + if (!(drvType.isCA() || trusted)) throw Error("you are not privileged to build input-addressed derivations"); /* Make sure that the non-input-addressed derivations that got this far are in fact content-addressed if we don't trust them. */ - assert(derivationIsCA(drv.type()) || trusted); + assert(drvType.isCA() || trusted); /* Recompute the derivation path when we cannot trust the original. */ if (!trusted) { @@ -606,7 +608,7 @@ static void performOp(TunnelLogger * logger, ref store, original not-necessarily-resolved derivation to verify the drv derivation as adequate claim to the input-addressed output paths. */ - assert(derivationIsCA(drv.type())); + assert(drvType.isCA()); Derivation drv2; static_cast(drv2) = drv; diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 3dd3a78d8..7fed80387 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -36,47 +36,46 @@ StorePath DerivationOutput::CAFixed::path(const Store & store, std::string_view } -bool derivationIsCA(DerivationType dt) { - switch (dt) { - case DerivationType::InputAddressed: return false; - case DerivationType::CAFixed: return true; - case DerivationType::CAFloating: return true; - case DerivationType::DeferredInputAddressed: return false; - }; - // Since enums can have non-variant values, but making a `default:` would - // disable exhaustiveness warnings. - assert(false); +bool DerivationType::isCA() const { + /* Normally we do the full `std::visit` to make sure we have + exhaustively handled all variants, but so long as there is a + variant called `ContentAddressed`, it must be the only one for + which `isCA` is true for this to make sense!. */ + return std::holds_alternative(raw()); } -bool derivationIsFixed(DerivationType dt) { - switch (dt) { - case DerivationType::InputAddressed: return false; - case DerivationType::CAFixed: return true; - case DerivationType::CAFloating: return false; - case DerivationType::DeferredInputAddressed: return false; - }; - assert(false); +bool DerivationType::isFixed() const { + return std::visit(overloaded { + [](const InputAddressed & ia) { + return false; + }, + [](const ContentAddressed & ca) { + return ca.fixed; + }, + }, raw()); } -bool derivationHasKnownOutputPaths(DerivationType dt) { - switch (dt) { - case DerivationType::InputAddressed: return true; - case DerivationType::CAFixed: return true; - case DerivationType::CAFloating: return false; - case DerivationType::DeferredInputAddressed: return false; - }; - assert(false); +bool DerivationType::hasKnownOutputPaths() const { + return std::visit(overloaded { + [](const InputAddressed & ia) { + return !ia.deferred; + }, + [](const ContentAddressed & ca) { + return ca.fixed; + }, + }, raw()); } -bool derivationIsImpure(DerivationType dt) { - switch (dt) { - case DerivationType::InputAddressed: return false; - case DerivationType::CAFixed: return true; - case DerivationType::CAFloating: return false; - case DerivationType::DeferredInputAddressed: return false; - }; - assert(false); +bool DerivationType::isImpure() const { + return std::visit(overloaded { + [](const InputAddressed & ia) { + return false; + }, + [](const ContentAddressed & ca) { + return !ca.pure; + }, + }, raw()); } @@ -439,18 +438,28 @@ DerivationType BasicDerivation::type() const if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) { throw Error("Must have at least one output"); } else if (! inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) { - return DerivationType::InputAddressed; + return DerivationType::InputAddressed { + .deferred = false, + }; } else if (inputAddressedOutputs.empty() && ! fixedCAOutputs.empty() && floatingCAOutputs.empty() && deferredIAOutputs.empty()) { if (fixedCAOutputs.size() > 1) // FIXME: Experimental feature? throw Error("Only one fixed output is allowed for now"); if (*fixedCAOutputs.begin() != "out") throw Error("Single fixed output must be named \"out\""); - return DerivationType::CAFixed; + return DerivationType::ContentAddressed { + .pure = false, + .fixed = true, + }; } else if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && ! floatingCAOutputs.empty() && deferredIAOutputs.empty()) { - return DerivationType::CAFloating; + return DerivationType::ContentAddressed { + .pure = true, + .fixed = false, + }; } else if (inputAddressedOutputs.empty() && fixedCAOutputs.empty() && floatingCAOutputs.empty() && !deferredIAOutputs.empty()) { - return DerivationType::DeferredInputAddressed; + return DerivationType::InputAddressed { + .deferred = true, + }; } else { throw Error("Can't mix derivation output types"); } @@ -502,10 +511,10 @@ static const DrvHashModulo pathDerivationModulo(Store & store, const StorePath & */ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs) { - auto kind = DrvHash::Kind::Regular; + auto type = drv.type(); + /* Return a fixed hash for fixed-output derivations. */ - switch (drv.type()) { - case DerivationType::CAFixed: { + if (type.isFixed()) { std::map outputHashes; for (const auto & i : drv.outputs) { auto & dof = std::get(i.second.raw()); @@ -517,14 +526,19 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m } return outputHashes; } - case DerivationType::CAFloating: - kind = DrvHash::Kind::Deferred; - break; - case DerivationType::InputAddressed: - break; - case DerivationType::DeferredInputAddressed: - break; - } + + auto kind = std::visit(overloaded { + [](const DerivationType::InputAddressed & ia) { + /* This might be a "pesimistically" deferred output, so we don't + "taint" the kind yet. */ + return DrvHash::Kind::Regular; + }, + [](const DerivationType::ContentAddressed & ca) { + return ca.fixed + ? DrvHash::Kind::Regular + : DrvHash::Kind::Deferred; + }, + }, drv.type().raw()); /* For other derivations, replace the inputs paths with recursive calls to this function. */ diff --git a/src/libstore/derivations.hh b/src/libstore/derivations.hh index 6b92b2f21..8dea90abf 100644 --- a/src/libstore/derivations.hh +++ b/src/libstore/derivations.hh @@ -85,30 +85,50 @@ typedef std::map DerivationInputs; -enum struct DerivationType : uint8_t { - InputAddressed, - DeferredInputAddressed, - CAFixed, - CAFloating, +struct DerivationType_InputAddressed { + bool deferred; }; -/* Do the outputs of the derivation have paths calculated from their content, - or from the derivation itself? */ -bool derivationIsCA(DerivationType); +struct DerivationType_ContentAddressed { + bool pure; + bool fixed; +}; -/* Is the content of the outputs fixed a-priori via a hash? Never true for - non-CA derivations. */ -bool derivationIsFixed(DerivationType); +typedef std::variant< + DerivationType_InputAddressed, + DerivationType_ContentAddressed +> _DerivationTypeRaw; -/* Is the derivation impure and needs to access non-deterministic resources, or - pure and can be sandboxed? Note that whether or not we actually sandbox the - derivation is controlled separately. Never true for non-CA derivations. */ -bool derivationIsImpure(DerivationType); +struct DerivationType : _DerivationTypeRaw { + using Raw = _DerivationTypeRaw; + using Raw::Raw; + using InputAddressed = DerivationType_InputAddressed; + using ContentAddressed = DerivationType_ContentAddressed; -/* Does the derivation knows its own output paths? - * Only true when there's no floating-ca derivation involved in the closure. - */ -bool derivationHasKnownOutputPaths(DerivationType); + + /* Do the outputs of the derivation have paths calculated from their content, + or from the derivation itself? */ + bool isCA() const; + + /* Is the content of the outputs fixed a-priori via a hash? Never true for + non-CA derivations. */ + bool isFixed() const; + + /* Is the derivation impure and needs to access non-deterministic resources, or + pure and can be sandboxed? Note that whether or not we actually sandbox the + derivation is controlled separately. Never true for non-CA derivations. */ + bool isImpure() const; + + /* Does the derivation knows its own output paths? + Only true when there's no floating-ca derivation involved in the + closure, or if fixed output. + */ + bool hasKnownOutputPaths() const; + + inline const Raw & raw() const { + return static_cast(*this); + } +}; struct BasicDerivation { @@ -189,11 +209,11 @@ typedef std::map CaOutputHashes; struct DrvHash { Hash hash; - enum struct Kind { + enum struct Kind: bool { // Statically determined derivations. // This hash will be directly used to compute the output paths Regular, - // Floating-output derivations (and their dependencies). + // Floating-output derivations (and their reverse dependencies). Deferred, }; diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 75726a1ab..46a547db1 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -718,6 +718,7 @@ void LocalStore::checkDerivationOutputs(const StorePath & drvPath, const Derivat /* Nothing to check */ }, [&](const DerivationOutput::Deferred &) { + /* Nothing to check */ }, }, i.second.raw()); } diff --git a/src/libstore/parsed-derivations.cc b/src/libstore/parsed-derivations.cc index 8c65053e4..f2288a04e 100644 --- a/src/libstore/parsed-derivations.cc +++ b/src/libstore/parsed-derivations.cc @@ -93,7 +93,7 @@ StringSet ParsedDerivation::getRequiredSystemFeatures() const StringSet res; for (auto & i : getStringsAttr("requiredSystemFeatures").value_or(Strings())) res.insert(i); - if (!derivationHasKnownOutputPaths(drv.type())) + if (!drv.type().hasKnownOutputPaths()) res.insert("ca-derivations"); return res; }