From bab1cda0e6c30e25460b5a9c809589d3948f35df Mon Sep 17 00:00:00 2001 From: regnat Date: Wed, 9 Dec 2020 16:56:56 +0100 Subject: [PATCH] Use the hash modulo in the derivation outputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than storing the derivation outputs as `drvPath!outputName` internally, store them as `drvHashModulo!outputName` (or `outputHash!outputName` for fixed-output derivations). This makes the storage slightly more opaque, but enables an earlier cutoff in cases where a fixed-output dependency changes (but keeps the same output hash) − same as what we already do for input-addressed derivations. --- src/libexpr/primops.cc | 2 +- src/libstore/build/derivation-goal.cc | 28 ++++-------- src/libstore/derivations.cc | 49 +++++++++++++++------ src/libstore/derivations.hh | 22 ++++------ src/libstore/local-store.cc | 61 ++++++++++++--------------- src/libstore/realisation.cc | 10 ++--- src/libstore/realisation.hh | 10 +++-- 7 files changed, 93 insertions(+), 89 deletions(-) diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 41f06c219..d059e3daf 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -1107,7 +1107,7 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * * // Shouldn't happen as the toplevel derivation is not CA. assert(false); }, - [&](UnknownHashes) { + [&](DeferredHash _) { for (auto & i : outputs) { drv.outputs.insert_or_assign(i, DerivationOutput { diff --git a/src/libstore/build/derivation-goal.cc b/src/libstore/build/derivation-goal.cc index b7bf866eb..54b37553a 100644 --- a/src/libstore/build/derivation-goal.cc +++ b/src/libstore/build/derivation-goal.cc @@ -504,9 +504,6 @@ void DerivationGoal::inputsRealised() Derivation drvResolved { *std::move(attempt) }; auto pathResolved = writeDerivation(worker.store, drvResolved); - /* Add to memotable to speed up downstream goal's queries with the - original derivation. */ - drvPathResolutions.lock()->insert_or_assign(drvPath, pathResolved); auto msg = fmt("Resolved derivation: '%s' -> '%s'", worker.store.printStorePath(drvPath), @@ -2097,15 +2094,15 @@ struct RestrictedStore : public LocalFSStore, public virtual RestrictedStoreConf void registerDrvOutput(const Realisation & info) override { - if (!goal.isAllowed(info.id.drvPath)) - throw InvalidPath("cannot register unknown drv output '%s' in recursive Nix", printStorePath(info.id.drvPath)); + // XXX: Should we check for something here? Probably, but I'm not sure + // how next->registerDrvOutput(info); } std::optional queryRealisation(const DrvOutput & id) override { - if (!goal.isAllowed(id.drvPath)) - throw InvalidPath("cannot query the output info for unknown derivation '%s' in recursive Nix", printStorePath(id.drvPath)); + // XXX: Should we check for something here? Probably, but I'm not sure + // how return next->queryRealisation(id); } @@ -3394,23 +3391,14 @@ void DerivationGoal::registerOutputs() means it's safe to link the derivation to the output hash. We must do that for floating CA derivations, which otherwise couldn't be cached, but it's fine to do in all cases. */ - bool isCaFloating = drv->type() == DerivationType::CAFloating; - auto drvPathResolved = drvPath; - if (!useDerivation && isCaFloating) { - /* Once a floating CA derivations reaches this point, it - must already be resolved, so we don't bother trying to - downcast drv to get would would just be an empty - inputDrvs field. */ - Derivation drv2 { *drv }; - drvPathResolved = writeDerivation(worker.store, drv2); - } - - if (settings.isExperimentalFeatureEnabled("ca-derivations")) + if (settings.isExperimentalFeatureEnabled("ca-derivations")) { + auto outputHashes = staticOutputHashes(worker.store, *drv); for (auto& [outputName, newInfo] : infos) worker.store.registerDrvOutput(Realisation{ - .id = DrvOutput{drvPathResolved, outputName}, + .id = DrvOutput{outputHashes.at(outputName), outputName}, .outPath = newInfo.path}); + } } diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 231ca26c2..5bcc7f012 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -496,10 +496,9 @@ static const DrvHashModulo pathDerivationModulo(Store & store, const StorePath & */ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs) { + bool isDeferred = false; /* Return a fixed hash for fixed-output derivations. */ switch (drv.type()) { - case DerivationType::CAFloating: - return UnknownHashes {}; case DerivationType::CAFixed: { std::map outputHashes; for (const auto & i : drv.outputs) { @@ -512,6 +511,9 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m } return outputHashes; } + case DerivationType::CAFloating: + isDeferred = true; + break; case DerivationType::InputAddressed: break; case DerivationType::DeferredInputAddressed: @@ -522,13 +524,16 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m calls to this function. */ std::map inputs2; for (auto & i : drv.inputDrvs) { - bool hasUnknownHash = false; const auto & res = pathDerivationModulo(store, i.first); std::visit(overloaded { // Regular non-CA derivation, replace derivation [&](Hash drvHash) { inputs2.insert_or_assign(drvHash.to_string(Base16, false), i.second); }, + [&](DeferredHash deferredHash) { + isDeferred = true; + inputs2.insert_or_assign(deferredHash.hash.to_string(Base16, false), i.second); + }, // CA derivation's output hashes [&](CaOutputHashes outputHashes) { std::set justOut = { "out" }; @@ -540,16 +545,37 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m justOut); } }, - [&](UnknownHashes) { - hasUnknownHash = true; - }, }, res); - if (hasUnknownHash) { - return UnknownHashes {}; - } } - return hashString(htSHA256, drv.unparse(store, maskOutputs, &inputs2)); + auto hash = hashString(htSHA256, drv.unparse(store, maskOutputs, &inputs2)); + + if (isDeferred) + return DeferredHash { hash }; + else + return hash; +} + + +std::map staticOutputHashes(Store& store, const Derivation& drv) +{ + std::map res; + std::visit(overloaded { + [&](Hash drvHash) { + for (auto & outputName : drv.outputNames()) { + res.insert({outputName, drvHash}); + } + }, + [&](DeferredHash deferredHash) { + for (auto & outputName : drv.outputNames()) { + res.insert({outputName, deferredHash.hash}); + } + }, + [&](CaOutputHashes outputHashes) { + res = outputHashes; + }, + }, hashDerivationModulo(store, drv, true)); + return res; } @@ -719,9 +745,6 @@ static void rewriteDerivation(Store & store, BasicDerivation & drv, const String } - -Sync drvPathResolutions; - std::optional Derivation::tryResolve(Store & store) { BasicDerivation resolved { *this }; diff --git a/src/libstore/derivations.hh b/src/libstore/derivations.hh index b966d6d90..4e5985fab 100644 --- a/src/libstore/derivations.hh +++ b/src/libstore/derivations.hh @@ -18,8 +18,6 @@ namespace nix { /* The traditional non-fixed-output derivation type. */ struct DerivationOutputInputAddressed { - /* Will need to become `std::optional` once input-addressed - derivations are allowed to depend on cont-addressed derivations */ StorePath path; }; @@ -174,12 +172,12 @@ std::string outputPathName(std::string_view drvName, std::string_view outputName // whose output hashes are always known since they are fixed up-front. typedef std::map CaOutputHashes; -struct UnknownHashes {}; +struct DeferredHash { Hash hash; }; typedef std::variant< Hash, // regular DRV normalized hash CaOutputHashes, // Fixed-output derivation hashes - UnknownHashes // Deferred hashes for floating outputs drvs and their dependencies + DeferredHash // Deferred hashes for floating outputs drvs and their dependencies > DrvHashModulo; /* Returns hashes with the details of fixed-output subderivations @@ -207,22 +205,18 @@ typedef std::variant< */ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs); +/* + Return a map associating each output to a hash that uniquely identifies its + derivation (modulo the self-references). + */ +std::map staticOutputHashes(Store& store, const Derivation& drv); + /* Memoisation of hashDerivationModulo(). */ typedef std::map DrvHashes; // FIXME: global, though at least thread-safe. extern Sync drvHashes; -/* Memoisation of `readDerivation(..).resove()`. */ -typedef std::map< - StorePath, - std::optional -> DrvPathResolutions; - -// FIXME: global, though at least thread-safe. -// FIXME: arguably overlaps with hashDerivationModulo memo table. -extern Sync drvPathResolutions; - bool wantOutput(const string & output, const std::set & wanted); struct Source; diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 69ab821d9..1539c94e2 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -659,7 +659,7 @@ void LocalStore::registerDrvOutput(const Realisation & info) auto state(_state.lock()); retrySQLite([&]() { state->stmts->RegisterRealisedOutput.use() - (info.id.drvPath.to_string()) + (info.id.strHash()) (info.id.outputName) (printStorePath(info.outPath)) .exec(); @@ -879,17 +879,18 @@ StorePathSet LocalStore::queryValidDerivers(const StorePath & path) // Try to resolve the derivation at path `original`, with a caching layer // to make it more efficient -std::optional cachedResolve( - LocalStore & store, - const StorePath & original) +std::optional cachedResolve( + LocalStore& store, + const StorePath& original) { + // This is quite dirty and leaky, but will disappear once #4340 is merged + static Sync>> resolutionsCache; { - auto resolutions = drvPathResolutions.lock(); - auto resolvedPathOptIter = resolutions->find(original); - if (resolvedPathOptIter != resolutions->end()) { - auto & [_, resolvedPathOpt] = *resolvedPathOptIter; - if (resolvedPathOpt) - return resolvedPathOpt; + auto resolutions = resolutionsCache.lock(); + auto resolvedDrvIter = resolutions->find(original); + if (resolvedDrvIter != resolutions->end()) { + auto & [_, resolvedDrv] = *resolvedDrvIter; + return *resolvedDrv; } } @@ -898,12 +899,9 @@ std::optional cachedResolve( auto attempt = drv.tryResolve(store); if (!attempt) return std::nullopt; - /* Just compute store path */ - auto pathResolved = - writeDerivation(store, *std::move(attempt), NoRepair, true); /* Store in memo table. */ - drvPathResolutions.lock()->insert_or_assign(original, pathResolved); - return pathResolved; + resolutionsCache.lock()->insert_or_assign(original, *attempt); + return *attempt; } std::map> @@ -933,26 +931,24 @@ LocalStore::queryPartialDerivationOutputMap(const StorePath& path_) auto drv = readDerivation(path); - for (auto & output : drv.outputsAndOptPaths(*this)) { - outputs.emplace(output.first, std::nullopt); - } - auto resolvedDrv = cachedResolve(*this, path); - if (!resolvedDrv) + if (!resolvedDrv) { + for (auto& [outputName, _] : drv.outputsAndOptPaths(*this)) { + if (!outputs.count(outputName)) + outputs.emplace(outputName, std::nullopt); + } return outputs; + } - retrySQLite([&]() { - auto state(_state.lock()); - path = *resolvedDrv; - auto useQueryDerivationOutputs{ - state->stmts->QueryAllRealisedOutputs.use()(path.to_string())}; - - while (useQueryDerivationOutputs.next()) - outputs.insert_or_assign( - useQueryDerivationOutputs.getStr(0), - parseStorePath(useQueryDerivationOutputs.getStr(1))); - }); + auto resolvedDrvHashes = staticOutputHashes(*this, *resolvedDrv); + for (auto& [outputName, hash] : resolvedDrvHashes) { + auto realisation = queryRealisation(DrvOutput{hash, outputName}); + if (realisation) + outputs.insert_or_assign(outputName, realisation->outPath); + else + outputs.insert_or_assign(outputName, std::nullopt); + } return outputs; } @@ -1695,12 +1691,11 @@ std::optional LocalStore::queryRealisation( typedef std::optional Ret; return retrySQLite([&]() -> Ret { auto state(_state.lock()); - auto use(state->stmts->QueryRealisedOutput.use()(id.drvPath.to_string())( + auto use(state->stmts->QueryRealisedOutput.use()(id.strHash())( id.outputName)); if (!use.next()) return std::nullopt; auto outputPath = parseStorePath(use.getStr(0)); - auto resolvedDrv = StorePath(use.getStr(1)); return Ret{ Realisation{.id = id, .outPath = outputPath}}; }); diff --git a/src/libstore/realisation.cc b/src/libstore/realisation.cc index 47db1ec9f..47ad90eee 100644 --- a/src/libstore/realisation.cc +++ b/src/libstore/realisation.cc @@ -7,18 +7,18 @@ namespace nix { MakeError(InvalidDerivationOutputId, Error); DrvOutput DrvOutput::parse(const std::string &strRep) { - const auto &[rawPath, outputs] = parsePathWithOutputs(strRep); - if (outputs.size() != 1) + size_t n = strRep.find("!"); + if (n == strRep.npos) throw InvalidDerivationOutputId("Invalid derivation output id %s", strRep); return DrvOutput{ - .drvPath = StorePath(rawPath), - .outputName = *outputs.begin(), + .drvHash = Hash::parseAnyPrefixed(strRep.substr(0, n)), + .outputName = strRep.substr(n+1), }; } std::string DrvOutput::to_string() const { - return std::string(drvPath.to_string()) + "!" + outputName; + return strHash() + "!" + outputName; } nlohmann::json Realisation::toJSON() const { diff --git a/src/libstore/realisation.hh b/src/libstore/realisation.hh index 08579b739..4b8ead3c5 100644 --- a/src/libstore/realisation.hh +++ b/src/libstore/realisation.hh @@ -6,11 +6,15 @@ namespace nix { struct DrvOutput { - StorePath drvPath; + // The hash modulo of the derivation + Hash drvHash; std::string outputName; std::string to_string() const; + std::string strHash() const + { return drvHash.to_string(Base16, true); } + static DrvOutput parse(const std::string &); bool operator<(const DrvOutput& other) const { return to_pair() < other.to_pair(); } @@ -18,8 +22,8 @@ struct DrvOutput { private: // Just to make comparison operators easier to write - std::pair to_pair() const - { return std::make_pair(drvPath, outputName); } + std::pair to_pair() const + { return std::make_pair(drvHash, outputName); } }; struct Realisation {