From f1cf3ab870343a6894c08e2bb893ea69badfc397 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 19 Mar 2020 00:37:57 -0400 Subject: [PATCH 1/4] hashDerivationModulo: Generalize for multiple fixed ouputs per drv See documentattion in header and comments in implementation for details. This is actually done in preparation for floating ca derivations, not multi-output fixed ca derivations, but the distinction doesn't yet mattter. Thanks @cole-h for finding and fixing a bunch of typos. --- src/libexpr/primops.cc | 4 +- src/libstore/derivations.cc | 94 +++++++++++++++++++++++++------------ src/libstore/derivations.hh | 33 ++++++++++++- src/libstore/local-store.cc | 4 +- 4 files changed, 100 insertions(+), 35 deletions(-) diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 8de234951..c9a16784e 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -742,7 +742,9 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * * DerivationOutput(StorePath::dummy.clone(), "", "")); } - Hash h = hashDerivationModulo(*state.store, Derivation(drv), true); + // Regular, non-CA derivation should always return a single hash and not + // hash per output. + Hash h = std::get<0>(hashDerivationModulo(*state.store, Derivation(drv), true)); for (auto & i : outputs) { auto outPath = state.store->makeOutputPath(i, h, drvName); diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 205b90e55..13f2b4770 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -338,49 +338,81 @@ bool BasicDerivation::isFixedOutput() const DrvHashes drvHashes; +/* pathDerivationModulo and hashDerivationModulo are mutually recursive + */ -/* Returns the hash of a derivation modulo fixed-output - subderivations. A fixed-output derivation is a derivation with one - output (`out') for which an expected hash and hash algorithm are - specified (using the `outputHash' and `outputHashAlgo' - attributes). We don't want changes to such derivations to - propagate upwards through the dependency graph, changing output - paths everywhere. +/* Look up the derivation by value and memoize the + `hashDerivationModulo` call. + */ +static DrvHashModulo & pathDerivationModulo(Store & store, const StorePath & drvPath) +{ + auto h = drvHashes.find(drvPath); + if (h == drvHashes.end()) { + assert(store.isValidPath(drvPath)); + // Cache it + h = drvHashes.insert_or_assign( + drvPath.clone(), + hashDerivationModulo( + store, + readDerivation( + store, + store.toRealPath(store.printStorePath(drvPath))), + false)).first; + } + return h->second; +} - For instance, if we change the url in a call to the `fetchurl' - function, we do not want to rebuild everything depending on it - (after all, (the hash of) the file being downloaded is unchanged). - So the *output paths* should not change. On the other hand, the - *derivation paths* should change to reflect the new dependency - graph. +/* See the header for interface details. These are the implementation details. - That's what this function does: it returns a hash which is just the - hash of the derivation ATerm, except that any input derivation - paths have been replaced by the result of a recursive call to this - function, and that for fixed-output derivations we return a hash of - its output path. */ -Hash hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs) + For fixed ouput derivations, each hash in the map is not the + corresponding output's content hash, but a hash of that hash along + with other constant data. The key point is that the value is a pure + function of the output's contents, and there are no preimage attacks + spoofing an either an output's contents for a derivation, or + derivation for an output's contents. + + For regular derivations, it looks up each subderivation from its hash + and recurs. If the subderivation is also regular, it simply + substitutes the derivation path with its hash. If the subderivation + is fixed-output, however, it takes each output hash and pretends it + is a derivation hash producing a single "out" output. This is so we + don't leak the provenance of fixed outputs, reducing pointless cache + misses as the build itself won't know this. + */ +DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs) { /* Return a fixed hash for fixed-output derivations. */ if (drv.isFixedOutput()) { - DerivationOutputs::const_iterator i = drv.outputs.begin(); - return hashString(htSHA256, "fixed:out:" - + i->second.hashAlgo + ":" - + i->second.hash + ":" - + store.printStorePath(i->second.path)); + std::map outputHashes; + for (const auto & i : drv.outputs) { + const Hash h = hashString(htSHA256, "fixed:out:" + + i.second.hashAlgo + ":" + + i.second.hash + ":" + + store.printStorePath(i.second.path)); + outputHashes.insert_or_assign(std::string(i.first), std::move(h)); + } + return outputHashes; } /* For other derivations, replace the inputs paths with recursive - calls to this function.*/ + calls to this function. */ std::map inputs2; for (auto & i : drv.inputDrvs) { - auto h = drvHashes.find(i.first); - if (h == drvHashes.end()) { - assert(store.isValidPath(i.first)); - h = drvHashes.insert_or_assign(i.first.clone(), hashDerivationModulo(store, - readDerivation(store, store.toRealPath(store.printStorePath(i.first))), false)).first; + const auto res = pathDerivationModulo(store, i.first); + if (const Hash *pval = std::get_if<0>(&res)) { + // regular non-CA derivation, replace derivation + inputs2.insert_or_assign(pval->to_string(Base16, false), i.second); + } else if (const std::map *pval = std::get_if<1>(&res)) { + // CA derivation's output hashes + std::set justOut = { std::string("out") }; + for (auto & output : i.second) { + /* Put each one in with a single "out" output.. */ + const auto h = pval->at(output); + inputs2.insert_or_assign( + h.to_string(Base16, false), + justOut); + } } - inputs2.insert_or_assign(h->second.to_string(Base16, false), i.second); } return hashString(htSHA256, drv.unparse(store, maskOutputs, &inputs2)); diff --git a/src/libstore/derivations.hh b/src/libstore/derivations.hh index c2df66229..c021bf907 100644 --- a/src/libstore/derivations.hh +++ b/src/libstore/derivations.hh @@ -5,6 +5,7 @@ #include "store-api.hh" #include +#include namespace nix { @@ -87,10 +88,38 @@ Derivation readDerivation(const Store & store, const Path & drvPath); // FIXME: remove bool isDerivation(const string & fileName); -Hash hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs); +typedef std::variant< + Hash, // regular DRV normalized hash + std::map // known CA drv's output hashes +> DrvHashModulo; + +/* Returns hashes with the details of fixed-output subderivations + expunged. + + A fixed-output derivation is a derivation whose outputs have a + specified content hash and hash algorithm. (Currently they must have + exactly one output (`out'), which is specified using the `outputHash' + and `outputHashAlgo' attributes, but the algorithm doesn't assume + this). We don't want changes to such derivations to propagate upwards + through the dependency graph, changing output paths everywhere. + + For instance, if we change the url in a call to the `fetchurl' + function, we do not want to rebuild everything depending on it (after + all, (the hash of) the file being downloaded is unchanged). So the + *output paths* should not change. On the other hand, the *derivation + paths* should change to reflect the new dependency graph. + + For fixed output derivations, this returns a map from the names of + each output to hashes unique up to the outputs' contents. + + For regular derivations, it returns a single hash of the derivation + ATerm, after subderivations have been likewise expunged from that + derivation. + */ +DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs); /* Memoisation of hashDerivationModulo(). */ -typedef std::map DrvHashes; +typedef std::map DrvHashes; extern DrvHashes drvHashes; // FIXME: global, not thread-safe diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index cd2e86f29..8639cbf20 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -571,7 +571,9 @@ void LocalStore::checkDerivationOutputs(const StorePath & drvPath, const Derivat } else { - Hash h = hashDerivationModulo(*this, drv, true); + // Regular, non-CA derivation should always return a single hash and not + // hash per output. + Hash h = std::get<0>(hashDerivationModulo(*this, drv, true)); for (auto & i : drv.outputs) check(makeOutputPath(i.first, h, drvName), i.second.path, i.first); } From d5b3328dd1e3de8910b237d54fb9dbf36629870f Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 19 Mar 2020 23:37:52 -0400 Subject: [PATCH 2/4] Apply suggestions from code review Co-Authored-By: Cole Helbling --- src/libstore/derivations.cc | 6 +++--- src/libstore/derivations.hh | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 13f2b4770..4c51bdef3 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -364,12 +364,12 @@ static DrvHashModulo & pathDerivationModulo(Store & store, const StorePath & drv /* See the header for interface details. These are the implementation details. - For fixed ouput derivations, each hash in the map is not the + For fixed-output derivations, each hash in the map is not the corresponding output's content hash, but a hash of that hash along with other constant data. The key point is that the value is a pure function of the output's contents, and there are no preimage attacks - spoofing an either an output's contents for a derivation, or - derivation for an output's contents. + either spoofing an output's contents for a derivation, or + spoofing a derivation for an output's contents. For regular derivations, it looks up each subderivation from its hash and recurs. If the subderivation is also regular, it simply diff --git a/src/libstore/derivations.hh b/src/libstore/derivations.hh index c021bf907..9f8b7a23e 100644 --- a/src/libstore/derivations.hh +++ b/src/libstore/derivations.hh @@ -100,14 +100,14 @@ typedef std::variant< specified content hash and hash algorithm. (Currently they must have exactly one output (`out'), which is specified using the `outputHash' and `outputHashAlgo' attributes, but the algorithm doesn't assume - this). We don't want changes to such derivations to propagate upwards + this.) We don't want changes to such derivations to propagate upwards through the dependency graph, changing output paths everywhere. For instance, if we change the url in a call to the `fetchurl' - function, we do not want to rebuild everything depending on it (after - all, (the hash of) the file being downloaded is unchanged). So the - *output paths* should not change. On the other hand, the *derivation - paths* should change to reflect the new dependency graph. + function, we do not want to rebuild everything depending on it---after + all, (the hash of) the file being downloaded is unchanged. So the + *output paths* should not change. On the other hand, the *derivation + paths* should change to reflect the new dependency graph. For fixed output derivations, this returns a map from the names of each output to hashes unique up to the outputs' contents. From e3173242362c8421429633c0e9f64ab0211760bd Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 19 Mar 2020 23:38:51 -0400 Subject: [PATCH 3/4] Apply suggestions from code review --- src/libstore/derivations.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libstore/derivations.hh b/src/libstore/derivations.hh index 9f8b7a23e..5e708642e 100644 --- a/src/libstore/derivations.hh +++ b/src/libstore/derivations.hh @@ -109,8 +109,8 @@ typedef std::variant< *output paths* should not change. On the other hand, the *derivation paths* should change to reflect the new dependency graph. - For fixed output derivations, this returns a map from the names of - each output to hashes unique up to the outputs' contents. + For fixed-output derivations, this returns a map from the name of + each output to its hash, unique up to the output's contents. For regular derivations, it returns a single hash of the derivation ATerm, after subderivations have been likewise expunged from that From bf9f040112c33213910faef40077122f1932d462 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sun, 21 Jun 2020 16:51:39 +0000 Subject: [PATCH 4/4] Tweak declaration I think this is clearer --- src/libstore/derivations.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 1f64086d7..ea57be0aa 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -404,7 +404,7 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m }, // CA derivation's output hashes [&](CaOutputHashes outputHashes) { - std::set justOut = { std::string("out") }; + std::set justOut = { "out" }; for (auto & output : i.second) { /* Put each one in with a single "out" output.. */ const auto h = outputHashes.at(output);