Merge branch 'multi-output-hashDerivationModulo' of github.com:Ericson2314/nix into misc-ca

This commit is contained in:
Carlo Nucera 2020-07-17 10:28:33 -04:00
commit 5cb840541b
6 changed files with 1895 additions and 38 deletions

1787
local-store-temp.cc Normal file

File diff suppressed because it is too large Load diff

View file

@ -800,7 +800,9 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * *
}); });
} }
Hash h = hashDerivationModulo(*state.store, Derivation(drv), true); // Regular, non-CA derivation should always return a single hash and not
// hash per output.
Hash h = std::get<0>(hashDerivationModulo(*state.store, Derivation(drv), true));
for (auto & i : outputs) { for (auto & i : outputs) {
auto outPath = state.store->makeOutputPath(i, h, drvName); auto outPath = state.store->makeOutputPath(i, h, drvName);

View file

@ -369,50 +369,83 @@ bool BasicDerivation::isFixedOutput() const
DrvHashes drvHashes; DrvHashes drvHashes;
/* pathDerivationModulo and hashDerivationModulo are mutually recursive
*/
/* Returns the hash of a derivation modulo fixed-output /* Look up the derivation by value and memoize the
subderivations. A fixed-output derivation is a derivation with one `hashDerivationModulo` call.
output (`out') for which an expected hash and hash algorithm are */
specified (using the `outputHash' and `outputHashAlgo' static const DrvHashModulo & pathDerivationModulo(Store & store, const StorePath & drvPath)
attributes). We don't want changes to such derivations to {
propagate upwards through the dependency graph, changing output auto h = drvHashes.find(drvPath);
paths everywhere. if (h == drvHashes.end()) {
assert(store.isValidPath(drvPath));
// Cache it
h = drvHashes.insert_or_assign(
drvPath,
hashDerivationModulo(
store,
store.readDerivation(drvPath),
false)).first;
}
return h->second;
}
For instance, if we change the url in a call to the `fetchurl' /* See the header for interface details. These are the implementation details.
function, we do not want to rebuild everything depending on it
(after all, (the hash of) the file being downloaded is unchanged).
So the *output paths* should not change. On the other hand, the
*derivation paths* should change to reflect the new dependency
graph.
That's what this function does: it returns a hash which is just the For fixed-output derivations, each hash in the map is not the
hash of the derivation ATerm, except that any input derivation corresponding output's content hash, but a hash of that hash along
paths have been replaced by the result of a recursive call to this with other constant data. The key point is that the value is a pure
function, and that for fixed-output derivations we return a hash of function of the output's contents, and there are no preimage attacks
its output path. */ either spoofing an output's contents for a derivation, or
Hash hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs) spoofing a derivation for an output's contents.
For regular derivations, it looks up each subderivation from its hash
and recurs. If the subderivation is also regular, it simply
substitutes the derivation path with its hash. If the subderivation
is fixed-output, however, it takes each output hash and pretends it
is a derivation hash producing a single "out" output. This is so we
don't leak the provenance of fixed outputs, reducing pointless cache
misses as the build itself won't know this.
*/
DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs)
{ {
/* Return a fixed hash for fixed-output derivations. */ /* Return a fixed hash for fixed-output derivations. */
if (drv.isFixedOutput()) { if (drv.isFixedOutput()) {
DerivationOutputs::const_iterator i = drv.outputs.begin(); std::map<std::string, Hash> outputHashes;
auto hash = std::get<DerivationOutputFixed>(i->second.output); for (const auto & i : drv.outputs) {
return hashString(htSHA256, "fixed:out:" auto & dof = std::get<DerivationOutputFixed>(i.second.output);
+ hash.hash.printMethodAlgo() + ":" auto hash = hashString(htSHA256, "fixed:out:"
+ hash.hash.hash.to_string(Base16, false) + ":" + dof.hash.printMethodAlgo() + ":"
+ store.printStorePath(i->second.path(store, drv.name))); + dof.hash.hash.to_string(Base16, false) + ":"
+ store.printStorePath(i.second.path(store, drv.name)));
outputHashes.insert_or_assign(i.first, std::move(hash));
}
return outputHashes;
} }
/* For other derivations, replace the inputs paths with recursive /* For other derivations, replace the inputs paths with recursive
calls to this function.*/ calls to this function. */
std::map<std::string, StringSet> inputs2; std::map<std::string, StringSet> inputs2;
for (auto & i : drv.inputDrvs) { for (auto & i : drv.inputDrvs) {
auto h = drvHashes.find(i.first); const auto & res = pathDerivationModulo(store, i.first);
if (h == drvHashes.end()) { std::visit(overloaded {
assert(store.isValidPath(i.first)); // Regular non-CA derivation, replace derivation
h = drvHashes.insert_or_assign(i.first, hashDerivationModulo(store, [&](Hash drvHash) {
store.readDerivation(i.first), false)).first; inputs2.insert_or_assign(drvHash.to_string(Base16, false), i.second);
} },
inputs2.insert_or_assign(h->second.to_string(Base16, false), i.second); // CA derivation's output hashes
[&](CaOutputHashes outputHashes) {
std::set<std::string> justOut = { "out" };
for (auto & output : i.second) {
/* Put each one in with a single "out" output.. */
const auto h = outputHashes.at(output);
inputs2.insert_or_assign(
h.to_string(Base16, false),
justOut);
}
},
}, res);
} }
return hashString(htSHA256, drv.unparse(store, maskOutputs, &inputs2)); return hashString(htSHA256, drv.unparse(store, maskOutputs, &inputs2));

View file

@ -6,6 +6,7 @@
#include "content-address.hh" #include "content-address.hh"
#include <map> #include <map>
#include <variant>
namespace nix { namespace nix {
@ -114,10 +115,42 @@ Derivation readDerivation(const Store & store, const Path & drvPath, std::string
// FIXME: remove // FIXME: remove
bool isDerivation(const string & fileName); bool isDerivation(const string & fileName);
Hash hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs); // known CA drv's output hashes, current just for fixed-output derivations
// whose output hashes are always known since they are fixed up-front.
typedef std::map<std::string, Hash> CaOutputHashes;
typedef std::variant<
Hash, // regular DRV normalized hash
CaOutputHashes
> DrvHashModulo;
/* Returns hashes with the details of fixed-output subderivations
expunged.
A fixed-output derivation is a derivation whose outputs have a
specified content hash and hash algorithm. (Currently they must have
exactly one output (`out'), which is specified using the `outputHash'
and `outputHashAlgo' attributes, but the algorithm doesn't assume
this.) We don't want changes to such derivations to propagate upwards
through the dependency graph, changing output paths everywhere.
For instance, if we change the url in a call to the `fetchurl'
function, we do not want to rebuild everything depending on it---after
all, (the hash of) the file being downloaded is unchanged. So the
*output paths* should not change. On the other hand, the *derivation
paths* should change to reflect the new dependency graph.
For fixed-output derivations, this returns a map from the name of
each output to its hash, unique up to the output's contents.
For regular derivations, it returns a single hash of the derivation
ATerm, after subderivations have been likewise expunged from that
derivation.
*/
DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs);
/* Memoisation of hashDerivationModulo(). */ /* Memoisation of hashDerivationModulo(). */
typedef std::map<StorePath, Hash> DrvHashes; typedef std::map<StorePath, DrvHashModulo> DrvHashes;
extern DrvHashes drvHashes; // FIXME: global, not thread-safe extern DrvHashes drvHashes; // FIXME: global, not thread-safe

View file

@ -563,7 +563,9 @@ void LocalStore::checkDerivationOutputs(const StorePath & drvPath, const Derivat
} }
else { else {
Hash h = hashDerivationModulo(*this, drv, true); // Regular, non-CA derivation should always return a single hash and not
// hash per output.
Hash h = std::get<0>(hashDerivationModulo(*this, drv, true));
for (auto & i : drv.outputs) for (auto & i : drv.outputs)
check(makeOutputPath(i.first, h, drvName), i.second.path(*this, drv.name), i.first); check(makeOutputPath(i.first, h, drvName), i.second.path(*this, drv.name), i.first);
} }

View file

@ -135,7 +135,7 @@ StorePath getDerivationEnvironment(ref<Store> store, const StorePath & drvPath)
drv.env["_outputs_saved"] = drv.env["outputs"]; drv.env["_outputs_saved"] = drv.env["outputs"];
drv.env["outputs"] = "out"; drv.env["outputs"] = "out";
drv.inputSrcs.insert(std::move(getEnvShPath)); drv.inputSrcs.insert(std::move(getEnvShPath));
Hash h = hashDerivationModulo(*store, drv, true); Hash h = std::get<0>(hashDerivationModulo(*store, drv, true));
auto shellOutPath = store->makeOutputPath("out", h, drvName); auto shellOutPath = store->makeOutputPath("out", h, drvName);
drv.outputs.insert_or_assign("out", DerivationOutput { .output = DerivationOutputInputAddressed { drv.outputs.insert_or_assign("out", DerivationOutput { .output = DerivationOutputInputAddressed {
.path = shellOutPath .path = shellOutPath