Merge branch 'multi-output-hashDerivationModulo' of github.com:Ericson2314/nix into misc-ca

This commit is contained in:
Carlo Nucera 2020-07-17 10:28:33 -04:00
commit 5cb840541b
6 changed files with 1895 additions and 38 deletions

1787
local-store-temp.cc Normal file

File diff suppressed because it is too large Load diff

View file

@ -800,7 +800,9 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * *
});
}
Hash h = hashDerivationModulo(*state.store, Derivation(drv), true);
// Regular, non-CA derivation should always return a single hash and not
// hash per output.
Hash h = std::get<0>(hashDerivationModulo(*state.store, Derivation(drv), true));
for (auto & i : outputs) {
auto outPath = state.store->makeOutputPath(i, h, drvName);

View file

@ -369,50 +369,83 @@ bool BasicDerivation::isFixedOutput() const
DrvHashes drvHashes;
/* pathDerivationModulo and hashDerivationModulo are mutually recursive
*/
/* Returns the hash of a derivation modulo fixed-output
subderivations. A fixed-output derivation is a derivation with one
output (`out') for which an expected hash and hash algorithm are
specified (using the `outputHash' and `outputHashAlgo'
attributes). We don't want changes to such derivations to
propagate upwards through the dependency graph, changing output
paths everywhere.
/* Look up the derivation by value and memoize the
`hashDerivationModulo` call.
*/
static const DrvHashModulo & pathDerivationModulo(Store & store, const StorePath & drvPath)
{
auto h = drvHashes.find(drvPath);
if (h == drvHashes.end()) {
assert(store.isValidPath(drvPath));
// Cache it
h = drvHashes.insert_or_assign(
drvPath,
hashDerivationModulo(
store,
store.readDerivation(drvPath),
false)).first;
}
return h->second;
}
For instance, if we change the url in a call to the `fetchurl'
function, we do not want to rebuild everything depending on it
(after all, (the hash of) the file being downloaded is unchanged).
So the *output paths* should not change. On the other hand, the
*derivation paths* should change to reflect the new dependency
graph.
/* See the header for interface details. These are the implementation details.
That's what this function does: it returns a hash which is just the
hash of the derivation ATerm, except that any input derivation
paths have been replaced by the result of a recursive call to this
function, and that for fixed-output derivations we return a hash of
its output path. */
Hash hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs)
For fixed-output derivations, each hash in the map is not the
corresponding output's content hash, but a hash of that hash along
with other constant data. The key point is that the value is a pure
function of the output's contents, and there are no preimage attacks
either spoofing an output's contents for a derivation, or
spoofing a derivation for an output's contents.
For regular derivations, it looks up each subderivation from its hash
and recurs. If the subderivation is also regular, it simply
substitutes the derivation path with its hash. If the subderivation
is fixed-output, however, it takes each output hash and pretends it
is a derivation hash producing a single "out" output. This is so we
don't leak the provenance of fixed outputs, reducing pointless cache
misses as the build itself won't know this.
*/
DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs)
{
/* Return a fixed hash for fixed-output derivations. */
if (drv.isFixedOutput()) {
DerivationOutputs::const_iterator i = drv.outputs.begin();
auto hash = std::get<DerivationOutputFixed>(i->second.output);
return hashString(htSHA256, "fixed:out:"
+ hash.hash.printMethodAlgo() + ":"
+ hash.hash.hash.to_string(Base16, false) + ":"
+ store.printStorePath(i->second.path(store, drv.name)));
std::map<std::string, Hash> outputHashes;
for (const auto & i : drv.outputs) {
auto & dof = std::get<DerivationOutputFixed>(i.second.output);
auto hash = hashString(htSHA256, "fixed:out:"
+ dof.hash.printMethodAlgo() + ":"
+ dof.hash.hash.to_string(Base16, false) + ":"
+ store.printStorePath(i.second.path(store, drv.name)));
outputHashes.insert_or_assign(i.first, std::move(hash));
}
return outputHashes;
}
/* For other derivations, replace the inputs paths with recursive
calls to this function. */
std::map<std::string, StringSet> inputs2;
for (auto & i : drv.inputDrvs) {
auto h = drvHashes.find(i.first);
if (h == drvHashes.end()) {
assert(store.isValidPath(i.first));
h = drvHashes.insert_or_assign(i.first, hashDerivationModulo(store,
store.readDerivation(i.first), false)).first;
const auto & res = pathDerivationModulo(store, i.first);
std::visit(overloaded {
// Regular non-CA derivation, replace derivation
[&](Hash drvHash) {
inputs2.insert_or_assign(drvHash.to_string(Base16, false), i.second);
},
// CA derivation's output hashes
[&](CaOutputHashes outputHashes) {
std::set<std::string> justOut = { "out" };
for (auto & output : i.second) {
/* Put each one in with a single "out" output.. */
const auto h = outputHashes.at(output);
inputs2.insert_or_assign(
h.to_string(Base16, false),
justOut);
}
inputs2.insert_or_assign(h->second.to_string(Base16, false), i.second);
},
}, res);
}
return hashString(htSHA256, drv.unparse(store, maskOutputs, &inputs2));

View file

@ -6,6 +6,7 @@
#include "content-address.hh"
#include <map>
#include <variant>
namespace nix {
@ -114,10 +115,42 @@ Derivation readDerivation(const Store & store, const Path & drvPath, std::string
// FIXME: remove
bool isDerivation(const string & fileName);
Hash hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs);
// known CA drv's output hashes, current just for fixed-output derivations
// whose output hashes are always known since they are fixed up-front.
typedef std::map<std::string, Hash> CaOutputHashes;
typedef std::variant<
Hash, // regular DRV normalized hash
CaOutputHashes
> DrvHashModulo;
/* Returns hashes with the details of fixed-output subderivations
expunged.
A fixed-output derivation is a derivation whose outputs have a
specified content hash and hash algorithm. (Currently they must have
exactly one output (`out'), which is specified using the `outputHash'
and `outputHashAlgo' attributes, but the algorithm doesn't assume
this.) We don't want changes to such derivations to propagate upwards
through the dependency graph, changing output paths everywhere.
For instance, if we change the url in a call to the `fetchurl'
function, we do not want to rebuild everything depending on it---after
all, (the hash of) the file being downloaded is unchanged. So the
*output paths* should not change. On the other hand, the *derivation
paths* should change to reflect the new dependency graph.
For fixed-output derivations, this returns a map from the name of
each output to its hash, unique up to the output's contents.
For regular derivations, it returns a single hash of the derivation
ATerm, after subderivations have been likewise expunged from that
derivation.
*/
DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs);
/* Memoisation of hashDerivationModulo(). */
typedef std::map<StorePath, Hash> DrvHashes;
typedef std::map<StorePath, DrvHashModulo> DrvHashes;
extern DrvHashes drvHashes; // FIXME: global, not thread-safe

View file

@ -563,7 +563,9 @@ void LocalStore::checkDerivationOutputs(const StorePath & drvPath, const Derivat
}
else {
Hash h = hashDerivationModulo(*this, drv, true);
// Regular, non-CA derivation should always return a single hash and not
// hash per output.
Hash h = std::get<0>(hashDerivationModulo(*this, drv, true));
for (auto & i : drv.outputs)
check(makeOutputPath(i.first, h, drvName), i.second.path(*this, drv.name), i.first);
}

View file

@ -135,7 +135,7 @@ StorePath getDerivationEnvironment(ref<Store> store, const StorePath & drvPath)
drv.env["_outputs_saved"] = drv.env["outputs"];
drv.env["outputs"] = "out";
drv.inputSrcs.insert(std::move(getEnvShPath));
Hash h = hashDerivationModulo(*store, drv, true);
Hash h = std::get<0>(hashDerivationModulo(*store, drv, true));
auto shellOutPath = store->makeOutputPath("out", h, drvName);
drv.outputs.insert_or_assign("out", DerivationOutput { .output = DerivationOutputInputAddressed {
.path = shellOutPath