Use the hash modulo in the derivation outputs

Rather than storing the derivation outputs as `drvPath!outputName` internally,
store them as `drvHashModulo!outputName` (or `outputHash!outputName` for
fixed-output derivations).

This makes the storage slightly more opaque, but enables an earlier
cutoff in cases where a fixed-output dependency changes (but keeps the
same output hash) − same as what we already do for input-addressed
derivations.
This commit is contained in:
regnat 2020-12-09 16:56:56 +01:00
parent 8914e01e37
commit bab1cda0e6
7 changed files with 93 additions and 89 deletions

View file

@ -1107,7 +1107,7 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * *
// Shouldn't happen as the toplevel derivation is not CA.
assert(false);
},
[&](UnknownHashes) {
[&](DeferredHash _) {
for (auto & i : outputs) {
drv.outputs.insert_or_assign(i,
DerivationOutput {

View file

@ -504,9 +504,6 @@ void DerivationGoal::inputsRealised()
Derivation drvResolved { *std::move(attempt) };
auto pathResolved = writeDerivation(worker.store, drvResolved);
/* Add to memotable to speed up downstream goal's queries with the
original derivation. */
drvPathResolutions.lock()->insert_or_assign(drvPath, pathResolved);
auto msg = fmt("Resolved derivation: '%s' -> '%s'",
worker.store.printStorePath(drvPath),
@ -2097,15 +2094,15 @@ struct RestrictedStore : public LocalFSStore, public virtual RestrictedStoreConf
void registerDrvOutput(const Realisation & info) override
{
if (!goal.isAllowed(info.id.drvPath))
throw InvalidPath("cannot register unknown drv output '%s' in recursive Nix", printStorePath(info.id.drvPath));
// XXX: Should we check for something here? Probably, but I'm not sure
// how
next->registerDrvOutput(info);
}
std::optional<const Realisation> queryRealisation(const DrvOutput & id) override
{
if (!goal.isAllowed(id.drvPath))
throw InvalidPath("cannot query the output info for unknown derivation '%s' in recursive Nix", printStorePath(id.drvPath));
// XXX: Should we check for something here? Probably, but I'm not sure
// how
return next->queryRealisation(id);
}
@ -3394,23 +3391,14 @@ void DerivationGoal::registerOutputs()
means it's safe to link the derivation to the output hash. We must do
that for floating CA derivations, which otherwise couldn't be cached,
but it's fine to do in all cases. */
bool isCaFloating = drv->type() == DerivationType::CAFloating;
auto drvPathResolved = drvPath;
if (!useDerivation && isCaFloating) {
/* Once a floating CA derivations reaches this point, it
must already be resolved, so we don't bother trying to
downcast drv to get would would just be an empty
inputDrvs field. */
Derivation drv2 { *drv };
drvPathResolved = writeDerivation(worker.store, drv2);
}
if (settings.isExperimentalFeatureEnabled("ca-derivations"))
if (settings.isExperimentalFeatureEnabled("ca-derivations")) {
auto outputHashes = staticOutputHashes(worker.store, *drv);
for (auto& [outputName, newInfo] : infos)
worker.store.registerDrvOutput(Realisation{
.id = DrvOutput{drvPathResolved, outputName},
.id = DrvOutput{outputHashes.at(outputName), outputName},
.outPath = newInfo.path});
}
}

View file

@ -496,10 +496,9 @@ static const DrvHashModulo pathDerivationModulo(Store & store, const StorePath &
*/
DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs)
{
bool isDeferred = false;
/* Return a fixed hash for fixed-output derivations. */
switch (drv.type()) {
case DerivationType::CAFloating:
return UnknownHashes {};
case DerivationType::CAFixed: {
std::map<std::string, Hash> outputHashes;
for (const auto & i : drv.outputs) {
@ -512,6 +511,9 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m
}
return outputHashes;
}
case DerivationType::CAFloating:
isDeferred = true;
break;
case DerivationType::InputAddressed:
break;
case DerivationType::DeferredInputAddressed:
@ -522,13 +524,16 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m
calls to this function. */
std::map<std::string, StringSet> inputs2;
for (auto & i : drv.inputDrvs) {
bool hasUnknownHash = false;
const auto & res = pathDerivationModulo(store, i.first);
std::visit(overloaded {
// Regular non-CA derivation, replace derivation
[&](Hash drvHash) {
inputs2.insert_or_assign(drvHash.to_string(Base16, false), i.second);
},
[&](DeferredHash deferredHash) {
isDeferred = true;
inputs2.insert_or_assign(deferredHash.hash.to_string(Base16, false), i.second);
},
// CA derivation's output hashes
[&](CaOutputHashes outputHashes) {
std::set<std::string> justOut = { "out" };
@ -540,16 +545,37 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m
justOut);
}
},
[&](UnknownHashes) {
hasUnknownHash = true;
},
}, res);
if (hasUnknownHash) {
return UnknownHashes {};
}
}
return hashString(htSHA256, drv.unparse(store, maskOutputs, &inputs2));
auto hash = hashString(htSHA256, drv.unparse(store, maskOutputs, &inputs2));
if (isDeferred)
return DeferredHash { hash };
else
return hash;
}
std::map<std::string, Hash> staticOutputHashes(Store& store, const Derivation& drv)
{
std::map<std::string, Hash> res;
std::visit(overloaded {
[&](Hash drvHash) {
for (auto & outputName : drv.outputNames()) {
res.insert({outputName, drvHash});
}
},
[&](DeferredHash deferredHash) {
for (auto & outputName : drv.outputNames()) {
res.insert({outputName, deferredHash.hash});
}
},
[&](CaOutputHashes outputHashes) {
res = outputHashes;
},
}, hashDerivationModulo(store, drv, true));
return res;
}
@ -719,9 +745,6 @@ static void rewriteDerivation(Store & store, BasicDerivation & drv, const String
}
Sync<DrvPathResolutions> drvPathResolutions;
std::optional<BasicDerivation> Derivation::tryResolve(Store & store) {
BasicDerivation resolved { *this };

View file

@ -18,8 +18,6 @@ namespace nix {
/* The traditional non-fixed-output derivation type. */
struct DerivationOutputInputAddressed
{
/* Will need to become `std::optional<StorePath>` once input-addressed
derivations are allowed to depend on cont-addressed derivations */
StorePath path;
};
@ -174,12 +172,12 @@ std::string outputPathName(std::string_view drvName, std::string_view outputName
// whose output hashes are always known since they are fixed up-front.
typedef std::map<std::string, Hash> CaOutputHashes;
struct UnknownHashes {};
struct DeferredHash { Hash hash; };
typedef std::variant<
Hash, // regular DRV normalized hash
CaOutputHashes, // Fixed-output derivation hashes
UnknownHashes // Deferred hashes for floating outputs drvs and their dependencies
DeferredHash // Deferred hashes for floating outputs drvs and their dependencies
> DrvHashModulo;
/* Returns hashes with the details of fixed-output subderivations
@ -207,22 +205,18 @@ typedef std::variant<
*/
DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs);
/*
Return a map associating each output to a hash that uniquely identifies its
derivation (modulo the self-references).
*/
std::map<std::string, Hash> staticOutputHashes(Store& store, const Derivation& drv);
/* Memoisation of hashDerivationModulo(). */
typedef std::map<StorePath, DrvHashModulo> DrvHashes;
// FIXME: global, though at least thread-safe.
extern Sync<DrvHashes> drvHashes;
/* Memoisation of `readDerivation(..).resove()`. */
typedef std::map<
StorePath,
std::optional<StorePath>
> DrvPathResolutions;
// FIXME: global, though at least thread-safe.
// FIXME: arguably overlaps with hashDerivationModulo memo table.
extern Sync<DrvPathResolutions> drvPathResolutions;
bool wantOutput(const string & output, const std::set<string> & wanted);
struct Source;

View file

@ -659,7 +659,7 @@ void LocalStore::registerDrvOutput(const Realisation & info)
auto state(_state.lock());
retrySQLite<void>([&]() {
state->stmts->RegisterRealisedOutput.use()
(info.id.drvPath.to_string())
(info.id.strHash())
(info.id.outputName)
(printStorePath(info.outPath))
.exec();
@ -879,17 +879,18 @@ StorePathSet LocalStore::queryValidDerivers(const StorePath & path)
// Try to resolve the derivation at path `original`, with a caching layer
// to make it more efficient
std::optional<StorePath> cachedResolve(
LocalStore & store,
const StorePath & original)
std::optional<Derivation> cachedResolve(
LocalStore& store,
const StorePath& original)
{
// This is quite dirty and leaky, but will disappear once #4340 is merged
static Sync<std::map<StorePath, std::optional<Derivation>>> resolutionsCache;
{
auto resolutions = drvPathResolutions.lock();
auto resolvedPathOptIter = resolutions->find(original);
if (resolvedPathOptIter != resolutions->end()) {
auto & [_, resolvedPathOpt] = *resolvedPathOptIter;
if (resolvedPathOpt)
return resolvedPathOpt;
auto resolutions = resolutionsCache.lock();
auto resolvedDrvIter = resolutions->find(original);
if (resolvedDrvIter != resolutions->end()) {
auto & [_, resolvedDrv] = *resolvedDrvIter;
return *resolvedDrv;
}
}
@ -898,12 +899,9 @@ std::optional<StorePath> cachedResolve(
auto attempt = drv.tryResolve(store);
if (!attempt)
return std::nullopt;
/* Just compute store path */
auto pathResolved =
writeDerivation(store, *std::move(attempt), NoRepair, true);
/* Store in memo table. */
drvPathResolutions.lock()->insert_or_assign(original, pathResolved);
return pathResolved;
resolutionsCache.lock()->insert_or_assign(original, *attempt);
return *attempt;
}
std::map<std::string, std::optional<StorePath>>
@ -933,26 +931,24 @@ LocalStore::queryPartialDerivationOutputMap(const StorePath& path_)
auto drv = readDerivation(path);
for (auto & output : drv.outputsAndOptPaths(*this)) {
outputs.emplace(output.first, std::nullopt);
}
auto resolvedDrv = cachedResolve(*this, path);
if (!resolvedDrv)
if (!resolvedDrv) {
for (auto& [outputName, _] : drv.outputsAndOptPaths(*this)) {
if (!outputs.count(outputName))
outputs.emplace(outputName, std::nullopt);
}
return outputs;
}
retrySQLite<void>([&]() {
auto state(_state.lock());
path = *resolvedDrv;
auto useQueryDerivationOutputs{
state->stmts->QueryAllRealisedOutputs.use()(path.to_string())};
while (useQueryDerivationOutputs.next())
outputs.insert_or_assign(
useQueryDerivationOutputs.getStr(0),
parseStorePath(useQueryDerivationOutputs.getStr(1)));
});
auto resolvedDrvHashes = staticOutputHashes(*this, *resolvedDrv);
for (auto& [outputName, hash] : resolvedDrvHashes) {
auto realisation = queryRealisation(DrvOutput{hash, outputName});
if (realisation)
outputs.insert_or_assign(outputName, realisation->outPath);
else
outputs.insert_or_assign(outputName, std::nullopt);
}
return outputs;
}
@ -1695,12 +1691,11 @@ std::optional<const Realisation> LocalStore::queryRealisation(
typedef std::optional<const Realisation> Ret;
return retrySQLite<Ret>([&]() -> Ret {
auto state(_state.lock());
auto use(state->stmts->QueryRealisedOutput.use()(id.drvPath.to_string())(
auto use(state->stmts->QueryRealisedOutput.use()(id.strHash())(
id.outputName));
if (!use.next())
return std::nullopt;
auto outputPath = parseStorePath(use.getStr(0));
auto resolvedDrv = StorePath(use.getStr(1));
return Ret{
Realisation{.id = id, .outPath = outputPath}};
});

View file

@ -7,18 +7,18 @@ namespace nix {
MakeError(InvalidDerivationOutputId, Error);
DrvOutput DrvOutput::parse(const std::string &strRep) {
const auto &[rawPath, outputs] = parsePathWithOutputs(strRep);
if (outputs.size() != 1)
size_t n = strRep.find("!");
if (n == strRep.npos)
throw InvalidDerivationOutputId("Invalid derivation output id %s", strRep);
return DrvOutput{
.drvPath = StorePath(rawPath),
.outputName = *outputs.begin(),
.drvHash = Hash::parseAnyPrefixed(strRep.substr(0, n)),
.outputName = strRep.substr(n+1),
};
}
std::string DrvOutput::to_string() const {
return std::string(drvPath.to_string()) + "!" + outputName;
return strHash() + "!" + outputName;
}
nlohmann::json Realisation::toJSON() const {

View file

@ -6,11 +6,15 @@
namespace nix {
struct DrvOutput {
StorePath drvPath;
// The hash modulo of the derivation
Hash drvHash;
std::string outputName;
std::string to_string() const;
std::string strHash() const
{ return drvHash.to_string(Base16, true); }
static DrvOutput parse(const std::string &);
bool operator<(const DrvOutput& other) const { return to_pair() < other.to_pair(); }
@ -18,8 +22,8 @@ struct DrvOutput {
private:
// Just to make comparison operators easier to write
std::pair<StorePath, std::string> to_pair() const
{ return std::make_pair(drvPath, outputName); }
std::pair<Hash, std::string> to_pair() const
{ return std::make_pair(drvHash, outputName); }
};
struct Realisation {