Use the hash modulo in the derivation outputs

Rather than storing the derivation outputs as `drvPath!outputName` internally,
store them as `drvHashModulo!outputName` (or `outputHash!outputName` for
fixed-output derivations).

This makes the storage slightly more opaque, but enables an earlier
cutoff in cases where a fixed-output dependency changes (but keeps the
same output hash) — the same as what we already do for input-addressed
derivations.
This commit is contained in:
regnat 2020-12-09 16:56:56 +01:00
parent 8914e01e37
commit bab1cda0e6
7 changed files with 93 additions and 89 deletions

View file

@ -1107,7 +1107,7 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * *
// Shouldn't happen as the toplevel derivation is not CA. // Shouldn't happen as the toplevel derivation is not CA.
assert(false); assert(false);
}, },
[&](UnknownHashes) { [&](DeferredHash _) {
for (auto & i : outputs) { for (auto & i : outputs) {
drv.outputs.insert_or_assign(i, drv.outputs.insert_or_assign(i,
DerivationOutput { DerivationOutput {

View file

@ -504,9 +504,6 @@ void DerivationGoal::inputsRealised()
Derivation drvResolved { *std::move(attempt) }; Derivation drvResolved { *std::move(attempt) };
auto pathResolved = writeDerivation(worker.store, drvResolved); auto pathResolved = writeDerivation(worker.store, drvResolved);
/* Add to memotable to speed up downstream goal's queries with the
original derivation. */
drvPathResolutions.lock()->insert_or_assign(drvPath, pathResolved);
auto msg = fmt("Resolved derivation: '%s' -> '%s'", auto msg = fmt("Resolved derivation: '%s' -> '%s'",
worker.store.printStorePath(drvPath), worker.store.printStorePath(drvPath),
@ -2097,15 +2094,15 @@ struct RestrictedStore : public LocalFSStore, public virtual RestrictedStoreConf
void registerDrvOutput(const Realisation & info) override void registerDrvOutput(const Realisation & info) override
{ {
if (!goal.isAllowed(info.id.drvPath)) // XXX: Should we check for something here? Probably, but I'm not sure
throw InvalidPath("cannot register unknown drv output '%s' in recursive Nix", printStorePath(info.id.drvPath)); // how
next->registerDrvOutput(info); next->registerDrvOutput(info);
} }
std::optional<const Realisation> queryRealisation(const DrvOutput & id) override std::optional<const Realisation> queryRealisation(const DrvOutput & id) override
{ {
if (!goal.isAllowed(id.drvPath)) // XXX: Should we check for something here? Probably, but I'm not sure
throw InvalidPath("cannot query the output info for unknown derivation '%s' in recursive Nix", printStorePath(id.drvPath)); // how
return next->queryRealisation(id); return next->queryRealisation(id);
} }
@ -3394,24 +3391,15 @@ void DerivationGoal::registerOutputs()
means it's safe to link the derivation to the output hash. We must do means it's safe to link the derivation to the output hash. We must do
that for floating CA derivations, which otherwise couldn't be cached, that for floating CA derivations, which otherwise couldn't be cached,
but it's fine to do in all cases. */ but it's fine to do in all cases. */
bool isCaFloating = drv->type() == DerivationType::CAFloating;
auto drvPathResolved = drvPath; if (settings.isExperimentalFeatureEnabled("ca-derivations")) {
if (!useDerivation && isCaFloating) { auto outputHashes = staticOutputHashes(worker.store, *drv);
/* Once a floating CA derivations reaches this point, it
must already be resolved, so we don't bother trying to
downcast drv to get would would just be an empty
inputDrvs field. */
Derivation drv2 { *drv };
drvPathResolved = writeDerivation(worker.store, drv2);
}
if (settings.isExperimentalFeatureEnabled("ca-derivations"))
for (auto& [outputName, newInfo] : infos) for (auto& [outputName, newInfo] : infos)
worker.store.registerDrvOutput(Realisation{ worker.store.registerDrvOutput(Realisation{
.id = DrvOutput{drvPathResolved, outputName}, .id = DrvOutput{outputHashes.at(outputName), outputName},
.outPath = newInfo.path}); .outPath = newInfo.path});
} }
}
void DerivationGoal::checkOutputs(const std::map<Path, ValidPathInfo> & outputs) void DerivationGoal::checkOutputs(const std::map<Path, ValidPathInfo> & outputs)

View file

@ -496,10 +496,9 @@ static const DrvHashModulo pathDerivationModulo(Store & store, const StorePath &
*/ */
DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs) DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs)
{ {
bool isDeferred = false;
/* Return a fixed hash for fixed-output derivations. */ /* Return a fixed hash for fixed-output derivations. */
switch (drv.type()) { switch (drv.type()) {
case DerivationType::CAFloating:
return UnknownHashes {};
case DerivationType::CAFixed: { case DerivationType::CAFixed: {
std::map<std::string, Hash> outputHashes; std::map<std::string, Hash> outputHashes;
for (const auto & i : drv.outputs) { for (const auto & i : drv.outputs) {
@ -512,6 +511,9 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m
} }
return outputHashes; return outputHashes;
} }
case DerivationType::CAFloating:
isDeferred = true;
break;
case DerivationType::InputAddressed: case DerivationType::InputAddressed:
break; break;
case DerivationType::DeferredInputAddressed: case DerivationType::DeferredInputAddressed:
@ -522,13 +524,16 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m
calls to this function. */ calls to this function. */
std::map<std::string, StringSet> inputs2; std::map<std::string, StringSet> inputs2;
for (auto & i : drv.inputDrvs) { for (auto & i : drv.inputDrvs) {
bool hasUnknownHash = false;
const auto & res = pathDerivationModulo(store, i.first); const auto & res = pathDerivationModulo(store, i.first);
std::visit(overloaded { std::visit(overloaded {
// Regular non-CA derivation, replace derivation // Regular non-CA derivation, replace derivation
[&](Hash drvHash) { [&](Hash drvHash) {
inputs2.insert_or_assign(drvHash.to_string(Base16, false), i.second); inputs2.insert_or_assign(drvHash.to_string(Base16, false), i.second);
}, },
[&](DeferredHash deferredHash) {
isDeferred = true;
inputs2.insert_or_assign(deferredHash.hash.to_string(Base16, false), i.second);
},
// CA derivation's output hashes // CA derivation's output hashes
[&](CaOutputHashes outputHashes) { [&](CaOutputHashes outputHashes) {
std::set<std::string> justOut = { "out" }; std::set<std::string> justOut = { "out" };
@ -540,16 +545,37 @@ DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool m
justOut); justOut);
} }
}, },
[&](UnknownHashes) {
hasUnknownHash = true;
},
}, res); }, res);
if (hasUnknownHash) {
return UnknownHashes {};
}
} }
return hashString(htSHA256, drv.unparse(store, maskOutputs, &inputs2)); auto hash = hashString(htSHA256, drv.unparse(store, maskOutputs, &inputs2));
if (isDeferred)
return DeferredHash { hash };
else
return hash;
}
/* Build a map from output name to hash for the derivation `drv`.
   The contents depend on which alternative of the variant is returned by
   `hashDerivationModulo(store, drv, true)`; each alternative is handled by
   one of the lambdas below. */
std::map<std::string, Hash> staticOutputHashes(Store& store, const Derivation& drv)
{
std::map<std::string, Hash> res;
std::visit(overloaded {
// Plain hash: every name in `drv.outputNames()` maps to that same hash.
[&](Hash drvHash) {
for (auto & outputName : drv.outputNames()) {
res.insert({outputName, drvHash});
}
},
// Deferred hash: same as above, using the hash wrapped in `DeferredHash`.
[&](DeferredHash deferredHash) {
for (auto & outputName : drv.outputNames()) {
res.insert({outputName, deferredHash.hash});
}
},
// Per-output hashes: the returned map is used as the result unchanged.
[&](CaOutputHashes outputHashes) {
res = outputHashes;
},
}, hashDerivationModulo(store, drv, true));
return res;
} }
@ -719,9 +745,6 @@ static void rewriteDerivation(Store & store, BasicDerivation & drv, const String
} }
Sync<DrvPathResolutions> drvPathResolutions;
std::optional<BasicDerivation> Derivation::tryResolve(Store & store) { std::optional<BasicDerivation> Derivation::tryResolve(Store & store) {
BasicDerivation resolved { *this }; BasicDerivation resolved { *this };

View file

@ -18,8 +18,6 @@ namespace nix {
/* The traditional non-fixed-output derivation type. */ /* The traditional non-fixed-output derivation type. */
struct DerivationOutputInputAddressed struct DerivationOutputInputAddressed
{ {
/* Will need to become `std::optional<StorePath>` once input-addressed
derivations are allowed to depend on cont-addressed derivations */
StorePath path; StorePath path;
}; };
@ -174,12 +172,12 @@ std::string outputPathName(std::string_view drvName, std::string_view outputName
// whose output hashes are always known since they are fixed up-front. // whose output hashes are always known since they are fixed up-front.
typedef std::map<std::string, Hash> CaOutputHashes; typedef std::map<std::string, Hash> CaOutputHashes;
struct UnknownHashes {}; struct DeferredHash { Hash hash; };
typedef std::variant< typedef std::variant<
Hash, // regular DRV normalized hash Hash, // regular DRV normalized hash
CaOutputHashes, // Fixed-output derivation hashes CaOutputHashes, // Fixed-output derivation hashes
UnknownHashes // Deferred hashes for floating outputs drvs and their dependencies DeferredHash // Deferred hashes for floating outputs drvs and their dependencies
> DrvHashModulo; > DrvHashModulo;
/* Returns hashes with the details of fixed-output subderivations /* Returns hashes with the details of fixed-output subderivations
@ -207,22 +205,18 @@ typedef std::variant<
*/ */
DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs); DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs);
/*
Return a map associating each output to a hash that uniquely identifies its
derivation (modulo the self-references).
*/
std::map<std::string, Hash> staticOutputHashes(Store& store, const Derivation& drv);
/* Memoisation of hashDerivationModulo(). */ /* Memoisation of hashDerivationModulo(). */
typedef std::map<StorePath, DrvHashModulo> DrvHashes; typedef std::map<StorePath, DrvHashModulo> DrvHashes;
// FIXME: global, though at least thread-safe. // FIXME: global, though at least thread-safe.
extern Sync<DrvHashes> drvHashes; extern Sync<DrvHashes> drvHashes;
/* Memoisation of `readDerivation(..).resove()`. */
typedef std::map<
StorePath,
std::optional<StorePath>
> DrvPathResolutions;
// FIXME: global, though at least thread-safe.
// FIXME: arguably overlaps with hashDerivationModulo memo table.
extern Sync<DrvPathResolutions> drvPathResolutions;
bool wantOutput(const string & output, const std::set<string> & wanted); bool wantOutput(const string & output, const std::set<string> & wanted);
struct Source; struct Source;

View file

@ -659,7 +659,7 @@ void LocalStore::registerDrvOutput(const Realisation & info)
auto state(_state.lock()); auto state(_state.lock());
retrySQLite<void>([&]() { retrySQLite<void>([&]() {
state->stmts->RegisterRealisedOutput.use() state->stmts->RegisterRealisedOutput.use()
(info.id.drvPath.to_string()) (info.id.strHash())
(info.id.outputName) (info.id.outputName)
(printStorePath(info.outPath)) (printStorePath(info.outPath))
.exec(); .exec();
@ -879,17 +879,18 @@ StorePathSet LocalStore::queryValidDerivers(const StorePath & path)
// Try to resolve the derivation at path `original`, with a caching layer // Try to resolve the derivation at path `original`, with a caching layer
// to make it more efficient // to make it more efficient
std::optional<StorePath> cachedResolve( std::optional<Derivation> cachedResolve(
LocalStore& store, LocalStore& store,
const StorePath& original) const StorePath& original)
{ {
// This is quite dirty and leaky, but will disappear once #4340 is merged
static Sync<std::map<StorePath, std::optional<Derivation>>> resolutionsCache;
{ {
auto resolutions = drvPathResolutions.lock(); auto resolutions = resolutionsCache.lock();
auto resolvedPathOptIter = resolutions->find(original); auto resolvedDrvIter = resolutions->find(original);
if (resolvedPathOptIter != resolutions->end()) { if (resolvedDrvIter != resolutions->end()) {
auto & [_, resolvedPathOpt] = *resolvedPathOptIter; auto & [_, resolvedDrv] = *resolvedDrvIter;
if (resolvedPathOpt) return *resolvedDrv;
return resolvedPathOpt;
} }
} }
@ -898,12 +899,9 @@ std::optional<StorePath> cachedResolve(
auto attempt = drv.tryResolve(store); auto attempt = drv.tryResolve(store);
if (!attempt) if (!attempt)
return std::nullopt; return std::nullopt;
/* Just compute store path */
auto pathResolved =
writeDerivation(store, *std::move(attempt), NoRepair, true);
/* Store in memo table. */ /* Store in memo table. */
drvPathResolutions.lock()->insert_or_assign(original, pathResolved); resolutionsCache.lock()->insert_or_assign(original, *attempt);
return pathResolved; return *attempt;
} }
std::map<std::string, std::optional<StorePath>> std::map<std::string, std::optional<StorePath>>
@ -933,26 +931,24 @@ LocalStore::queryPartialDerivationOutputMap(const StorePath& path_)
auto drv = readDerivation(path); auto drv = readDerivation(path);
for (auto & output : drv.outputsAndOptPaths(*this)) {
outputs.emplace(output.first, std::nullopt);
}
auto resolvedDrv = cachedResolve(*this, path); auto resolvedDrv = cachedResolve(*this, path);
if (!resolvedDrv) if (!resolvedDrv) {
for (auto& [outputName, _] : drv.outputsAndOptPaths(*this)) {
if (!outputs.count(outputName))
outputs.emplace(outputName, std::nullopt);
}
return outputs; return outputs;
}
retrySQLite<void>([&]() { auto resolvedDrvHashes = staticOutputHashes(*this, *resolvedDrv);
auto state(_state.lock()); for (auto& [outputName, hash] : resolvedDrvHashes) {
path = *resolvedDrv; auto realisation = queryRealisation(DrvOutput{hash, outputName});
auto useQueryDerivationOutputs{ if (realisation)
state->stmts->QueryAllRealisedOutputs.use()(path.to_string())}; outputs.insert_or_assign(outputName, realisation->outPath);
else
while (useQueryDerivationOutputs.next()) outputs.insert_or_assign(outputName, std::nullopt);
outputs.insert_or_assign( }
useQueryDerivationOutputs.getStr(0),
parseStorePath(useQueryDerivationOutputs.getStr(1)));
});
return outputs; return outputs;
} }
@ -1695,12 +1691,11 @@ std::optional<const Realisation> LocalStore::queryRealisation(
typedef std::optional<const Realisation> Ret; typedef std::optional<const Realisation> Ret;
return retrySQLite<Ret>([&]() -> Ret { return retrySQLite<Ret>([&]() -> Ret {
auto state(_state.lock()); auto state(_state.lock());
auto use(state->stmts->QueryRealisedOutput.use()(id.drvPath.to_string())( auto use(state->stmts->QueryRealisedOutput.use()(id.strHash())(
id.outputName)); id.outputName));
if (!use.next()) if (!use.next())
return std::nullopt; return std::nullopt;
auto outputPath = parseStorePath(use.getStr(0)); auto outputPath = parseStorePath(use.getStr(0));
auto resolvedDrv = StorePath(use.getStr(1));
return Ret{ return Ret{
Realisation{.id = id, .outPath = outputPath}}; Realisation{.id = id, .outPath = outputPath}};
}); });

View file

@ -7,18 +7,18 @@ namespace nix {
MakeError(InvalidDerivationOutputId, Error); MakeError(InvalidDerivationOutputId, Error);
DrvOutput DrvOutput::parse(const std::string &strRep) { DrvOutput DrvOutput::parse(const std::string &strRep) {
const auto &[rawPath, outputs] = parsePathWithOutputs(strRep); size_t n = strRep.find("!");
if (outputs.size() != 1) if (n == strRep.npos)
throw InvalidDerivationOutputId("Invalid derivation output id %s", strRep); throw InvalidDerivationOutputId("Invalid derivation output id %s", strRep);
return DrvOutput{ return DrvOutput{
.drvPath = StorePath(rawPath), .drvHash = Hash::parseAnyPrefixed(strRep.substr(0, n)),
.outputName = *outputs.begin(), .outputName = strRep.substr(n+1),
}; };
} }
std::string DrvOutput::to_string() const { std::string DrvOutput::to_string() const {
return std::string(drvPath.to_string()) + "!" + outputName; return strHash() + "!" + outputName;
} }
nlohmann::json Realisation::toJSON() const { nlohmann::json Realisation::toJSON() const {

View file

@ -6,11 +6,15 @@
namespace nix { namespace nix {
struct DrvOutput { struct DrvOutput {
StorePath drvPath; // The hash modulo of the derivation
Hash drvHash;
std::string outputName; std::string outputName;
std::string to_string() const; std::string to_string() const;
std::string strHash() const
{ return drvHash.to_string(Base16, true); }
static DrvOutput parse(const std::string &); static DrvOutput parse(const std::string &);
bool operator<(const DrvOutput& other) const { return to_pair() < other.to_pair(); } bool operator<(const DrvOutput& other) const { return to_pair() < other.to_pair(); }
@ -18,8 +22,8 @@ struct DrvOutput {
private: private:
// Just to make comparison operators easier to write // Just to make comparison operators easier to write
std::pair<StorePath, std::string> to_pair() const std::pair<Hash, std::string> to_pair() const
{ return std::make_pair(drvPath, outputName); } { return std::make_pair(drvHash, outputName); }
}; };
struct Realisation { struct Realisation {