Store::queryMissing(): Use a thread pool

For one particular NixOS configuration, this cut the runtime of
"nix-store -r --dry-run" from 6m51s to 3.4s. It also fixes a bug in
the size calculation that caused certain paths to be counted twice.
For example, before:

  these paths will be fetched (1249.98 MiB download, 2995.74 MiB unpacked):

and after:

  these paths will be fetched (1219.56 MiB download, 2862.17 MiB unpacked):
This commit is contained in:
Eelco Dolstra 2016-07-21 17:40:40 +02:00
parent 6f70fcd1c5
commit d57981bac4

View file

@ -72,48 +72,87 @@ void Store::computeFSClosure(const Path & path,
void Store::queryMissing(const PathSet & targets,
PathSet & willBuild, PathSet & willSubstitute, PathSet & unknown,
unsigned long long & downloadSize, unsigned long long & narSize)
PathSet & willBuild_, PathSet & willSubstitute_, PathSet & unknown_,
unsigned long long & downloadSize_, unsigned long long & narSize_)
{
downloadSize = narSize = 0;
downloadSize_ = narSize_ = 0;
PathSet todo(targets.begin(), targets.end()), done;
ThreadPool pool;
/* Getting substitute info has high latency when using the binary
cache substituter. Thus it's essential to do substitute
queries in parallel as much as possible. To accomplish this
we do the following:
struct State
{
PathSet done;
PathSet & unknown, & willSubstitute, & willBuild;
unsigned long long & downloadSize;
unsigned long long & narSize;
};
- For all paths still to be processed (todo), we add all
paths for which we need info to the set query. For an
unbuilt derivation this is the output paths; otherwise, it's
the path itself.
struct DrvState
{
size_t left;
bool done = false;
PathSet outPaths;
DrvState(size_t left) : left(left) { }
};
- We get info about all paths in query in parallel.
Sync<State> state_(State{PathSet(), unknown_, willSubstitute_, willBuild_, downloadSize_, narSize_});
- We process the results and add new items to todo if
necessary. E.g. if a path is substitutable, then we need to
get info on its references.
std::function<void(Path)> doPath;
- Repeat until todo is empty.
*/
auto mustBuildDrv = [&](const Path & drvPath, const Derivation & drv) {
{
auto state(state_.lock());
state->willBuild.insert(drvPath);
}
while (!todo.empty()) {
for (auto & i : drv.inputDrvs)
pool.enqueue(std::bind(doPath, makeDrvPathWithOutputs(i.first, i.second)));
};
PathSet query, todoDrv, todoNonDrv;
auto checkOutput = [&](
const Path & drvPath, ref<Derivation> drv, const Path & outPath, ref<Sync<DrvState>> drvState_)
{
if (drvState_->lock()->done) return;
for (auto & i : todo) {
if (done.find(i) != done.end()) continue;
done.insert(i);
SubstitutablePathInfos infos;
querySubstitutablePathInfos({outPath}, infos);
DrvPathWithOutputs i2 = parseDrvPathWithOutputs(i);
if (infos.empty()) {
drvState_->lock()->done = true;
mustBuildDrv(drvPath, *drv);
} else {
{
auto drvState(drvState_->lock());
if (drvState->done) return;
assert(drvState->left);
drvState->left--;
drvState->outPaths.insert(outPath);
if (!drvState->left) {
for (auto & path : drvState->outPaths)
pool.enqueue(std::bind(doPath, path));
}
}
}
};
doPath = [&](const Path & path) {
{
auto state(state_.lock());
if (state->done.count(path)) return;
state->done.insert(path);
}
DrvPathWithOutputs i2 = parseDrvPathWithOutputs(path);
if (isDerivation(i2.first)) {
if (!isValidPath(i2.first)) {
// FIXME: we could try to substitute p.
unknown.insert(i);
continue;
// FIXME: we could try to substitute the derivation.
auto state(state_.lock());
state->unknown.insert(path);
return;
}
Derivation drv = derivationFromPath(i2.first);
PathSet invalid;
@ -121,67 +160,47 @@ void Store::queryMissing(const PathSet & targets,
if (wantOutput(j.first, i2.second)
&& !isValidPath(j.second.path))
invalid.insert(j.second.path);
if (invalid.empty()) continue;
if (invalid.empty()) return;
todoDrv.insert(i);
if (settings.useSubstitutes && drv.substitutesAllowed())
query.insert(invalid.begin(), invalid.end());
}
if (settings.useSubstitutes && drv.substitutesAllowed()) {
auto drvState = make_ref<Sync<DrvState>>(DrvState(invalid.size()));
for (auto & output : invalid)
pool.enqueue(std::bind(checkOutput, i2.first, make_ref<Derivation>(drv), output, drvState));
} else
mustBuildDrv(i2.first, drv);
else {
if (isValidPath(i)) continue;
query.insert(i);
todoNonDrv.insert(i);
}
}
} else {
todo.clear();
if (isValidPath(path)) return;
SubstitutablePathInfos infos;
querySubstitutablePathInfos(query, infos);
querySubstitutablePathInfos({path}, infos);
for (auto & i : todoDrv) {
DrvPathWithOutputs i2 = parseDrvPathWithOutputs(i);
// FIXME: cache this
Derivation drv = derivationFromPath(i2.first);
PathSet outputs;
bool mustBuild = false;
if (settings.useSubstitutes && drv.substitutesAllowed()) {
for (auto & j : drv.outputs) {
if (!wantOutput(j.first, i2.second)) continue;
if (!isValidPath(j.second.path)) {
if (infos.find(j.second.path) == infos.end())
mustBuild = true;
else
outputs.insert(j.second.path);
}
}
} else
mustBuild = true;
if (mustBuild) {
willBuild.insert(i2.first);
todo.insert(drv.inputSrcs.begin(), drv.inputSrcs.end());
for (auto & j : drv.inputDrvs)
todo.insert(makeDrvPathWithOutputs(j.first, j.second));
} else
todoNonDrv.insert(outputs.begin(), outputs.end());
if (infos.empty()) {
auto state(state_.lock());
state->unknown.insert(path);
return;
}
for (auto & i : todoNonDrv) {
done.insert(i);
SubstitutablePathInfos::iterator info = infos.find(i);
if (info != infos.end()) {
willSubstitute.insert(i);
downloadSize += info->second.downloadSize;
narSize += info->second.narSize;
todo.insert(info->second.references.begin(), info->second.references.end());
} else
unknown.insert(i);
auto info = infos.find(path);
assert(info != infos.end());
{
auto state(state_.lock());
state->willSubstitute.insert(path);
state->downloadSize += info->second.downloadSize;
state->narSize += info->second.narSize;
}
for (auto & ref : info->second.references)
pool.enqueue(std::bind(doPath, ref));
}
};
for (auto & path : targets)
pool.enqueue(std::bind(doPath, path));
pool.process();
}