Make LocalStore::addToStore(srcPath) run in constant memory

This reduces memory consumption of

  nix-instantiate \
    -E 'with import <nixpkgs> {}; runCommand "foo" { src = ./blender; } "echo foo"' \
    --option nar-buffer-size 10000

(where ./blender is a 1.1 GiB tree) from 1716 to 36 MiB, while still
ensuring that we don't do any write I/O for small source paths (up to
'nar-buffer-size' bytes). The downside is that large paths are now
always written to a temporary location in the store, even if they
produce an already valid store path. Thus, adding large paths might be
slower and run out of disk space. ¯\_(ツ)_/¯ Of course, you can always
restore the old behaviour by setting 'nar-buffer-size' to a very high
value.
This commit is contained in:
Eelco Dolstra 2018-04-09 23:46:20 +02:00
parent 7d8d78f06a
commit 34f25124ba
2 changed files with 115 additions and 9 deletions

View file

@ -365,6 +365,9 @@ public:
Setting<bool> warnDirty{this, true, "warn-dirty", Setting<bool> warnDirty{this, true, "warn-dirty",
"Whether to warn about dirty Git/Mercurial trees."}; "Whether to warn about dirty Git/Mercurial trees."};
Setting<size_t> narBufferSize{this, 32 * 1024 * 1024, "nar-buffer-size",
"Maximum size of NARs before spilling them to disk."};
}; };

View file

@ -1098,16 +1098,119 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
{ {
Path srcPath(absPath(_srcPath)); Path srcPath(absPath(_srcPath));
/* Read the whole path into memory. This is not a very scalable if (method != FileIngestionMethod::Recursive)
method for very large paths, but `copyPath' is mainly used for return addToStoreFromDump(readFile(srcPath), name, method, hashAlgo, repair);
small files. */
StringSink sink;
if (method == FileIngestionMethod::Recursive)
dumpPath(srcPath, sink, filter);
else
sink.s = make_ref<std::string>(readFile(srcPath));
return addToStoreFromDump(*sink.s, name, method, hashAlgo, repair); /* For computing the NAR hash. */
auto sha256Sink = std::make_unique<HashSink>(htSHA256);
/* For computing the store path. In recursive SHA-256 mode, this
is the same as the NAR hash, so no need to do it again. */
std::unique_ptr<HashSink> hashSink =
hashAlgo == htSHA256
? nullptr
: std::make_unique<HashSink>(hashAlgo);
/* Read the source path into memory, but only if it's up to
narBufferSize bytes. If it's larger, write it to a temporary
location in the Nix store. If the subsequently computed
destination store path is already valid, we just delete the
temporary path. Otherwise, we move it to the destination store
path. */
bool inMemory = true;
std::string nar;
auto source = sinkToSource([&](Sink & sink) {
LambdaSink sink2([&](const unsigned char * buf, size_t len) {
(*sha256Sink)(buf, len);
if (hashSink) (*hashSink)(buf, len);
if (inMemory) {
if (nar.size() + len > settings.narBufferSize) {
inMemory = false;
sink << 1;
sink((const unsigned char *) nar.data(), nar.size());
nar.clear();
} else {
nar.append((const char *) buf, len);
}
}
if (!inMemory) sink(buf, len);
});
dumpPath(srcPath, sink2, filter);
});
std::unique_ptr<AutoDelete> delTempDir;
Path tempPath;
try {
/* Wait for the source coroutine to give us some dummy
data. This is so that we don't create the temporary
directory if the NAR fits in memory. */
readInt(*source);
auto tempDir = createTempDir(realStoreDir, "add");
delTempDir = std::make_unique<AutoDelete>(tempDir);
tempPath = tempDir + "/x";
restorePath(tempPath, *source);
} catch (EndOfFile &) {
if (!inMemory) throw;
/* The NAR fits in memory, so we didn't do restorePath(). */
}
auto sha256 = sha256Sink->finish();
Hash hash = hashSink ? hashSink->finish().first : sha256.first;
Path dstPath = makeFixedOutputPath(method, hash, name);
addTempRoot(dstPath);
if (repair || !isValidPath(dstPath)) {
/* The first check above is an optimisation to prevent
unnecessary lock acquisition. */
Path realPath = realStoreDir + "/" + baseNameOf(dstPath);
PathLocks outputLock({realPath});
if (repair || !isValidPath(dstPath)) {
deletePath(realPath);
autoGC();
if (inMemory) {
/* Restore from the NAR in memory. */
StringSource source(nar);
restorePath(realPath, source);
} else {
/* Move the temporary path we restored above. */
if (rename(tempPath.c_str(), realPath.c_str()))
throw Error("renaming '%s' to '%s'", tempPath, realPath);
}
canonicalisePathMetaData(realPath, -1); // FIXME: merge into restorePath
optimisePath(realPath);
ValidPathInfo info(dstPath);
info.narHash = sha256.first;
info.narSize = sha256.second;
info.ca = FixedOutputHash { .method = method, .hash = hash };
registerValidPath(info);
}
outputLock.setDeletion(true);
}
return dstPath;
} }