From 34f25124ba2ab32b8a95d6b37cd68d7bb85ff2d4 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 9 Apr 2018 23:46:20 +0200 Subject: [PATCH] Make LocalStore::addToStore(srcPath) run in constant memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reduces memory consumption of nix-instantiate \ -E 'with import <nixpkgs> {}; runCommand "foo" { src = ./blender; } "echo foo"' \ --option nar-buffer-size 10000 (where ./blender is a 1.1 GiB tree) from 1716 to 36 MiB, while still ensuring that we don't do any write I/O for small source paths (up to 'nar-buffer-size' bytes). The downside is that large paths are now always written to a temporary location in the store, even if they produce an already valid store path. Thus, adding large paths might be slower and run out of disk space. ¯\_(ツ)_/¯ Of course, you can always restore the old behaviour by setting 'nar-buffer-size' to a very high value. --- src/libstore/globals.hh | 3 + src/libstore/local-store.cc | 121 +++++++++++++++++++++++++++++++++--- 2 files changed, 115 insertions(+), 9 deletions(-) diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 58cf08763..4d5eec7bf 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -365,6 +365,9 @@ public: Setting warnDirty{this, true, "warn-dirty", "Whether to warn about dirty Git/Mercurial trees."}; + + Setting narBufferSize{this, 32 * 1024 * 1024, "nar-buffer-size", + "Maximum size of NARs before spilling them to disk."}; }; diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index eed225349..b9176ec38 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1098,16 +1098,119 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, { Path srcPath(absPath(_srcPath)); - /* Read the whole path into memory. This is not a very scalable - method for very large paths, but `copyPath' is mainly used for - small files. 
*/ - StringSink sink; - if (method == FileIngestionMethod::Recursive) - dumpPath(srcPath, sink, filter); - else - sink.s = make_ref(readFile(srcPath)); + if (method != FileIngestionMethod::Recursive) + return addToStoreFromDump(readFile(srcPath), name, method, hashAlgo, repair); - return addToStoreFromDump(*sink.s, name, method, hashAlgo, repair); + /* For computing the NAR hash. */ + auto sha256Sink = std::make_unique(htSHA256); + + /* For computing the store path. In recursive SHA-256 mode, this + is the same as the NAR hash, so no need to do it again. */ + std::unique_ptr hashSink = + hashAlgo == htSHA256 + ? nullptr + : std::make_unique(hashAlgo); + + /* Read the source path into memory, but only if it's up to + narBufferSize bytes. If it's larger, write it to a temporary + location in the Nix store. If the subsequently computed + destination store path is already valid, we just delete the + temporary path. Otherwise, we move it to the destination store + path. */ + bool inMemory = true; + std::string nar; + + auto source = sinkToSource([&](Sink & sink) { + + LambdaSink sink2([&](const unsigned char * buf, size_t len) { + (*sha256Sink)(buf, len); + if (hashSink) (*hashSink)(buf, len); + + if (inMemory) { + if (nar.size() + len > settings.narBufferSize) { + inMemory = false; + sink << 1; + sink((const unsigned char *) nar.data(), nar.size()); + nar.clear(); + } else { + nar.append((const char *) buf, len); + } + } + + if (!inMemory) sink(buf, len); + }); + + dumpPath(srcPath, sink2, filter); + }); + + std::unique_ptr delTempDir; + Path tempPath; + + try { + /* Wait for the source coroutine to give us some dummy + data. This is so that we don't create the temporary + directory if the NAR fits in memory. 
*/ + readInt(*source); + + auto tempDir = createTempDir(realStoreDir, "add"); + delTempDir = std::make_unique(tempDir); + tempPath = tempDir + "/x"; + + restorePath(tempPath, *source); + + } catch (EndOfFile &) { + if (!inMemory) throw; + /* The NAR fits in memory, so we didn't do restorePath(). */ + } + + auto sha256 = sha256Sink->finish(); + + Hash hash = hashSink ? hashSink->finish().first : sha256.first; + + Path dstPath = makeFixedOutputPath(method, hash, name); + + addTempRoot(dstPath); + + if (repair || !isValidPath(dstPath)) { + + /* The first check above is an optimisation to prevent + unnecessary lock acquisition. */ + + Path realPath = realStoreDir + "/" + baseNameOf(dstPath); + + PathLocks outputLock({realPath}); + + if (repair || !isValidPath(dstPath)) { + + deletePath(realPath); + + autoGC(); + + if (inMemory) { + /* Restore from the NAR in memory. */ + StringSource source(nar); + restorePath(realPath, source); + } else { + /* Move the temporary path we restored above. */ + if (rename(tempPath.c_str(), realPath.c_str())) + throw Error("renaming '%s' to '%s'", tempPath, realPath); + } + + canonicalisePathMetaData(realPath, -1); // FIXME: merge into restorePath + + optimisePath(realPath); + + ValidPathInfo info(dstPath); + info.narHash = sha256.first; + info.narSize = sha256.second; + info.ca = FixedOutputHash { .method = method, .hash = hash }; + registerValidPath(info); + } + + outputLock.setDeletion(true); + } + + return dstPath; }