From 9ec10046e04a509cc982102f587cf06840f02327 Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Sat, 11 Jul 2020 16:06:24 +0000
Subject: [PATCH 01/10] Narrow scope of temporary value

---
 src/libstore/daemon.cc | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc
index db7139374..6e0b290ed 100644
--- a/src/libstore/daemon.cc
+++ b/src/libstore/daemon.cc
@@ -375,21 +375,24 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
     }
 
     case wopAddToStore: {
-        std::string s, baseName;
+        HashType hashAlgo;
+        std::string baseName;
         FileIngestionMethod method;
         {
-            bool fixed; uint8_t recursive;
-            from >> baseName >> fixed /* obsolete */ >> recursive >> s;
+            bool fixed;
+            uint8_t recursive;
+            std::string hashAlgoRaw;
+            from >> baseName >> fixed /* obsolete */ >> recursive >> hashAlgoRaw;
             if (recursive > (uint8_t) FileIngestionMethod::Recursive)
                 throw Error("unsupported FileIngestionMethod with value of %i; you may need to upgrade nix-daemon", recursive);
             method = FileIngestionMethod { recursive };
             /* Compatibility hack. */
             if (!fixed) {
-                s = "sha256";
+                hashAlgoRaw = "sha256";
                 method = FileIngestionMethod::Recursive;
             }
+            hashAlgo = parseHashType(hashAlgoRaw);
         }
-        HashType hashAlgo = parseHashType(s);
 
         StringSink savedNAR;
         TeeSource savedNARSource(from, savedNAR);

From c86fc3a9657096b74fe967f2f0bbd120e46908f6 Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Sat, 11 Jul 2020 15:55:04 +0000
Subject: [PATCH 02/10] Crudely make `addToStoreFromDump` take `Source` not
 string

I just as little beyond the type as possible, so the implementation
changes this enables can be reviewed separately.
---
 src/libstore/build.cc       | 2 +-
 src/libstore/daemon.cc      | 5 ++++-
 src/libstore/local-store.cc | 5 ++++-
 src/libstore/local-store.hh | 2 +-
 src/libstore/store-api.hh   | 2 +-
 5 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/libstore/build.cc b/src/libstore/build.cc
index ac2e67574..62294a08c 100644
--- a/src/libstore/build.cc
+++ b/src/libstore/build.cc
@@ -2774,7 +2774,7 @@ struct RestrictedStore : public LocalFSStore
         goal.addDependency(info.path);
     }
 
-    StorePath addToStoreFromDump(const string & dump, const string & name,
+    StorePath addToStoreFromDump(Source & dump, const string & name,
         FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) override
     {
         auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, repair);
diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc
index 6e0b290ed..69d7ef511 100644
--- a/src/libstore/daemon.cc
+++ b/src/libstore/daemon.cc
@@ -410,8 +410,11 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
         logger->startWork();
         if (!savedRegular.regular) throw Error("regular file expected");
 
+        StringSource dumpSource {
+            method == FileIngestionMethod::Recursive ? *savedNAR.s : savedRegular.s
+        };
         auto path = store->addToStoreFromDump(
-            method == FileIngestionMethod::Recursive ? *savedNAR.s : savedRegular.s,
+            dumpSource,
             baseName,
             method,
             hashAlgo);
diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc
index 26b226fe8..603f36352 100644
--- a/src/libstore/local-store.cc
+++ b/src/libstore/local-store.cc
@@ -1033,9 +1033,12 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
 }
 
 
-StorePath LocalStore::addToStoreFromDump(const string & dump, const string & name,
+StorePath LocalStore::addToStoreFromDump(Source & dumpSource, const string & name,
     FileIngestionMethod method, HashType hashAlgo, RepairFlag repair)
 {
+    // FIXME: See if we can use the original source to reduce memory usage.
+    auto dump = dumpSource.drain();
+
     Hash h = hashString(hashAlgo, dump);
 
     auto dstPath = makeFixedOutputPath(method, h, name);
diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh
index c0e5d0286..355c2814f 100644
--- a/src/libstore/local-store.hh
+++ b/src/libstore/local-store.hh
@@ -153,7 +153,7 @@ public:
        in `dump', which is either a NAR serialisation (if recursive ==
        true) or simply the contents of a regular file (if recursive ==
        false). */
-    StorePath addToStoreFromDump(const string & dump, const string & name,
+    StorePath addToStoreFromDump(Source & dump, const string & name,
         FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) override;
 
     StorePath addTextToStore(const string & name, const string & s,
diff --git a/src/libstore/store-api.hh b/src/libstore/store-api.hh
index a4be0411e..d1cb2035f 100644
--- a/src/libstore/store-api.hh
+++ b/src/libstore/store-api.hh
@@ -460,7 +460,7 @@ public:
         std::optional<Hash> expectedCAHash = {});
 
     // FIXME: remove?
-    virtual StorePath addToStoreFromDump(const string & dump, const string & name,
+    virtual StorePath addToStoreFromDump(Source & dump, const string & name,
         FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair)
     {
         throw Error("addToStoreFromDump() is not supported by this store");

From 9de96ef7d409fedea092045c4dbae7177f88962a Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Sat, 11 Jul 2020 19:03:39 +0000
Subject: [PATCH 03/10] Dedup `LocalStore::addToStore*`

The downsides is that the coroutine has byte-by-byte loop transfer. Will
fix that next.
---
 src/libstore/local-store.cc | 83 ++++++++++---------------------------
 src/libstore/local-store.hh |  4 ++
 2 files changed, 25 insertions(+), 62 deletions(-)

diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc
index 603f36352..925ac25bf 100644
--- a/src/libstore/local-store.cc
+++ b/src/libstore/local-store.cc
@@ -1033,65 +1033,16 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
 }
 
 
-StorePath LocalStore::addToStoreFromDump(Source & dumpSource, const string & name,
+StorePath LocalStore::addToStoreFromDump(Source & dump, const string & name,
     FileIngestionMethod method, HashType hashAlgo, RepairFlag repair)
 {
-    // FIXME: See if we can use the original source to reduce memory usage.
-    auto dump = dumpSource.drain();
-
-    Hash h = hashString(hashAlgo, dump);
-
-    auto dstPath = makeFixedOutputPath(method, h, name);
-
-    addTempRoot(dstPath);
-
-    if (repair || !isValidPath(dstPath)) {
-
-        /* The first check above is an optimisation to prevent
-           unnecessary lock acquisition. */
-
-        auto realPath = Store::toRealPath(dstPath);
-
-        PathLocks outputLock({realPath});
-
-        if (repair || !isValidPath(dstPath)) {
-
-            deletePath(realPath);
-
-            autoGC();
-
-            if (method == FileIngestionMethod::Recursive) {
-                StringSource source(dump);
-                restorePath(realPath, source);
-            } else
-                writeFile(realPath, dump);
-
-            canonicalisePathMetaData(realPath, -1);
-
-            /* Register the SHA-256 hash of the NAR serialisation of
-               the path in the database.  We may just have computed it
-               above (if called with recursive == true and hashAlgo ==
-               sha256); otherwise, compute it here. */
-            HashResult hash;
-            if (method == FileIngestionMethod::Recursive) {
-                hash.first = hashAlgo == htSHA256 ? h : hashString(htSHA256, dump);
-                hash.second = dump.size();
-            } else
-                hash = hashPath(htSHA256, realPath);
-
-            optimisePath(realPath); // FIXME: combine with hashPath()
-
-            ValidPathInfo info(dstPath);
-            info.narHash = hash.first;
-            info.narSize = hash.second;
-            info.ca = FixedOutputHash { .method = method, .hash = h };
-            registerValidPath(info);
+    return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) {
+        while (1) {
+            uint8_t buf[1];
+            auto n = dump.read(buf, 1);
+            sink(buf, n);
         }
-
-        outputLock.setDeletion(true);
-    }
-
-    return dstPath;
+    });
 }
 
 
@@ -1100,6 +1051,19 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
 {
     Path srcPath(absPath(_srcPath));
 
+    return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) {
+        if (method == FileIngestionMethod::Recursive)
+            dumpPath(srcPath, sink, filter);
+        else
+            readFile(srcPath, sink);
+    });
+}
+
+
+StorePath LocalStore::addToStoreCommon(
+    const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair,
+    std::function<void(Sink &)> demux)
+{
     /* For computing the NAR hash. */
     auto sha256Sink = std::make_unique<HashSink>(htSHA256);
 
@@ -1120,7 +1084,6 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
     std::string nar;
 
     auto source = sinkToSource([&](Sink & sink) {
-
         LambdaSink sink2([&](const unsigned char * buf, size_t len) {
             (*sha256Sink)(buf, len);
             if (hashSink) (*hashSink)(buf, len);
@@ -1138,11 +1101,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
 
             if (!inMemory) sink(buf, len);
         });
-
-        if (method == FileIngestionMethod::Recursive)
-            dumpPath(srcPath, sink2, filter);
-        else
-            readFile(srcPath, sink2);
+        demux(sink2);
     });
 
     std::unique_ptr<AutoDelete> delTempDir;
diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh
index 355c2814f..215731f87 100644
--- a/src/libstore/local-store.hh
+++ b/src/libstore/local-store.hh
@@ -290,6 +290,10 @@ private:
        specified by the ‘secret-key-files’ option. */
     void signPathInfo(ValidPathInfo & info);
 
+    StorePath addToStoreCommon(
+        const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair,
+        std::function<void(Sink &)> demux);
+
     Path getRealStoreDir() override { return realStoreDir; }
 
     void createUser(const std::string & userName, uid_t userId) override;

From 592851fb67cd15807109d6f65fb81f6af89af966 Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Sat, 11 Jul 2020 23:40:49 +0000
Subject: [PATCH 04/10] LocalStore::addToStoreFromDump copy in chunks

Rather than copying byte-by-byte, we let the coroutine know how much
data we would like it to send back to us.
---
 src/libstore/local-store.cc | 16 +++++++++-------
 src/libstore/local-store.hh |  2 +-
 src/libutil/serialise.cc    | 33 ++++++++++++++++++++-------------
 src/libutil/serialise.hh    | 11 ++++++++++-
 4 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc
index 925ac25bf..dac7a50c4 100644
--- a/src/libstore/local-store.cc
+++ b/src/libstore/local-store.cc
@@ -1036,11 +1036,13 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
 StorePath LocalStore::addToStoreFromDump(Source & dump, const string & name,
     FileIngestionMethod method, HashType hashAlgo, RepairFlag repair)
 {
-    return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) {
+    return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & wanted) {
         while (1) {
-            uint8_t buf[1];
-            auto n = dump.read(buf, 1);
+            constexpr size_t bufSize = 1024;
+            uint8_t buf[bufSize];
+            auto n = dump.read(buf, std::min(wanted, bufSize));
             sink(buf, n);
+            // when control is yielded back to us wanted will be updated.
         }
     });
 }
@@ -1051,7 +1053,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
 {
     Path srcPath(absPath(_srcPath));
 
-    return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) {
+    return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & _) {
         if (method == FileIngestionMethod::Recursive)
             dumpPath(srcPath, sink, filter);
         else
@@ -1062,7 +1064,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
 
 StorePath LocalStore::addToStoreCommon(
     const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair,
-    std::function<void(Sink &)> demux)
+    std::function<void(Sink &, size_t &)> demux)
 {
     /* For computing the NAR hash. */
     auto sha256Sink = std::make_unique<HashSink>(htSHA256);
@@ -1083,7 +1085,7 @@ StorePath LocalStore::addToStoreCommon(
     bool inMemory = true;
     std::string nar;
 
-    auto source = sinkToSource([&](Sink & sink) {
+    auto source = sinkToSource([&](Sink & sink, size_t & wanted) {
         LambdaSink sink2([&](const unsigned char * buf, size_t len) {
             (*sha256Sink)(buf, len);
             if (hashSink) (*hashSink)(buf, len);
@@ -1101,7 +1103,7 @@ StorePath LocalStore::addToStoreCommon(
 
             if (!inMemory) sink(buf, len);
         });
-        demux(sink2);
+        demux(sink2, wanted);
     });
 
     std::unique_ptr<AutoDelete> delTempDir;
diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh
index 215731f87..ae23004c4 100644
--- a/src/libstore/local-store.hh
+++ b/src/libstore/local-store.hh
@@ -292,7 +292,7 @@ private:
 
     StorePath addToStoreCommon(
         const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair,
-        std::function<void(Sink &)> demux);
+        std::function<void(Sink &, size_t &)> demux);
 
     Path getRealStoreDir() override { return realStoreDir; }
 
diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc
index c8b71188f..141e9e976 100644
--- a/src/libutil/serialise.cc
+++ b/src/libutil/serialise.cc
@@ -165,35 +165,43 @@ size_t StringSource::read(unsigned char * data, size_t len)
 #endif
 
 std::unique_ptr<Source> sinkToSource(
-    std::function<void(Sink &)> fun,
+    std::function<void(Sink &, size_t &)> fun,
     std::function<void()> eof)
 {
     struct SinkToSource : Source
     {
-        typedef boost::coroutines2::coroutine<std::string> coro_t;
+        typedef boost::coroutines2::coroutine<std::basic_string<uint8_t>> coro_t;
 
-        std::function<void(Sink &)> fun;
+        std::function<void(Sink &, size_t &)> fun;
         std::function<void()> eof;
         std::optional<coro_t::pull_type> coro;
         bool started = false;
 
-        SinkToSource(std::function<void(Sink &)> fun, std::function<void()> eof)
+        /* It would be nicer to have the co-routines have both args and a
+           return value, but unfortunately that was removed from Boost's
+           implementation for some reason, so we use some extra state instead.
+           */
+       size_t wanted = 0;
+
+        SinkToSource(std::function<void(Sink &, size_t &)> fun, std::function<void()> eof)
             : fun(fun), eof(eof)
         {
         }
 
-        std::string cur;
+        std::basic_string<uint8_t> cur;
         size_t pos = 0;
 
         size_t read(unsigned char * data, size_t len) override
         {
-            if (!coro)
+            wanted = len < cur.size() ? 0 : len - cur.size();
+            if (!coro) {
                 coro = coro_t::pull_type([&](coro_t::push_type & yield) {
-                    LambdaSink sink([&](const unsigned char * data, size_t len) {
-                            if (len) yield(std::string((const char *) data, len));
+                    LambdaSink sink([&](const uint8_t * data, size_t len) {
+                            if (len) yield(std::basic_string<uint8_t> { data, len });
                         });
-                    fun(sink);
+                    fun(sink, wanted);
                 });
+            }
 
             if (!*coro) { eof(); abort(); }
 
@@ -203,11 +211,10 @@ std::unique_ptr<Source> sinkToSource(
                 pos = 0;
             }
 
-            auto n = std::min(cur.size() - pos, len);
-            memcpy(data, (unsigned char *) cur.data() + pos, n);
-            pos += n;
+            auto numCopied = cur.copy(data, len, pos);
+            pos += numCopied;
 
-            return n;
+            return numCopied;
         }
     };
 
diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh
index 8386a4991..6cb9d1bf5 100644
--- a/src/libutil/serialise.hh
+++ b/src/libutil/serialise.hh
@@ -260,11 +260,20 @@ struct LambdaSource : Source
 /* Convert a function that feeds data into a Sink into a Source. The
    Source executes the function as a coroutine. */
 std::unique_ptr<Source> sinkToSource(
-    std::function<void(Sink &)> fun,
+    std::function<void(Sink &, size_t &)> fun,
     std::function<void()> eof = []() {
         throw EndOfFile("coroutine has finished");
     });
 
+static inline std::unique_ptr<Source> sinkToSource(
+    std::function<void(Sink &)> fun,
+    std::function<void()> eof = []() {
+        throw EndOfFile("coroutine has finished");
+    })
+{
+	return sinkToSource([fun](Sink & s, size_t & _) { fun(s); }, eof);
+}
+
 
 void writePadding(size_t len, Sink & sink);
 void writeString(const unsigned char * buf, size_t len, Sink & sink);

From 8173e7bfefc6a5771b2c9ec48bd6edd3b161dd90 Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Tue, 14 Jul 2020 21:11:08 +0000
Subject: [PATCH 05/10] Fix localhost::addToStore(...Path...)

We were calculating the nar hash wrong when the file ingestion method
was flat. I don't think there's anything we can do in that case but dump
the file again, so that's what I do.

As an optomization, we again could reuse the original dump for just the
recursive and non-sha256 case, but I rather do that after this fix, and
after my other PRs which deduplicate this code.
---
 src/libstore/local-store.cc | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc
index 26b226fe8..5827dfc58 100644
--- a/src/libstore/local-store.cc
+++ b/src/libstore/local-store.cc
@@ -1097,15 +1097,8 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
 {
     Path srcPath(absPath(_srcPath));
 
-    /* For computing the NAR hash. */
-    auto sha256Sink = std::make_unique<HashSink>(htSHA256);
-
-    /* For computing the store path. In recursive SHA-256 mode, this
-       is the same as the NAR hash, so no need to do it again. */
-    std::unique_ptr<HashSink> hashSink =
-        method == FileIngestionMethod::Recursive && hashAlgo == htSHA256
-        ? nullptr
-        : std::make_unique<HashSink>(hashAlgo);
+    /* For computing the store path. */
+    auto hashSink = std::make_unique<HashSink>(hashAlgo);
 
     /* Read the source path into memory, but only if it's up to
        narBufferSize bytes. If it's larger, write it to a temporary
@@ -1114,13 +1107,12 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
        temporary path. Otherwise, we move it to the destination store
        path. */
     bool inMemory = true;
-    std::string nar;
+    std::string nar; // TODO rename from "nar" to "dump"
 
     auto source = sinkToSource([&](Sink & sink) {
 
         LambdaSink sink2([&](const unsigned char * buf, size_t len) {
-            (*sha256Sink)(buf, len);
-            if (hashSink) (*hashSink)(buf, len);
+            (*hashSink)(buf, len);
 
             if (inMemory) {
                 if (nar.size() + len > settings.narBufferSize) {
@@ -1165,9 +1157,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
         /* The NAR fits in memory, so we didn't do restorePath(). */
     }
 
-    auto sha256 = sha256Sink->finish();
-
-    Hash hash = hashSink ? hashSink->finish().first : sha256.first;
+    auto [hash, size] = hashSink->finish();
 
     auto dstPath = makeFixedOutputPath(method, hash, name);
 
@@ -1201,13 +1191,22 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
                     throw Error("renaming '%s' to '%s'", tempPath, realPath);
             }
 
+            /* For computing the nar hash. In recursive SHA-256 mode, this
+               is the same as the store hash, so no need to do it again. */
+            auto narHash = std::pair { hash, size };
+            if (method != FileIngestionMethod::Recursive || hashAlgo != htSHA256) {
+                HashSink narSink { htSHA256 };
+                dumpPath(realPath, narSink);
+                narHash = narSink.finish();
+            }
+
             canonicalisePathMetaData(realPath, -1); // FIXME: merge into restorePath
 
             optimisePath(realPath);
 
             ValidPathInfo info(dstPath);
-            info.narHash = sha256.first;
-            info.narSize = sha256.second;
+            info.narHash = narHash.first;
+            info.narSize = narHash.second;
             info.ca = FixedOutputHash { .method = method, .hash = hash };
             registerValidPath(info);
         }

From 650c2c655810c375296b52997e2f85298c7c566a Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Tue, 14 Jul 2020 21:28:50 +0000
Subject: [PATCH 06/10] Rename variable `nar` -> `dump` according to TODO

---
 src/libstore/local-store.cc | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc
index 5827dfc58..cd92f138c 100644
--- a/src/libstore/local-store.cc
+++ b/src/libstore/local-store.cc
@@ -1107,7 +1107,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
        temporary path. Otherwise, we move it to the destination store
        path. */
     bool inMemory = true;
-    std::string nar; // TODO rename from "nar" to "dump"
+    std::string dump;
 
     auto source = sinkToSource([&](Sink & sink) {
 
@@ -1115,13 +1115,13 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
             (*hashSink)(buf, len);
 
             if (inMemory) {
-                if (nar.size() + len > settings.narBufferSize) {
+                if (dump.size() + len > settings.narBufferSize) {
                     inMemory = false;
                     sink << 1;
-                    sink((const unsigned char *) nar.data(), nar.size());
-                    nar.clear();
+                    sink((const unsigned char *) dump.data(), dump.size());
+                    dump.clear();
                 } else {
-                    nar.append((const char *) buf, len);
+                    dump.append((const char *) buf, len);
                 }
             }
 
@@ -1180,7 +1180,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
 
             if (inMemory) {
                 /* Restore from the NAR in memory. */
-                StringSource source(nar);
+                StringSource source(dump);
                 if (method == FileIngestionMethod::Recursive)
                     restorePath(realPath, source);
                 else

From d087cf48552ee82e2bc78bb6c99854bab350ee00 Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Wed, 15 Jul 2020 21:10:33 +0000
Subject: [PATCH 07/10] Revert "Revert "LocalStore::addToStore(srcPath): Handle
 the flat case""

This reverts commit cff2157185912025c24a1b9dc99056161634176c.
---
 src/libstore/local-store.cc | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc
index b3f4b3f7d..26b226fe8 100644
--- a/src/libstore/local-store.cc
+++ b/src/libstore/local-store.cc
@@ -1097,16 +1097,13 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
 {
     Path srcPath(absPath(_srcPath));
 
-    if (method != FileIngestionMethod::Recursive)
-        return addToStoreFromDump(readFile(srcPath), name, method, hashAlgo, repair);
-
     /* For computing the NAR hash. */
     auto sha256Sink = std::make_unique<HashSink>(htSHA256);
 
     /* For computing the store path. In recursive SHA-256 mode, this
        is the same as the NAR hash, so no need to do it again. */
     std::unique_ptr<HashSink> hashSink =
-        hashAlgo == htSHA256
+        method == FileIngestionMethod::Recursive && hashAlgo == htSHA256
         ? nullptr
         : std::make_unique<HashSink>(hashAlgo);
 
@@ -1139,7 +1136,10 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
             if (!inMemory) sink(buf, len);
         });
 
-        dumpPath(srcPath, sink2, filter);
+        if (method == FileIngestionMethod::Recursive)
+            dumpPath(srcPath, sink2, filter);
+        else
+            readFile(srcPath, sink2);
     });
 
     std::unique_ptr<AutoDelete> delTempDir;
@@ -1155,7 +1155,10 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
         delTempDir = std::make_unique<AutoDelete>(tempDir);
         tempPath = tempDir + "/x";
 
-        restorePath(tempPath, *source);
+        if (method == FileIngestionMethod::Recursive)
+            restorePath(tempPath, *source);
+        else
+            writeFile(tempPath, *source);
 
     } catch (EndOfFile &) {
         if (!inMemory) throw;
@@ -1188,7 +1191,10 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
             if (inMemory) {
                 /* Restore from the NAR in memory. */
                 StringSource source(nar);
-                restorePath(realPath, source);
+                if (method == FileIngestionMethod::Recursive)
+                    restorePath(realPath, source);
+                else
+                    writeFile(realPath, source);
             } else {
                 /* Move the temporary path we restored above. */
                 if (rename(tempPath.c_str(), realPath.c_str()))

From bc109648c41f8021707b55b815e68a890a09f2f6 Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Wed, 15 Jul 2020 23:14:30 +0000
Subject: [PATCH 08/10] Get rid of `LocalStore::addToStoreCommon`

I got it to just become `LocalStore::addToStoreFromDump`, cleanly taking
a store and then doing nothing too fancy with it.

`LocalStore::addToStore(...Path...)` is now just a simple wrapper with a
bare-bones sinkToSource of the right dump command.
---
 src/libstore/local-store.cc | 93 ++++++++++++++++---------------------
 src/libstore/local-store.hh |  4 --
 src/libutil/serialise.cc    | 13 ++++++
 src/libutil/serialise.hh    | 15 +++++-
 4 files changed, 67 insertions(+), 58 deletions(-)

diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc
index b9fae6089..07e1679da 100644
--- a/src/libstore/local-store.cc
+++ b/src/libstore/local-store.cc
@@ -1033,38 +1033,22 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
 }
 
 
-StorePath LocalStore::addToStoreFromDump(Source & dump, const string & name,
-    FileIngestionMethod method, HashType hashAlgo, RepairFlag repair)
-{
-    return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & wanted) {
-        while (1) {
-            constexpr size_t bufSize = 1024;
-            uint8_t buf[bufSize];
-            auto n = dump.read(buf, std::min(wanted, bufSize));
-            sink(buf, n);
-            // when control is yielded back to us wanted will be updated.
-        }
-    });
-}
-
-
 StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
     FileIngestionMethod method, HashType hashAlgo, PathFilter & filter, RepairFlag repair)
 {
     Path srcPath(absPath(_srcPath));
-
-    return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & _) {
+    auto source = sinkToSource([&](Sink & sink, size_t & wanted) {
         if (method == FileIngestionMethod::Recursive)
             dumpPath(srcPath, sink, filter);
         else
             readFile(srcPath, sink);
     });
+    return addToStoreFromDump(*source, name, method, hashAlgo, repair);
 }
 
 
-StorePath LocalStore::addToStoreCommon(
-    const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair,
-    std::function<void(Sink &, size_t &)> demux)
+StorePath LocalStore::addToStoreFromDump(Source & source, const string & name,
+    FileIngestionMethod method, HashType hashAlgo, RepairFlag repair)
 {
     /* For computing the store path. */
     auto hashSink = std::make_unique<HashSink>(hashAlgo);
@@ -1075,50 +1059,53 @@ StorePath LocalStore::addToStoreCommon(
        destination store path is already valid, we just delete the
        temporary path. Otherwise, we move it to the destination store
        path. */
-    bool inMemory = true;
+    bool inMemory = false;
+
     std::string dump;
 
-    auto source = sinkToSource([&](Sink & sink, size_t & wanted) {
-        LambdaSink sink2([&](const unsigned char * buf, size_t len) {
-            (*hashSink)(buf, len);
-
-            if (inMemory) {
-                if (dump.size() + len > settings.narBufferSize) {
-                    inMemory = false;
-                    sink << 1;
-                    sink((const unsigned char *) dump.data(), dump.size());
-                    dump.clear();
-                } else {
-                    dump.append((const char *) buf, len);
-                }
-            }
-
-            if (!inMemory) sink(buf, len);
-        });
-        demux(sink2, wanted);
-    });
+    /* Fill out buffer, and decide whether we are working strictly in
+       memory based on whether we break out because the buffer is full
+       or the original source is empty */
+    while (dump.size() < settings.narBufferSize) {
+        auto oldSize = dump.size();
+        constexpr size_t chunkSize = 1024;
+        auto want = std::min(chunkSize, settings.narBufferSize - oldSize);
+        dump.resize(oldSize + want);
+        auto got = 0;
+        try {
+            got = source.read((uint8_t *) dump.data() + oldSize, want);
+        } catch (EndOfFile &) {
+            inMemory = true;
+            break;
+        }
+        /* Start hashing as we get data */
+        (*hashSink)((const uint8_t *) dump.data() + oldSize, got);
+        dump.resize(oldSize + got);
+    }
 
     std::unique_ptr<AutoDelete> delTempDir;
     Path tempPath;
 
-    try {
-        /* Wait for the source coroutine to give us some dummy
-           data. This is so that we don't create the temporary
-           directory if the NAR fits in memory. */
-        readInt(*source);
+    if (!inMemory) {
+        StringSource dumpSource { dump };
+        TeeSource rest { source, *hashSink };
+        ChainSource bothSource {
+            .source1 = dumpSource,
+            /* Continue hashing what's left, but don't rehash what we
+               already did. */
+            .source2 = rest,
+        };
 
         auto tempDir = createTempDir(realStoreDir, "add");
         delTempDir = std::make_unique<AutoDelete>(tempDir);
         tempPath = tempDir + "/x";
 
         if (method == FileIngestionMethod::Recursive)
-            restorePath(tempPath, *source);
+            restorePath(tempPath, bothSource);
         else
-            writeFile(tempPath, *source);
+            writeFile(tempPath, bothSource);
 
-    } catch (EndOfFile &) {
-        if (!inMemory) throw;
-        /* The NAR fits in memory, so we didn't do restorePath(). */
+        dump.clear();
     }
 
     auto [hash, size] = hashSink->finish();
@@ -1143,12 +1130,12 @@ StorePath LocalStore::addToStoreCommon(
             autoGC();
 
             if (inMemory) {
+                 StringSource dumpSource { dump };
                 /* Restore from the NAR in memory. */
-                StringSource source(dump);
                 if (method == FileIngestionMethod::Recursive)
-                    restorePath(realPath, source);
+                    restorePath(realPath, dumpSource);
                 else
-                    writeFile(realPath, source);
+                    writeFile(realPath, dumpSource);
             } else {
                 /* Move the temporary path we restored above. */
                 if (rename(tempPath.c_str(), realPath.c_str()))
diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh
index ae23004c4..355c2814f 100644
--- a/src/libstore/local-store.hh
+++ b/src/libstore/local-store.hh
@@ -290,10 +290,6 @@ private:
        specified by the ‘secret-key-files’ option. */
     void signPathInfo(ValidPathInfo & info);
 
-    StorePath addToStoreCommon(
-        const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair,
-        std::function<void(Sink &, size_t &)> demux);
-
     Path getRealStoreDir() override { return realStoreDir; }
 
     void createUser(const std::string & userName, uid_t userId) override;
diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc
index 141e9e976..4c72dc9f2 100644
--- a/src/libutil/serialise.cc
+++ b/src/libutil/serialise.cc
@@ -329,5 +329,18 @@ void StringSink::operator () (const unsigned char * data, size_t len)
     s->append((const char *) data, len);
 }
 
+size_t ChainSource::read(unsigned char * data, size_t len)
+{
+    if (useSecond) {
+        return source2.read(data, len);
+    } else {
+        try {
+            return source1.read(data, len);
+        } catch (EndOfFile &) {
+            useSecond = true;
+            return this->read(data, len);
+        }
+    }
+}
 
 }
diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh
index 6cb9d1bf5..3e3735ca5 100644
--- a/src/libutil/serialise.hh
+++ b/src/libutil/serialise.hh
@@ -256,6 +256,19 @@ struct LambdaSource : Source
     }
 };
 
+/* Chain two sources together so after the first is exhausted, the second is
+   used */
+struct ChainSource : Source
+{
+    Source & source1, & source2;
+    bool useSecond = false;
+    ChainSource(Source & s1, Source & s2)
+        : source1(s1), source2(s2)
+    { }
+
+    size_t read(unsigned char * data, size_t len) override;
+};
+
 
 /* Convert a function that feeds data into a Sink into a Source. The
    Source executes the function as a coroutine. */
@@ -271,7 +284,7 @@ static inline std::unique_ptr<Source> sinkToSource(
         throw EndOfFile("coroutine has finished");
     })
 {
-	return sinkToSource([fun](Sink & s, size_t & _) { fun(s); }, eof);
+    return sinkToSource([fun](Sink & s, size_t & _) { fun(s); }, eof);
 }
 
 

From 5602637d9ea195784368e99a226718fc95e6b978 Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Wed, 15 Jul 2020 23:19:41 +0000
Subject: [PATCH 09/10] Revert "LocalStore::addToStoreFromDump copy in chunks"

This reverts commit 592851fb67cd15807109d6f65fb81f6af89af966. We don't
need this extra feature anymore
---
 src/libstore/local-store.cc |  2 +-
 src/libutil/serialise.cc    | 33 +++++++++++++--------------------
 src/libutil/serialise.hh    | 11 +----------
 3 files changed, 15 insertions(+), 31 deletions(-)

diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc
index 07e1679da..b2b5afadd 100644
--- a/src/libstore/local-store.cc
+++ b/src/libstore/local-store.cc
@@ -1037,7 +1037,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
     FileIngestionMethod method, HashType hashAlgo, PathFilter & filter, RepairFlag repair)
 {
     Path srcPath(absPath(_srcPath));
-    auto source = sinkToSource([&](Sink & sink, size_t & wanted) {
+    auto source = sinkToSource([&](Sink & sink) {
         if (method == FileIngestionMethod::Recursive)
             dumpPath(srcPath, sink, filter);
         else
diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc
index 4c72dc9f2..00c945113 100644
--- a/src/libutil/serialise.cc
+++ b/src/libutil/serialise.cc
@@ -165,43 +165,35 @@ size_t StringSource::read(unsigned char * data, size_t len)
 #endif
 
 std::unique_ptr<Source> sinkToSource(
-    std::function<void(Sink &, size_t &)> fun,
+    std::function<void(Sink &)> fun,
     std::function<void()> eof)
 {
     struct SinkToSource : Source
     {
-        typedef boost::coroutines2::coroutine<std::basic_string<uint8_t>> coro_t;
+        typedef boost::coroutines2::coroutine<std::string> coro_t;
 
-        std::function<void(Sink &, size_t &)> fun;
+        std::function<void(Sink &)> fun;
         std::function<void()> eof;
         std::optional<coro_t::pull_type> coro;
         bool started = false;
 
-        /* It would be nicer to have the co-routines have both args and a
-           return value, but unfortunately that was removed from Boost's
-           implementation for some reason, so we use some extra state instead.
-           */
-       size_t wanted = 0;
-
-        SinkToSource(std::function<void(Sink &, size_t &)> fun, std::function<void()> eof)
+        SinkToSource(std::function<void(Sink &)> fun, std::function<void()> eof)
             : fun(fun), eof(eof)
         {
         }
 
-        std::basic_string<uint8_t> cur;
+        std::string cur;
         size_t pos = 0;
 
         size_t read(unsigned char * data, size_t len) override
         {
-            wanted = len < cur.size() ? 0 : len - cur.size();
-            if (!coro) {
+            if (!coro)
                 coro = coro_t::pull_type([&](coro_t::push_type & yield) {
-                    LambdaSink sink([&](const uint8_t * data, size_t len) {
-                            if (len) yield(std::basic_string<uint8_t> { data, len });
+                    LambdaSink sink([&](const unsigned char * data, size_t len) {
+                            if (len) yield(std::string((const char *) data, len));
                         });
-                    fun(sink, wanted);
+                    fun(sink);
                 });
-            }
 
             if (!*coro) { eof(); abort(); }
 
@@ -211,10 +203,11 @@ std::unique_ptr<Source> sinkToSource(
                 pos = 0;
             }
 
-            auto numCopied = cur.copy(data, len, pos);
-            pos += numCopied;
+            auto n = std::min(cur.size() - pos, len);
+            memcpy(data, (unsigned char *) cur.data() + pos, n);
+            pos += n;
 
-            return numCopied;
+            return n;
         }
     };
 
diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh
index 3e3735ca5..aa6b42597 100644
--- a/src/libutil/serialise.hh
+++ b/src/libutil/serialise.hh
@@ -273,19 +273,10 @@ struct ChainSource : Source
 /* Convert a function that feeds data into a Sink into a Source. The
    Source executes the function as a coroutine. */
 std::unique_ptr<Source> sinkToSource(
-    std::function<void(Sink &, size_t &)> fun,
-    std::function<void()> eof = []() {
-        throw EndOfFile("coroutine has finished");
-    });
-
-static inline std::unique_ptr<Source> sinkToSource(
     std::function<void(Sink &)> fun,
     std::function<void()> eof = []() {
         throw EndOfFile("coroutine has finished");
-    })
-{
-    return sinkToSource([fun](Sink & s, size_t & _) { fun(s); }, eof);
-}
+    });
 
 
 void writePadding(size_t len, Sink & sink);

From 3dcca18c30cbc09652f5ac644a9f8750f9ced0c9 Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Thu, 16 Jul 2020 13:39:03 +0000
Subject: [PATCH 10/10] Fix bug in TeeSource

We use this to simplify `LocalStore::addToStoreFromDump`.

Also, hope I fixed build error with old clang (used in Darwin CI).
---
 src/libstore/local-store.cc | 14 ++++----------
 src/libutil/serialise.hh    |  2 +-
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc
index b2b5afadd..96d10d9ba 100644
--- a/src/libstore/local-store.cc
+++ b/src/libstore/local-store.cc
@@ -1047,11 +1047,12 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
 }
 
 
-StorePath LocalStore::addToStoreFromDump(Source & source, const string & name,
+StorePath LocalStore::addToStoreFromDump(Source & source0, const string & name,
     FileIngestionMethod method, HashType hashAlgo, RepairFlag repair)
 {
     /* For computing the store path. */
     auto hashSink = std::make_unique<HashSink>(hashAlgo);
+    TeeSource source { source0, *hashSink };
 
     /* Read the source path into memory, but only if it's up to
        narBufferSize bytes. If it's larger, write it to a temporary
@@ -1078,8 +1079,6 @@ StorePath LocalStore::addToStoreFromDump(Source & source, const string & name,
             inMemory = true;
             break;
         }
-        /* Start hashing as we get data */
-        (*hashSink)((const uint8_t *) dump.data() + oldSize, got);
         dump.resize(oldSize + got);
     }
 
@@ -1087,14 +1086,9 @@ StorePath LocalStore::addToStoreFromDump(Source & source, const string & name,
     Path tempPath;
 
     if (!inMemory) {
+        /* Drain what we pulled so far, and then keep on pulling */
         StringSource dumpSource { dump };
-        TeeSource rest { source, *hashSink };
-        ChainSource bothSource {
-            .source1 = dumpSource,
-            /* Continue hashing what's left, but don't rehash what we
-               already did. */
-            .source2 = rest,
-        };
+        ChainSource bothSource { dumpSource, source };
 
         auto tempDir = createTempDir(realStoreDir, "add");
         delTempDir = std::make_unique<AutoDelete>(tempDir);
diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh
index aa6b42597..5d9acf887 100644
--- a/src/libutil/serialise.hh
+++ b/src/libutil/serialise.hh
@@ -189,7 +189,7 @@ struct TeeSource : Source
     size_t read(unsigned char * data, size_t len)
     {
         size_t n = orig.read(data, len);
-        sink(data, len);
+        sink(data, n);
         return n;
     }
 };