Merge pull request #3801 from obsidiansystems/from-dump-stream

Constant space `addToStoreFromDump` and deduplicate code
This commit is contained in:
Eelco Dolstra 2020-07-21 15:11:52 +02:00 committed by GitHub
commit 0835447eaa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 100 additions and 119 deletions

View file

@ -2774,7 +2774,7 @@ struct RestrictedStore : public LocalFSStore
goal.addDependency(info.path); goal.addDependency(info.path);
} }
StorePath addToStoreFromDump(const string & dump, const string & name, StorePath addToStoreFromDump(Source & dump, const string & name,
FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) override FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) override
{ {
auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, repair); auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, repair);

View file

@ -350,21 +350,24 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
} }
case wopAddToStore: { case wopAddToStore: {
std::string s, baseName; HashType hashAlgo;
std::string baseName;
FileIngestionMethod method; FileIngestionMethod method;
{ {
bool fixed; uint8_t recursive; bool fixed;
from >> baseName >> fixed /* obsolete */ >> recursive >> s; uint8_t recursive;
std::string hashAlgoRaw;
from >> baseName >> fixed /* obsolete */ >> recursive >> hashAlgoRaw;
if (recursive > (uint8_t) FileIngestionMethod::Recursive) if (recursive > (uint8_t) FileIngestionMethod::Recursive)
throw Error("unsupported FileIngestionMethod with value of %i; you may need to upgrade nix-daemon", recursive); throw Error("unsupported FileIngestionMethod with value of %i; you may need to upgrade nix-daemon", recursive);
method = FileIngestionMethod { recursive }; method = FileIngestionMethod { recursive };
/* Compatibility hack. */ /* Compatibility hack. */
if (!fixed) { if (!fixed) {
s = "sha256"; hashAlgoRaw = "sha256";
method = FileIngestionMethod::Recursive; method = FileIngestionMethod::Recursive;
} }
hashAlgo = parseHashType(hashAlgoRaw);
} }
HashType hashAlgo = parseHashType(s);
StringSink saved; StringSink saved;
TeeSource savedNARSource(from, saved); TeeSource savedNARSource(from, saved);
@ -382,7 +385,9 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
logger->startWork(); logger->startWork();
if (!savedRegular.regular) throw Error("regular file expected"); if (!savedRegular.regular) throw Error("regular file expected");
auto path = store->addToStoreFromDump(*saved.s, baseName, method, hashAlgo); // FIXME: try to stream directly from `from`.
StringSource dumpSource { *saved.s };
auto path = store->addToStoreFromDump(dumpSource, baseName, method, hashAlgo);
logger->stopWork(); logger->stopWork();
to << store->printStorePath(path); to << store->printStorePath(path);

View file

@ -1033,82 +1033,26 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
} }
StorePath LocalStore::addToStoreFromDump(const string & dump, const string & name,
FileIngestionMethod method, HashType hashAlgo, RepairFlag repair)
{
Hash h = hashString(hashAlgo, dump);
auto dstPath = makeFixedOutputPath(method, h, name);
addTempRoot(dstPath);
if (repair || !isValidPath(dstPath)) {
/* The first check above is an optimisation to prevent
unnecessary lock acquisition. */
auto realPath = Store::toRealPath(dstPath);
PathLocks outputLock({realPath});
if (repair || !isValidPath(dstPath)) {
deletePath(realPath);
autoGC();
if (method == FileIngestionMethod::Recursive) {
StringSource source(dump);
restorePath(realPath, source);
} else
writeFile(realPath, dump);
canonicalisePathMetaData(realPath, -1);
/* Register the SHA-256 hash of the NAR serialisation of
the path in the database. We may just have computed it
above (if called with recursive == true and hashAlgo ==
sha256); otherwise, compute it here. */
HashResult hash;
if (method == FileIngestionMethod::Recursive) {
hash.first = hashAlgo == htSHA256 ? h : hashString(htSHA256, dump);
hash.second = dump.size();
} else
hash = hashPath(htSHA256, realPath);
optimisePath(realPath); // FIXME: combine with hashPath()
ValidPathInfo info(dstPath);
info.narHash = hash.first;
info.narSize = hash.second;
info.ca = FixedOutputHash { .method = method, .hash = h };
registerValidPath(info);
}
outputLock.setDeletion(true);
}
return dstPath;
}
StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
FileIngestionMethod method, HashType hashAlgo, PathFilter & filter, RepairFlag repair) FileIngestionMethod method, HashType hashAlgo, PathFilter & filter, RepairFlag repair)
{ {
Path srcPath(absPath(_srcPath)); Path srcPath(absPath(_srcPath));
auto source = sinkToSource([&](Sink & sink) {
if (method == FileIngestionMethod::Recursive)
dumpPath(srcPath, sink, filter);
else
readFile(srcPath, sink);
});
return addToStoreFromDump(*source, name, method, hashAlgo, repair);
}
if (method != FileIngestionMethod::Recursive)
return addToStoreFromDump(readFile(srcPath), name, method, hashAlgo, repair);
/* For computing the NAR hash. */ StorePath LocalStore::addToStoreFromDump(Source & source0, const string & name,
auto sha256Sink = std::make_unique<HashSink>(htSHA256); FileIngestionMethod method, HashType hashAlgo, RepairFlag repair)
{
/* For computing the store path. In recursive SHA-256 mode, this /* For computing the store path. */
is the same as the NAR hash, so no need to do it again. */ auto hashSink = std::make_unique<HashSink>(hashAlgo);
std::unique_ptr<HashSink> hashSink = TeeSource source { source0, *hashSink };
hashAlgo == htSHA256
? nullptr
: std::make_unique<HashSink>(hashAlgo);
/* Read the source path into memory, but only if it's up to /* Read the source path into memory, but only if it's up to
narBufferSize bytes. If it's larger, write it to a temporary narBufferSize bytes. If it's larger, write it to a temporary
@ -1116,55 +1060,49 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
destination store path is already valid, we just delete the destination store path is already valid, we just delete the
temporary path. Otherwise, we move it to the destination store temporary path. Otherwise, we move it to the destination store
path. */ path. */
bool inMemory = true; bool inMemory = false;
std::string nar;
auto source = sinkToSource([&](Sink & sink) { std::string dump;
LambdaSink sink2([&](const unsigned char * buf, size_t len) { /* Fill out buffer, and decide whether we are working strictly in
(*sha256Sink)(buf, len); memory based on whether we break out because the buffer is full
if (hashSink) (*hashSink)(buf, len); or the original source is empty */
while (dump.size() < settings.narBufferSize) {
if (inMemory) { auto oldSize = dump.size();
if (nar.size() + len > settings.narBufferSize) { constexpr size_t chunkSize = 1024;
inMemory = false; auto want = std::min(chunkSize, settings.narBufferSize - oldSize);
sink << 1; dump.resize(oldSize + want);
sink((const unsigned char *) nar.data(), nar.size()); auto got = 0;
nar.clear(); try {
} else { got = source.read((uint8_t *) dump.data() + oldSize, want);
nar.append((const char *) buf, len); } catch (EndOfFile &) {
} inMemory = true;
} break;
}
if (!inMemory) sink(buf, len); dump.resize(oldSize + got);
}); }
dumpPath(srcPath, sink2, filter);
});
std::unique_ptr<AutoDelete> delTempDir; std::unique_ptr<AutoDelete> delTempDir;
Path tempPath; Path tempPath;
try { if (!inMemory) {
/* Wait for the source coroutine to give us some dummy /* Drain what we pulled so far, and then keep on pulling */
data. This is so that we don't create the temporary StringSource dumpSource { dump };
directory if the NAR fits in memory. */ ChainSource bothSource { dumpSource, source };
readInt(*source);
auto tempDir = createTempDir(realStoreDir, "add"); auto tempDir = createTempDir(realStoreDir, "add");
delTempDir = std::make_unique<AutoDelete>(tempDir); delTempDir = std::make_unique<AutoDelete>(tempDir);
tempPath = tempDir + "/x"; tempPath = tempDir + "/x";
restorePath(tempPath, *source); if (method == FileIngestionMethod::Recursive)
restorePath(tempPath, bothSource);
else
writeFile(tempPath, bothSource);
} catch (EndOfFile &) { dump.clear();
if (!inMemory) throw;
/* The NAR fits in memory, so we didn't do restorePath(). */
} }
auto sha256 = sha256Sink->finish(); auto [hash, size] = hashSink->finish();
Hash hash = hashSink ? hashSink->finish().first : sha256.first;
auto dstPath = makeFixedOutputPath(method, hash, name); auto dstPath = makeFixedOutputPath(method, hash, name);
@ -1186,22 +1124,34 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
autoGC(); autoGC();
if (inMemory) { if (inMemory) {
StringSource dumpSource { dump };
/* Restore from the NAR in memory. */ /* Restore from the NAR in memory. */
StringSource source(nar); if (method == FileIngestionMethod::Recursive)
restorePath(realPath, source); restorePath(realPath, dumpSource);
else
writeFile(realPath, dumpSource);
} else { } else {
/* Move the temporary path we restored above. */ /* Move the temporary path we restored above. */
if (rename(tempPath.c_str(), realPath.c_str())) if (rename(tempPath.c_str(), realPath.c_str()))
throw Error("renaming '%s' to '%s'", tempPath, realPath); throw Error("renaming '%s' to '%s'", tempPath, realPath);
} }
/* For computing the nar hash. In recursive SHA-256 mode, this
is the same as the store hash, so no need to do it again. */
auto narHash = std::pair { hash, size };
if (method != FileIngestionMethod::Recursive || hashAlgo != htSHA256) {
HashSink narSink { htSHA256 };
dumpPath(realPath, narSink);
narHash = narSink.finish();
}
canonicalisePathMetaData(realPath, -1); // FIXME: merge into restorePath canonicalisePathMetaData(realPath, -1); // FIXME: merge into restorePath
optimisePath(realPath); optimisePath(realPath);
ValidPathInfo info(dstPath); ValidPathInfo info(dstPath);
info.narHash = sha256.first; info.narHash = narHash.first;
info.narSize = sha256.second; info.narSize = narHash.second;
info.ca = FixedOutputHash { .method = method, .hash = hash }; info.ca = FixedOutputHash { .method = method, .hash = hash };
registerValidPath(info); registerValidPath(info);
} }

View file

@ -153,7 +153,7 @@ public:
in `dump', which is either a NAR serialisation (if recursive == in `dump', which is either a NAR serialisation (if recursive ==
true) or simply the contents of a regular file (if recursive == true) or simply the contents of a regular file (if recursive ==
false). */ false). */
StorePath addToStoreFromDump(const string & dump, const string & name, StorePath addToStoreFromDump(Source & dump, const string & name,
FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) override; FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) override;
StorePath addTextToStore(const string & name, const string & s, StorePath addTextToStore(const string & name, const string & s,

View file

@ -460,7 +460,7 @@ public:
std::optional<Hash> expectedCAHash = {}); std::optional<Hash> expectedCAHash = {});
// FIXME: remove? // FIXME: remove?
virtual StorePath addToStoreFromDump(const string & dump, const string & name, virtual StorePath addToStoreFromDump(Source & dump, const string & name,
FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair)
{ {
throw Error("addToStoreFromDump() is not supported by this store"); throw Error("addToStoreFromDump() is not supported by this store");

View file

@ -322,5 +322,18 @@ void StringSink::operator () (const unsigned char * data, size_t len)
s->append((const char *) data, len); s->append((const char *) data, len);
} }
size_t ChainSource::read(unsigned char * data, size_t len)
{
if (useSecond) {
return source2.read(data, len);
} else {
try {
return source1.read(data, len);
} catch (EndOfFile &) {
useSecond = true;
return this->read(data, len);
}
}
}
} }

View file

@ -189,7 +189,7 @@ struct TeeSource : Source
size_t read(unsigned char * data, size_t len) size_t read(unsigned char * data, size_t len)
{ {
size_t n = orig.read(data, len); size_t n = orig.read(data, len);
sink(data, len); sink(data, n);
return n; return n;
} }
}; };
@ -256,6 +256,19 @@ struct LambdaSource : Source
} }
}; };
/* Chain two sources together so after the first is exhausted, the second is
used */
struct ChainSource : Source
{
Source & source1, & source2;
bool useSecond = false;
ChainSource(Source & s1, Source & s2)
: source1(s1), source2(s2)
{ }
size_t read(unsigned char * data, size_t len) override;
};
/* Convert a function that feeds data into a Sink into a Source. The /* Convert a function that feeds data into a Sink into a Source. The
Source executes the function as a coroutine. */ Source executes the function as a coroutine. */