From 68dfb8c6aef7afebf0312c48bb5010653fc464b3 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 16 Jul 2020 05:09:41 +0000 Subject: [PATCH 1/2] Optimize `addToStoreSlow` and remove `TeeParseSink` --- src/libstore/daemon.cc | 47 +++++++---------------------------- src/libstore/export-import.cc | 12 +++++---- src/libstore/store-api.cc | 35 ++++++++++++++++++++------ src/libutil/archive.hh | 25 ++++++++++++++++--- 4 files changed, 65 insertions(+), 54 deletions(-) diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index db7139374..573836f7f 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -173,31 +173,6 @@ struct TunnelSource : BufferedSource } }; -/* If the NAR archive contains a single file at top-level, then save - the contents of the file to `s'. Otherwise barf. */ -struct RetrieveRegularNARSink : ParseSink -{ - bool regular; - string s; - - RetrieveRegularNARSink() : regular(true) { } - - void createDirectory(const Path & path) - { - regular = false; - } - - void receiveContents(unsigned char * data, unsigned int len) - { - s.append((const char *) data, len); - } - - void createSymlink(const Path & path, const string & target) - { - regular = false; - } -}; - struct ClientSettings { bool keepFailed; @@ -391,9 +366,9 @@ static void performOp(TunnelLogger * logger, ref store, } HashType hashAlgo = parseHashType(s); - StringSink savedNAR; - TeeSource savedNARSource(from, savedNAR); - RetrieveRegularNARSink savedRegular; + StringSink saved; + TeeSource savedNARSource(from, saved); + RetrieveRegularNARSink savedRegular { saved }; if (method == FileIngestionMethod::Recursive) { /* Get the entire NAR dump from the client and save it to @@ -407,11 +382,7 @@ static void performOp(TunnelLogger * logger, ref store, logger->startWork(); if (!savedRegular.regular) throw Error("regular file expected"); - auto path = store->addToStoreFromDump( - method == FileIngestionMethod::Recursive ? *savedNAR.s : savedRegular.s, - baseName, - method, - hashAlgo); + auto path = store->addToStoreFromDump(*saved.s, baseName, method, hashAlgo); logger->stopWork(); to << store->printStorePath(path); @@ -727,15 +698,15 @@ static void performOp(TunnelLogger * logger, ref store, if (!trusted) info.ultimate = false; - std::string saved; std::unique_ptr source; if (GET_PROTOCOL_MINOR(clientVersion) >= 21) source = std::make_unique(from, to); else { - TeeParseSink tee(from); - parseDump(tee, tee.source); - saved = std::move(*tee.saved.s); - source = std::make_unique(saved); + StringSink saved; + TeeSource tee { from, saved }; + ParseSink ether; + parseDump(ether, tee); + source = std::make_unique(std::move(*saved.s)); } logger->startWork(); diff --git a/src/libstore/export-import.cc b/src/libstore/export-import.cc index 082d0f1d1..b963d64d7 100644 --- a/src/libstore/export-import.cc +++ b/src/libstore/export-import.cc @@ -60,8 +60,10 @@ StorePaths Store::importPaths(Source & source, CheckSigsFlag checkSigs) if (n != 1) throw Error("input doesn't look like something created by 'nix-store --export'"); /* Extract the NAR from the source. */ - TeeParseSink tee(source); - parseDump(tee, tee.source); + StringSink saved; + TeeSource tee { source, saved }; + ParseSink ether; + parseDump(ether, tee); uint32_t magic = readInt(source); if (magic != exportMagic) @@ -77,15 +79,15 @@ StorePaths Store::importPaths(Source & source, CheckSigsFlag checkSigs) if (deriver != "") info.deriver = parseStorePath(deriver); - info.narHash = hashString(htSHA256, *tee.saved.s); - info.narSize = tee.saved.s->size(); + info.narHash = hashString(htSHA256, *saved.s); + info.narSize = saved.s->size(); // Ignore optional legacy signature. if (readInt(source) == 1) readString(source); // Can't use underlying source, which would have been exhausted - auto source = StringSource { *tee.saved.s }; + auto source = StringSource { *saved.s }; addToStore(info, source, NoRepair, checkSigs); res.push_back(info.path); diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 5b9f79049..5c8dddba5 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -226,16 +226,37 @@ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath, FileIngestionMethod method, HashType hashAlgo, std::optional expectedCAHash) { - /* FIXME: inefficient: we're reading/hashing 'tmpFile' three + /* FIXME: inefficient: we're reading/hashing 'tmpFile' two times. */ + HashSink narHashSink { htSHA256 }; + HashSink caHashSink { hashAlgo }; + RetrieveRegularNARSink fileSink { caHashSink }; - auto [narHash, narSize] = hashPath(htSHA256, srcPath); + TeeSink sinkIfNar { narHashSink, caHashSink }; - auto hash = method == FileIngestionMethod::Recursive - ? hashAlgo == htSHA256 - ? narHash - : hashPath(hashAlgo, srcPath).first - : hashFile(hashAlgo, srcPath); + /* We use the tee sink if we need to hash he nar twice */ + auto & sink = method == FileIngestionMethod::Recursive && hashAlgo != htSHA256 + ? static_cast(sinkIfNar) + : narHashSink; + + auto fileSource = sinkToSource([&](Sink & sink) { + dumpPath(srcPath, sink); + }); + + TeeSource tapped { *fileSource, sink }; + + ParseSink blank; + auto & parseSink = method == FileIngestionMethod::Flat + ? fileSink + : blank; + + parseDump(parseSink, tapped); + + auto [narHash, narSize] = narHashSink.finish(); + + auto hash = method == FileIngestionMethod::Recursive && hashAlgo == htSHA256 + ? narHash + : caHashSink.finish().first; if (expectedCAHash && expectedCAHash != hash) throw Error("hash mismatch for '%s'", srcPath); diff --git a/src/libutil/archive.hh b/src/libutil/archive.hh index 302b1bb18..57780d16a 100644 --- a/src/libutil/archive.hh +++ b/src/libutil/archive.hh @@ -63,12 +63,29 @@ struct ParseSink virtual void createSymlink(const Path & path, const string & target) { }; }; -struct TeeParseSink : ParseSink +/* If the NAR archive contains a single file at top-level, then save + the contents of the file to `s'. Otherwise barf. */ +struct RetrieveRegularNARSink : ParseSink { - StringSink saved; - TeeSource source; + bool regular = true; + Sink & sink; - TeeParseSink(Source & source) : source(source, saved) { } + RetrieveRegularNARSink(Sink & sink) : sink(sink) { } + + void createDirectory(const Path & path) + { + regular = false; + } + + void receiveContents(unsigned char * data, unsigned int len) + { + sink(data, len); + } + + void createSymlink(const Path & path, const string & target) + { + regular = false; + } }; void parseDump(ParseSink & sink, Source & source); From ac2fc7ba1fe6b64ec535e4ce63d13fcadf7fdba7 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 20 Jul 2020 11:29:46 -0400 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: Eelco Dolstra --- src/libstore/store-api.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 5c8dddba5..6c0a61766 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -234,7 +234,7 @@ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath, TeeSink sinkIfNar { narHashSink, caHashSink }; - /* We use the tee sink if we need to hash he nar twice */ + /* We use the tee sink if we need to hash the nar twice */ auto & sink = method == FileIngestionMethod::Recursive && hashAlgo != htSHA256 ? static_cast(sinkIfNar) : narHashSink; @@ -250,7 +250,11 @@ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath, ? fileSink : blank; - parseDump(parseSink, tapped); + parseDump( + parseSink, + method == FileIngestionMethod::Recursive && hashAlgo == htSHA256 + ? *fileSource // don't need to hash twice if we just can use the `narHash` twice + : tapped); auto [narHash, narSize] = narHashSink.finish();