Merge pull request #3842 from obsidiansystems/fix-and-document-addToStoreSlow

Correct bug, thoroughly document addToStoreSlow
This commit is contained in:
Eelco Dolstra 2020-07-21 10:01:34 +02:00 committed by GitHub
commit 51ee506693
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -222,40 +222,68 @@ StorePath Store::computeStorePathForText(const string & name, const string & s,
} }
/*
The aim of this function is to compute in one pass the correct ValidPathInfo for
the files that we are trying to add to the store. To accomplish that in one
pass, given the different kinds of inputs that we can take (normal nar archives,
nar archives with non-SHA-256 hashes, and flat files), we set up a net of sinks
and aliases. Also, since the dataflow is obfuscated by this, we include here a
graphviz diagram:
digraph graphname {
node [shape=box]
fileSource -> narSink
narSink [style=dashed]
narSink -> unusualHashTee [style = dashed, label = "Recursive && !SHA-256"]
narSink -> narHashSink [style = dashed, label = "else"]
unusualHashTee -> narHashSink
unusualHashTee -> caHashSink
fileSource -> parseSink
parseSink [style=dashed]
parseSink -> fileSink [style = dashed, label = "Flat"]
parseSink -> blank [style = dashed, label = "Recursive"]
fileSink -> caHashSink
}
*/
ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath, ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath,
FileIngestionMethod method, HashType hashAlgo, FileIngestionMethod method, HashType hashAlgo,
std::optional<Hash> expectedCAHash) std::optional<Hash> expectedCAHash)
{ {
/* FIXME: inefficient: we're reading/hashing 'tmpFile' two
times. */
HashSink narHashSink { htSHA256 }; HashSink narHashSink { htSHA256 };
HashSink caHashSink { hashAlgo }; HashSink caHashSink { hashAlgo };
/* Note that fileSink and unusualHashTee must be mutually exclusive, since
they both write to caHashSink. That requirement currently holds because
the former is only used in the flat case. */
RetrieveRegularNARSink fileSink { caHashSink }; RetrieveRegularNARSink fileSink { caHashSink };
TeeSink unusualHashTee { narHashSink, caHashSink };
TeeSink sinkIfNar { narHashSink, caHashSink }; auto & narSink = method == FileIngestionMethod::Recursive && hashAlgo != htSHA256
? static_cast<Sink &>(unusualHashTee)
/* We use the tee sink if we need to hash the nar twice */
auto & sink = method == FileIngestionMethod::Recursive && hashAlgo != htSHA256
? static_cast<Sink &>(sinkIfNar)
: narHashSink; : narHashSink;
auto fileSource = sinkToSource([&](Sink & sink) { /* Functionally, this means that fileSource will yield the content of
dumpPath(srcPath, sink); srcPath. The fact that we use scratchpadSink as a temporary buffer here
is an implementation detail. */
auto fileSource = sinkToSource([&](Sink & scratchpadSink) {
dumpPath(srcPath, scratchpadSink);
}); });
TeeSource tapped { *fileSource, sink }; /* tapped provides the same data as fileSource, but we also write all the
information to narSink. */
TeeSource tapped { *fileSource, narSink };
ParseSink blank; ParseSink blank;
auto & parseSink = method == FileIngestionMethod::Flat auto & parseSink = method == FileIngestionMethod::Flat
? fileSink ? fileSink
: blank; : blank;
parseDump( /* The information that flows from tapped (besides being replicated in
parseSink, narSink), is now put in parseSink. */
method == FileIngestionMethod::Recursive && hashAlgo == htSHA256 parseDump(parseSink, tapped);
? *fileSource // don't need to hash twice if we just can use the `narHash` twice
: tapped);
/* We extract the result of the computation from the sink by calling
finish. */
auto [narHash, narSize] = narHashSink.finish(); auto [narHash, narSize] = narHashSink.finish();
auto hash = method == FileIngestionMethod::Recursive && hashAlgo == htSHA256 auto hash = method == FileIngestionMethod::Recursive && hashAlgo == htSHA256
@ -271,8 +299,8 @@ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath,
info.ca = FixedOutputHash { .method = method, .hash = hash }; info.ca = FixedOutputHash { .method = method, .hash = hash };
if (!isValidPath(info.path)) { if (!isValidPath(info.path)) {
auto source = sinkToSource([&](Sink & sink) { auto source = sinkToSource([&](Sink & scratchpadSink) {
dumpPath(srcPath, sink); dumpPath(srcPath, scratchpadSink);
}); });
addToStore(info, *source); addToStore(info, *source);
} }