NarAccessor: Run in constant memory

This commit is contained in:
Eelco Dolstra 2020-07-13 17:30:42 +02:00
parent fc84c358d9
commit 0a9da00a10
7 changed files with 57 additions and 37 deletions

View file

@ -155,13 +155,17 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, Source & narSource
auto now1 = std::chrono::steady_clock::now(); auto now1 = std::chrono::steady_clock::now();
/* Read the NAR simultaneously into a CompressionSink+FileSink (to
write the compressed NAR to disk), into a HashSink (to get the
NAR hash), and into a NarAccessor (to get the NAR listing). */
HashSink fileHashSink(htSHA256); HashSink fileHashSink(htSHA256);
std::shared_ptr<FSAccessor> narAccessor;
{ {
FdSink fileSink(fdTemp.get()); FdSink fileSink(fdTemp.get());
TeeSink teeSink(fileSink, fileHashSink); TeeSink teeSink(fileSink, fileHashSink);
auto compressionSink = makeCompressionSink(compression, teeSink); auto compressionSink = makeCompressionSink(compression, teeSink);
copyNAR(narSource, *compressionSink); TeeSource teeSource(narSource, *compressionSink);
narAccessor = makeNarAccessor(teeSource);
compressionSink->finish(); compressionSink->finish();
} }
@ -200,10 +204,9 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, Source & narSource
#if 0 #if 0
auto accessor_ = std::dynamic_pointer_cast<RemoteFSAccessor>(accessor); auto accessor_ = std::dynamic_pointer_cast<RemoteFSAccessor>(accessor);
auto narAccessor = makeNarAccessor(nar);
if (accessor_) if (accessor_)
accessor_->addToCache(printStorePath(info.path), *nar, narAccessor); accessor_->addToCache(printStorePath(info.path), *nar, narAccessor);
#endif
/* Optionally write a JSON file containing a listing of the /* Optionally write a JSON file containing a listing of the
contents of the NAR. */ contents of the NAR. */
@ -216,15 +219,13 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, Source & narSource
{ {
auto res = jsonRoot.placeholder("root"); auto res = jsonRoot.placeholder("root");
listNar(res, narAccessor, "", true); listNar(res, ref<FSAccessor>(narAccessor), "", true);
} }
} }
upsertFile(std::string(info.path.to_string()) + ".ls", jsonOut.str(), "application/json"); upsertFile(std::string(info.path.to_string()) + ".ls", jsonOut.str(), "application/json");
} }
#endif
#if 0
/* Optionally maintain an index of DWARF debug info files /* Optionally maintain an index of DWARF debug info files
consisting of JSON files named 'debuginfo/<build-id>' that consisting of JSON files named 'debuginfo/<build-id>' that
specify the NAR file and member containing the debug info. */ specify the NAR file and member containing the debug info. */
@ -281,7 +282,6 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, Source & narSource
threadPool.process(); threadPool.process();
} }
} }
#endif
/* Atomically write the NAR file. */ /* Atomically write the NAR file. */
if (repair || !fileExists(narInfo->url)) { if (repair || !fileExists(narInfo->url)) {

View file

@ -391,7 +391,8 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
} }
HashType hashAlgo = parseHashType(s); HashType hashAlgo = parseHashType(s);
TeeSource savedNAR(from); StringSink savedNAR;
TeeSource savedNARSource(from, savedNAR);
RetrieveRegularNARSink savedRegular; RetrieveRegularNARSink savedRegular;
if (method == FileIngestionMethod::Recursive) { if (method == FileIngestionMethod::Recursive) {
@ -399,7 +400,7 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
a string so that we can pass it to a string so that we can pass it to
addToStoreFromDump(). */ addToStoreFromDump(). */
ParseSink sink; /* null sink; just parse the NAR */ ParseSink sink; /* null sink; just parse the NAR */
parseDump(sink, savedNAR); parseDump(sink, savedNARSource);
} else } else
parseDump(savedRegular, from); parseDump(savedRegular, from);
@ -407,7 +408,7 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
if (!savedRegular.regular) throw Error("regular file expected"); if (!savedRegular.regular) throw Error("regular file expected");
auto path = store->addToStoreFromDump( auto path = store->addToStoreFromDump(
method == FileIngestionMethod::Recursive ? *savedNAR.data : savedRegular.s, method == FileIngestionMethod::Recursive ? *savedNAR.s : savedRegular.s,
baseName, baseName,
method, method,
hashAlgo); hashAlgo);
@ -733,7 +734,7 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
else { else {
TeeParseSink tee(from); TeeParseSink tee(from);
parseDump(tee, tee.source); parseDump(tee, tee.source);
saved = std::move(*tee.source.data); saved = std::move(*tee.saved.s);
source = std::make_unique<StringSource>(saved); source = std::make_unique<StringSource>(saved);
} }

View file

@ -77,15 +77,15 @@ StorePaths Store::importPaths(Source & source, std::shared_ptr<FSAccessor> acces
if (deriver != "") if (deriver != "")
info.deriver = parseStorePath(deriver); info.deriver = parseStorePath(deriver);
info.narHash = hashString(htSHA256, *tee.source.data); info.narHash = hashString(htSHA256, *tee.saved.s);
info.narSize = tee.source.data->size(); info.narSize = tee.saved.s->size();
// Ignore optional legacy signature. // Ignore optional legacy signature.
if (readInt(source) == 1) if (readInt(source) == 1)
readString(source); readString(source);
// Can't use underlying source, which would have been exhausted // Can't use underlying source, which would have been exhausted
auto source = StringSource { *tee.source.data }; auto source = StringSource { *tee.saved.s };
addToStore(info, source, NoRepair, checkSigs, accessor); addToStore(info, source, NoRepair, checkSigs, accessor);
res.push_back(info.path); res.push_back(info.path);

View file

@ -18,7 +18,7 @@ struct NarMember
/* If this is a regular file, position of the contents of this /* If this is a regular file, position of the contents of this
file in the NAR. */ file in the NAR. */
size_t start = 0, size = 0; uint64_t start = 0, size = 0;
std::string target; std::string target;
@ -34,17 +34,19 @@ struct NarAccessor : public FSAccessor
NarMember root; NarMember root;
struct NarIndexer : ParseSink, StringSource struct NarIndexer : ParseSink, Source
{ {
NarAccessor & acc; NarAccessor & acc;
Source & source;
std::stack<NarMember *> parents; std::stack<NarMember *> parents;
std::string currentStart;
bool isExec = false; bool isExec = false;
NarIndexer(NarAccessor & acc, const std::string & nar) uint64_t pos = 0;
: StringSource(nar), acc(acc)
NarIndexer(NarAccessor & acc, Source & source)
: acc(acc), source(source)
{ } { }
void createMember(const Path & path, NarMember member) { void createMember(const Path & path, NarMember member) {
@ -79,31 +81,38 @@ struct NarAccessor : public FSAccessor
void preallocateContents(unsigned long long size) override void preallocateContents(unsigned long long size) override
{ {
currentStart = string(s, pos, 16); assert(size <= std::numeric_limits<uint64_t>::max());
assert(size <= std::numeric_limits<size_t>::max()); parents.top()->size = (uint64_t) size;
parents.top()->size = (size_t)size;
parents.top()->start = pos; parents.top()->start = pos;
} }
void receiveContents(unsigned char * data, unsigned int len) override void receiveContents(unsigned char * data, unsigned int len) override
{ { }
// Sanity check
if (!currentStart.empty()) {
assert(len < 16 || currentStart == string((char *) data, 16));
currentStart.clear();
}
}
void createSymlink(const Path & path, const string & target) override void createSymlink(const Path & path, const string & target) override
{ {
createMember(path, createMember(path,
NarMember{FSAccessor::Type::tSymlink, false, 0, 0, target}); NarMember{FSAccessor::Type::tSymlink, false, 0, 0, target});
} }
/* Source interface: read up to `len' bytes from the wrapped source
   into `data' and advance `pos' by the number of bytes actually
   read, so that `pos' always equals the number of bytes consumed
   from the underlying source so far. Returns that byte count. */
size_t read(unsigned char * data, size_t len) override
{
    const auto got = source.read(data, len);
    pos += got;
    return got;
}
}; };
/* Build the member tree from a NAR held in memory. The string is
   kept in `nar' and parsed through a StringSource. */
NarAccessor(ref<const std::string> nar) : nar(nar)
{
    StringSource narSource(*nar);
    NarIndexer narIndexer(*this, narSource);
    /* The indexer acts as both the parse sink and the source, so it
       can count the bytes the parser consumes. */
    parseDump(narIndexer, narIndexer);
}

/* Build the member tree by parsing a NAR read from `source'. This
   constructor does not initialise `nar'. */
NarAccessor(Source & source)
{
    NarIndexer narIndexer(*this, source);
    parseDump(narIndexer, narIndexer);
}
@ -219,6 +228,11 @@ ref<FSAccessor> makeNarAccessor(ref<const std::string> nar)
return make_ref<NarAccessor>(nar); return make_ref<NarAccessor>(nar);
} }
/* Return an accessor for the NAR read from `source'. The source is
   passed straight to the NarAccessor(Source &) constructor, which
   parses it. */
ref<FSAccessor> makeNarAccessor(Source & source)
{
return make_ref<NarAccessor>(source);
}
ref<FSAccessor> makeLazyNarAccessor(const std::string & listing, ref<FSAccessor> makeLazyNarAccessor(const std::string & listing,
GetNarBytes getNarBytes) GetNarBytes getNarBytes)
{ {

View file

@ -6,10 +6,14 @@
namespace nix { namespace nix {
struct Source;
/* Return an object that provides access to the contents of a NAR /* Return an object that provides access to the contents of a NAR
file. */ file. */
ref<FSAccessor> makeNarAccessor(ref<const std::string> nar); ref<FSAccessor> makeNarAccessor(ref<const std::string> nar);
ref<FSAccessor> makeNarAccessor(Source & source);
/* Create a NAR accessor from a NAR listing (in the format produced by /* Create a NAR accessor from a NAR listing (in the format produced by
listNar()). The callback getNarBytes(offset, length) is used by the listNar()). The callback getNarBytes(offset, length) is used by the
readFile() method of the accessor to get the contents of files readFile() method of the accessor to get the contents of files

View file

@ -65,9 +65,10 @@ struct ParseSink
struct TeeParseSink : ParseSink struct TeeParseSink : ParseSink
{ {
StringSink saved;
TeeSource source; TeeSource source;
TeeParseSink(Source & source) : source(source) { } TeeParseSink(Source & source) : source(source, saved) { }
}; };
void parseDump(ParseSink & sink, Source & source); void parseDump(ParseSink & sink, Source & source);

View file

@ -179,17 +179,17 @@ struct TeeSink : Sink
}; };
/* Adapter class of a Source that saves all data read to `s'. */ /* Adapter class of a Source that saves all data read to a sink. */
struct TeeSource : Source struct TeeSource : Source
{ {
Source & orig; Source & orig;
ref<std::string> data; Sink & sink;
TeeSource(Source & orig) TeeSource(Source & orig, Sink & sink)
: orig(orig), data(make_ref<std::string>()) { } : orig(orig), sink(sink) { }
size_t read(unsigned char * data, size_t len) size_t read(unsigned char * data, size_t len)
{ {
size_t n = orig.read(data, len); size_t n = orig.read(data, len);
this->data->append((const char *) data, n); sink(data, len);
return n; return n;
} }
}; };