archive: rename ParseSink to NARParseVisitor

- Rename the listener to not be called a "sink". If it were a "sink" it
  would be eating bytes and conform with any of the Nix sink stuff
  (maybe FileHandle should be a Sink itself! but that's a later CL's
  problem). This is a parser listener.
- Move the RetrieveRegularNARSink thing into store-api.cc, which is its
  only usage, and fix it to actually do what it is stated to do: crash
  if its invariants are violated.

  It's, of course, used to erm, unpack single-file NAR files, generated
  via a horrible contraption of sources and sinks that looks like a
  plumbing blueprint. Refactoring that is a future task.
- Add a description of the invariants of NARParseVisitor in preparation
  of refactoring it.

Change-Id: Ifca1d74d2947204a1f66349772e54dad0743e944
This commit is contained in:
jade 2024-09-11 00:27:39 -07:00
parent 686120ee4a
commit 81c2e0ac8e
6 changed files with 79 additions and 42 deletions

View file

@ -1215,7 +1215,7 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
bool narRead = false;
Finally cleanup = [&]() {
if (!narRead) {
ParseSink sink;
NARParseVisitor sink;
try {
parseDump(sink, source);
} catch (...) {

View file

@ -2,6 +2,7 @@
#include "archive.hh"
#include <map>
#include <memory>
#include <stack>
#include <algorithm>
@ -33,7 +34,7 @@ struct NarAccessor : public FSAccessor
NarMember root;
struct NarIndexer : ParseSink, Source
struct NarIndexer : NARParseVisitor, Source
{
NarAccessor & acc;
Source & source;

View file

@ -379,6 +379,37 @@ void Store::addMultipleToStore(
}
}
namespace {
/**
* If the NAR archive contains a single file at top-level, then save
* the contents of the file to `s`. Otherwise assert.
*/
struct RetrieveRegularNARVisitor : NARParseVisitor
{
Sink & sink;
RetrieveRegularNARVisitor(Sink & sink) : sink(sink) { }
void createRegularFile(const Path & path) override
{
}
void receiveContents(std::string_view data) override
{
sink(data);
}
void createDirectory(const Path & path) override
{
assert(false && "RetrieveRegularNARVisitor::createDirectory must not be called");
}
void createSymlink(const Path & path, const std::string & target) override
{
assert(false && "RetrieveRegularNARVisitor::createSymlink must not be called");
}
};
}
/*
The aim of this function is to compute in one pass the correct ValidPathInfo for
@ -413,7 +444,7 @@ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath,
/* Note that fileSink and unusualHashTee must be mutually exclusive, since
they both write to caHashSink. Note that that requisite is currently true
because the former is only used in the flat case. */
RetrieveRegularNARSink fileSink { caHashSink };
RetrieveRegularNARVisitor fileSink { caHashSink };
TeeSink unusualHashTee { narHashSink, caHashSink };
auto & narSink = method == FileIngestionMethod::Recursive && hashAlgo != HashType::SHA256
@ -429,7 +460,7 @@ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath,
information to narSink. */
TeeSource tapped { fileSource, narSink };
ParseSink blank;
NARParseVisitor blank;
auto & parseSink = method == FileIngestionMethod::Flat
? fileSink
: blank;

View file

@ -334,7 +334,7 @@ Generator<Entry> parse(Source & source)
}
static WireFormatGenerator restore(ParseSink & sink, Generator<nar::Entry> nar)
static WireFormatGenerator restore(NARParseVisitor & sink, Generator<nar::Entry> nar)
{
while (auto entry = nar.next()) {
co_yield std::visit(
@ -377,12 +377,12 @@ static WireFormatGenerator restore(ParseSink & sink, Generator<nar::Entry> nar)
}
}
WireFormatGenerator parseAndCopyDump(ParseSink & sink, Source & source)
WireFormatGenerator parseAndCopyDump(NARParseVisitor & sink, Source & source)
{
return restore(sink, nar::parse(source));
}
void parseDump(ParseSink & sink, Source & source)
void parseDump(NARParseVisitor & sink, Source & source)
{
auto parser = parseAndCopyDump(sink, source);
while (parser.next()) {
@ -390,7 +390,36 @@ void parseDump(ParseSink & sink, Source & source)
}
}
struct RestoreSink : ParseSink
/*
* Note [NAR restoration security]:
* It's *critical* that NAR restoration will never overwrite anything even if
* duplicate filenames are passed in. It is inevitable that not all NARs are
* fit to actually successfully restore to the target filesystem; errors may
* occur due to collisions, and this *must* cause the NAR to be rejected.
*
* Although the filenames are blocked from being *the same bytes* by a higher
* layer, filesystems have other ideas on every platform:
* - The store may be on a case-insensitive filesystem like APFS, ext4 with
* casefold directories, zfs with casesensitivity=insensitive
* - The store may be on a Unicode normalizing (or normalization-insensitive)
* filesystem like APFS (where files are looked up by
* hash(normalize(fname))), HFS+ (where file names are always normalized to
* approximately NFD), or zfs with normalization=formC, etc.
*
* It is impossible to know the version of Unicode being used by the underlying
* filesystem, thus it is *impossible* to stop these collisions.
*
* Overwriting files as a result of invalid NARs will cause a security bug like
* CppNix's CVE-2024-45593 (GHSA-h4vv-h3jq-v493)
*/
/**
* This code restores NARs from disk.
*
* See Note [NAR restoration security] for security invariants in this procedure.
*
*/
struct NARRestoreVisitor : NARParseVisitor
{
Path dstPath;
AutoCloseFD fd;
@ -457,7 +486,7 @@ struct RestoreSink : ParseSink
void restorePath(const Path & path, Source & source)
{
RestoreSink sink;
NARRestoreVisitor sink;
sink.dstPath = path;
parseDump(sink, source);
}
@ -468,10 +497,9 @@ WireFormatGenerator copyNAR(Source & source)
// FIXME: if 'source' is the output of dumpPath() followed by EOF,
// we should just forward all data directly without parsing.
static ParseSink parseSink; /* null sink; just parse the NAR */
static NARParseVisitor parseSink; /* null sink; just parse the NAR */
return parseAndCopyDump(parseSink, source);
}
}

View file

@ -76,8 +76,12 @@ WireFormatGenerator dumpString(std::string_view s);
/**
* \todo Fix this API, it sucks.
* A visitor for NAR parsing that performs filesystem (or virtual-filesystem)
* actions to restore a NAR.
*
* Methods of this may arbitrarily fail due to filename collisions.
*/
struct ParseSink
struct NARParseVisitor
{
virtual void createDirectory(const Path & path) { };
@ -90,33 +94,6 @@ struct ParseSink
virtual void createSymlink(const Path & path, const std::string & target) { };
};
/**
* If the NAR archive contains a single file at top-level, then save
* the contents of the file to `s`. Otherwise barf.
*/
struct RetrieveRegularNARSink : ParseSink
{
bool regular = true;
Sink & sink;
RetrieveRegularNARSink(Sink & sink) : sink(sink) { }
void createDirectory(const Path & path) override
{
regular = false;
}
void receiveContents(std::string_view data) override
{
sink(data);
}
void createSymlink(const Path & path, const std::string & target) override
{
regular = false;
}
};
namespace nar {
struct MetadataString;
@ -160,8 +137,8 @@ Generator<Entry> parse(Source & source);
}
WireFormatGenerator parseAndCopyDump(ParseSink & sink, Source & source);
void parseDump(ParseSink & sink, Source & source);
WireFormatGenerator parseAndCopyDump(NARParseVisitor & sink, Source & source);
void parseDump(NARParseVisitor & sink, Source & source);
void restorePath(const Path & path, Source & source);

View file

@ -193,7 +193,7 @@ inline Paths createDirs(PathView path)
}
/**
* Create a symlink.
* Create a symlink. Throws if the symlink exists.
*/
void createSymlink(const Path & target, const Path & link);