From 81c2e0ac8e76ddb3fd3c8e2ce59929853614b1b6 Mon Sep 17 00:00:00 2001 From: Jade Lovelace Date: Wed, 11 Sep 2024 00:27:39 -0700 Subject: [PATCH 1/2] archive: rename ParseSink to NARParseVisitor - Rename the listener to not be called a "sink". If it were a "sink" it would be eating bytes and conform with any of the Nix sink stuff (maybe FileHandle should be a Sink itself! but that's a later CL's problem). This is a parser listener. - Move the RetrieveRegularNARSink thing into store-api.cc, which is its only usage, and fix it to actually do what it is stated to do: crash if its invariants are violated. It's, of course, used to erm, unpack single-file NAR files, generated via a horrible contraption of sources and sinks that looks like a plumbing blueprint. Refactoring that is a future task. - Add a description of the invariants of NARParseVisitor in preparation of refactoring it. Change-Id: Ifca1d74d2947204a1f66349772e54dad0743e944 --- src/libstore/local-store.cc | 2 +- src/libstore/nar-accessor.cc | 3 ++- src/libstore/store-api.cc | 35 ++++++++++++++++++++++++++++-- src/libutil/archive.cc | 42 ++++++++++++++++++++++++++++++------ src/libutil/archive.hh | 37 ++++++------------------------- src/libutil/file-system.hh | 2 +- 6 files changed, 79 insertions(+), 42 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 4c8e2ea2f..d3520582e 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1215,7 +1215,7 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, bool narRead = false; Finally cleanup = [&]() { if (!narRead) { - ParseSink sink; + NARParseVisitor sink; try { parseDump(sink, source); } catch (...) { diff --git a/src/libstore/nar-accessor.cc b/src/libstore/nar-accessor.cc index f0dfcb19b..fa7d5e3cb 100644 --- a/src/libstore/nar-accessor.cc +++ b/src/libstore/nar-accessor.cc @@ -2,6 +2,7 @@ #include "archive.hh" #include +#include #include #include @@ -33,7 +34,7 @@ struct NarAccessor : public FSAccessor NarMember root; - struct NarIndexer : ParseSink, Source + struct NarIndexer : NARParseVisitor, Source { NarAccessor & acc; Source & source; diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 6d9fec41b..1619e5062 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -379,6 +379,37 @@ void Store::addMultipleToStore( } } +namespace { +/** + * If the NAR archive contains a single file at top-level, then save + * the contents of the file to `s`. Otherwise assert. + */ +struct RetrieveRegularNARVisitor : NARParseVisitor +{ + Sink & sink; + + RetrieveRegularNARVisitor(Sink & sink) : sink(sink) { } + + void createRegularFile(const Path & path) override + { + } + + void receiveContents(std::string_view data) override + { + sink(data); + } + + void createDirectory(const Path & path) override + { + assert(false && "RetrieveRegularNARVisitor::createDirectory must not be called"); + } + + void createSymlink(const Path & path, const std::string & target) override + { + assert(false && "RetrieveRegularNARVisitor::createSymlink must not be called"); + } +}; +} /* The aim of this function is to compute in one pass the correct ValidPathInfo for @@ -413,7 +444,7 @@ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath, /* Note that fileSink and unusualHashTee must be mutually exclusive, since they both write to caHashSink. Note that that requisite is currently true because the former is only used in the flat case. */ - RetrieveRegularNARSink fileSink { caHashSink }; + RetrieveRegularNARVisitor fileSink { caHashSink }; TeeSink unusualHashTee { narHashSink, caHashSink }; auto & narSink = method == FileIngestionMethod::Recursive && hashAlgo != HashType::SHA256 @@ -429,7 +460,7 @@ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath, information to narSink. */ TeeSource tapped { fileSource, narSink }; - ParseSink blank; + NARParseVisitor blank; auto & parseSink = method == FileIngestionMethod::Flat ? fileSink : blank; diff --git a/src/libutil/archive.cc b/src/libutil/archive.cc index d4da18f14..78d49026a 100644 --- a/src/libutil/archive.cc +++ b/src/libutil/archive.cc @@ -334,7 +334,7 @@ Generator parse(Source & source) } -static WireFormatGenerator restore(ParseSink & sink, Generator nar) +static WireFormatGenerator restore(NARParseVisitor & sink, Generator nar) { while (auto entry = nar.next()) { co_yield std::visit( @@ -377,12 +377,12 @@ static WireFormatGenerator restore(ParseSink & sink, Generator nar) } } -WireFormatGenerator parseAndCopyDump(ParseSink & sink, Source & source) +WireFormatGenerator parseAndCopyDump(NARParseVisitor & sink, Source & source) { return restore(sink, nar::parse(source)); } -void parseDump(ParseSink & sink, Source & source) +void parseDump(NARParseVisitor & sink, Source & source) { auto parser = parseAndCopyDump(sink, source); while (parser.next()) { @@ -390,7 +390,36 @@ void parseDump(ParseSink & sink, Source & source) } } -struct RestoreSink : ParseSink +/* + * Note [NAR restoration security]: + * It's *critical* that NAR restoration will never overwrite anything even if + * duplicate filenames are passed in. It is inevitable that not all NARs are + * fit to actually successfully restore to the target filesystem; errors may + * occur due to collisions, and this *must* cause the NAR to be rejected. + * + * Although the filenames are blocked from being *the same bytes* by a higher + * layer, filesystems have other ideas on every platform: + * - The store may be on a case-insensitive filesystem like APFS, ext4 with + * casefold directories, zfs with casesensitivity=insensitive + * - The store may be on a Unicode normalizing (or normalization-insensitive) + * filesystem like APFS (where files are looked up by + * hash(normalize(fname))), HFS+ (where file names are always normalized to + * approximately NFD), or zfs with normalization=formC, etc. + * + * It is impossible to know the version of Unicode being used by the underlying + * filesystem, thus it is *impossible* to stop these collisions. + * + * Overwriting files as a result of invalid NARs will cause a security bug like + * CppNix's CVE-2024-45593 (GHSA-h4vv-h3jq-v493) + */ + +/** + * This code restores NARs from disk. + * + * See Note [NAR restoration security] for security invariants in this procedure. + * + */ +struct NARRestoreVisitor : NARParseVisitor { Path dstPath; AutoCloseFD fd; @@ -457,7 +486,7 @@ struct RestoreSink : ParseSink void restorePath(const Path & path, Source & source) { - RestoreSink sink; + NARRestoreVisitor sink; sink.dstPath = path; parseDump(sink, source); } @@ -468,10 +497,9 @@ WireFormatGenerator copyNAR(Source & source) // FIXME: if 'source' is the output of dumpPath() followed by EOF, // we should just forward all data directly without parsing. - static ParseSink parseSink; /* null sink; just parse the NAR */ + static NARParseVisitor parseSink; /* null sink; just parse the NAR */ return parseAndCopyDump(parseSink, source); } - } diff --git a/src/libutil/archive.hh b/src/libutil/archive.hh index b34d06e3d..5e8db4c53 100644 --- a/src/libutil/archive.hh +++ b/src/libutil/archive.hh @@ -76,8 +76,12 @@ WireFormatGenerator dumpString(std::string_view s); /** * \todo Fix this API, it sucks. + * A visitor for NAR parsing that performs filesystem (or virtual-filesystem) + * actions to restore a NAR. + * + * Methods of this may arbitrarily fail due to filename collisions. */ -struct ParseSink +struct NARParseVisitor { virtual void createDirectory(const Path & path) { }; @@ -90,33 +94,6 @@ struct ParseSink virtual void createSymlink(const Path & path, const std::string & target) { }; }; -/** - * If the NAR archive contains a single file at top-level, then save - * the contents of the file to `s`. Otherwise barf. - */ -struct RetrieveRegularNARSink : ParseSink -{ - bool regular = true; - Sink & sink; - - RetrieveRegularNARSink(Sink & sink) : sink(sink) { } - - void createDirectory(const Path & path) override - { - regular = false; - } - - void receiveContents(std::string_view data) override - { - sink(data); - } - - void createSymlink(const Path & path, const std::string & target) override - { - regular = false; - } -}; - namespace nar { struct MetadataString; @@ -160,8 +137,8 @@ Generator parse(Source & source); } -WireFormatGenerator parseAndCopyDump(ParseSink & sink, Source & source); -void parseDump(ParseSink & sink, Source & source); +WireFormatGenerator parseAndCopyDump(NARParseVisitor & sink, Source & source); +void parseDump(NARParseVisitor & sink, Source & source); void restorePath(const Path & path, Source & source); diff --git a/src/libutil/file-system.hh b/src/libutil/file-system.hh index e49323e84..9fe931556 100644 --- a/src/libutil/file-system.hh +++ b/src/libutil/file-system.hh @@ -193,7 +193,7 @@ inline Paths createDirs(PathView path) } /** - * Create a symlink. + * Create a symlink. Throws if the symlink exists. */ void createSymlink(const Path & target, const Path & link); From ca1dc3f70bf98e2424b7b2666ee2180675b67451 Mon Sep 17 00:00:00 2001 From: Jade Lovelace Date: Wed, 11 Sep 2024 00:27:39 -0700 Subject: [PATCH 2/2] archive: refactor bad mutable-state API in the NAR parse listener Remove the mutable state stuff that assumes that one file is being written a time. It's true that we don't write multiple files interleaved, but that mutable state is evil. Change-Id: Ia1481da48255d901e4b09a9b783e7af44fae8cff --- src/libstore/nar-accessor.cc | 30 ++++----- src/libstore/store-api.cc | 23 +++++-- src/libutil/archive.cc | 114 ++++++++++++++++++++--------------- src/libutil/archive.hh | 39 +++++++++--- 4 files changed, 127 insertions(+), 79 deletions(-) diff --git a/src/libstore/nar-accessor.cc b/src/libstore/nar-accessor.cc index fa7d5e3cb..7600de6e7 100644 --- a/src/libstore/nar-accessor.cc +++ b/src/libstore/nar-accessor.cc @@ -45,11 +45,12 @@ struct NarAccessor : public FSAccessor uint64_t pos = 0; + public: NarIndexer(NarAccessor & acc, Source & source) : acc(acc), source(source) { } - void createMember(const Path & path, NarMember member) + NarMember & createMember(const Path & path, NarMember member) { size_t level = std::count(path.begin(), path.end(), '/'); while (parents.size() > level) parents.pop(); @@ -63,6 +64,8 @@ struct NarAccessor : public FSAccessor auto result = parents.top()->children.emplace(baseNameOf(path), std::move(member)); parents.push(&result.first->second); } + + return *parents.top(); } void createDirectory(const Path & path) override @@ -70,28 +73,17 @@ struct NarAccessor : public FSAccessor createMember(path, {FSAccessor::Type::tDirectory, false, 0, 0}); } - void createRegularFile(const Path & path) override + std::unique_ptr createRegularFile(const Path & path, uint64_t size, bool executable) override { - createMember(path, {FSAccessor::Type::tRegular, false, 0, 0}); - } + auto & memb = createMember(path, {FSAccessor::Type::tRegular, false, 0, 0}); - void closeRegularFile() override - { } - - void isExecutable() override - { - parents.top()->isExecutable = true; - } - - void preallocateContents(uint64_t size) override - { assert(size <= std::numeric_limits::max()); - parents.top()->size = (uint64_t) size; - parents.top()->start = pos; - } + memb.size = (uint64_t) size; + memb.start = pos; + memb.isExecutable = executable; - void receiveContents(std::string_view data) override - { } + return std::make_unique(); + } void createSymlink(const Path & path, const std::string & target) override { diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 1619e5062..ed8657774 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -386,17 +386,28 @@ namespace { */ struct RetrieveRegularNARVisitor : NARParseVisitor { + struct MyFileHandle : public FileHandle + { + Sink & sink; + + void receiveContents(std::string_view data) override + { + sink(data); + } + + private: + MyFileHandle(Sink & sink) : sink(sink) {} + + friend struct RetrieveRegularNARVisitor; + }; + Sink & sink; RetrieveRegularNARVisitor(Sink & sink) : sink(sink) { } - void createRegularFile(const Path & path) override + std::unique_ptr createRegularFile(const Path & path, uint64_t size, bool executable) override { - } - - void receiveContents(std::string_view data) override - { - sink(data); + return std::unique_ptr(new MyFileHandle{sink}); } void createDirectory(const Path & path) override diff --git a/src/libutil/archive.cc b/src/libutil/archive.cc index 78d49026a..225483804 100644 --- a/src/libutil/archive.cc +++ b/src/libutil/archive.cc @@ -347,16 +347,13 @@ static WireFormatGenerator restore(NARParseVisitor & sink, Generator }, [&](nar::File f) { return [](auto f, auto & sink) -> WireFormatGenerator { - sink.createRegularFile(f.path); - sink.preallocateContents(f.size); - if (f.executable) { - sink.isExecutable(); - } + auto handle = sink.createRegularFile(f.path, f.size, f.executable); + while (auto block = f.contents.next()) { - sink.receiveContents(std::string_view{block->data(), block->size()}); + handle->receiveContents(std::string_view{block->data(), block->size()}); co_yield *block; } - sink.closeRegularFile(); + handle->close(); }(std::move(f), sink); }, [&](nar::Symlink sl) { @@ -422,8 +419,67 @@ void parseDump(NARParseVisitor & sink, Source & source) struct NARRestoreVisitor : NARParseVisitor { Path dstPath; - AutoCloseFD fd; +private: + class MyFileHandle : public FileHandle + { + AutoCloseFD fd; + + MyFileHandle(AutoCloseFD && fd, uint64_t size, bool executable) : FileHandle(), fd(std::move(fd)) + { + if (executable) { + makeExecutable(); + } + + maybePreallocateContents(size); + } + + void makeExecutable() + { + struct stat st; + if (fstat(fd.get(), &st) == -1) + throw SysError("fstat"); + if (fchmod(fd.get(), st.st_mode | (S_IXUSR | S_IXGRP | S_IXOTH)) == -1) + throw SysError("fchmod"); + } + + void maybePreallocateContents(uint64_t len) + { + if (!archiveSettings.preallocateContents) + return; + +#if HAVE_POSIX_FALLOCATE + if (len) { + errno = posix_fallocate(fd.get(), 0, len); + /* Note that EINVAL may indicate that the underlying + filesystem doesn't support preallocation (e.g. on + OpenSolaris). Since preallocation is just an + optimisation, ignore it. */ + if (errno && errno != EINVAL && errno != EOPNOTSUPP && errno != ENOSYS) + throw SysError("preallocating file of %1% bytes", len); + } +#endif + } + + public: + + ~MyFileHandle() = default; + + virtual void close() override + { + /* Call close explicitly to make sure the error is checked */ + fd.close(); + } + + void receiveContents(std::string_view data) override + { + writeFull(fd.get(), data); + } + + friend struct NARRestoreVisitor; + }; + +public: void createDirectory(const Path & path) override { Path p = dstPath + path; @@ -431,49 +487,13 @@ struct NARRestoreVisitor : NARParseVisitor throw SysError("creating directory '%1%'", p); }; - void createRegularFile(const Path & path) override + std::unique_ptr createRegularFile(const Path & path, uint64_t size, bool executable) override { Path p = dstPath + path; - fd = AutoCloseFD{open(p.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, 0666)}; + AutoCloseFD fd = AutoCloseFD{open(p.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, 0666)}; if (!fd) throw SysError("creating file '%1%'", p); - } - void closeRegularFile() override - { - /* Call close explicitly to make sure the error is checked */ - fd.close(); - } - - void isExecutable() override - { - struct stat st; - if (fstat(fd.get(), &st) == -1) - throw SysError("fstat"); - if (fchmod(fd.get(), st.st_mode | (S_IXUSR | S_IXGRP | S_IXOTH)) == -1) - throw SysError("fchmod"); - } - - void preallocateContents(uint64_t len) override - { - if (!archiveSettings.preallocateContents) - return; - -#if HAVE_POSIX_FALLOCATE - if (len) { - errno = posix_fallocate(fd.get(), 0, len); - /* Note that EINVAL may indicate that the underlying - filesystem doesn't support preallocation (e.g. on - OpenSolaris). Since preallocation is just an - optimisation, ignore it. */ - if (errno && errno != EINVAL && errno != EOPNOTSUPP && errno != ENOSYS) - throw SysError("preallocating file of %1% bytes", len); - } -#endif - } - - void receiveContents(std::string_view data) override - { - writeFull(fd.get(), data); + return std::unique_ptr(new MyFileHandle(std::move(fd), size, executable)); } void createSymlink(const Path & path, const std::string & target) override diff --git a/src/libutil/archive.hh b/src/libutil/archive.hh index 5e8db4c53..c633bee00 100644 --- a/src/libutil/archive.hh +++ b/src/libutil/archive.hh @@ -83,15 +83,40 @@ WireFormatGenerator dumpString(std::string_view s); */ struct NARParseVisitor { - virtual void createDirectory(const Path & path) { }; + /** + * A type-erased file handle specific to this particular NARParseVisitor. + */ + struct FileHandle + { + FileHandle() {} + FileHandle(FileHandle const &) = delete; + FileHandle & operator=(FileHandle &) = delete; - virtual void createRegularFile(const Path & path) { }; - virtual void closeRegularFile() { }; - virtual void isExecutable() { }; - virtual void preallocateContents(uint64_t size) { }; - virtual void receiveContents(std::string_view data) { }; + /** Puts one block of data into the file */ + virtual void receiveContents(std::string_view data) { } - virtual void createSymlink(const Path & path, const std::string & target) { }; + /** + * Explicitly closes the file. Further operations may throw an assert. + * This exists so that closing can fail and throw an exception without doing so in a destructor. + */ + virtual void close() { } + + virtual ~FileHandle() = default; + }; + + virtual void createDirectory(const Path & path) { } + + /** + * Creates a regular file in the extraction output with the given size and executable flag. + * The size is guaranteed to be the true size of the file. + */ + [[nodiscard]] + virtual std::unique_ptr createRegularFile(const Path & path, uint64_t size, bool executable) + { + return std::make_unique(); + } + + virtual void createSymlink(const Path & path, const std::string & target) { } }; namespace nar {