libstore: rewrite the nar parser as a contents generator

this is not strictly necessary at this point, because the parser already
returns a generator that passes through all input data it has read. however,
the nar parser *was* very lax and would accept nars that weren't in
canonical form (defined as the form dumpPath would return). nar hashing
depends on that form, so rewriting the parser now allows us to reject
non-canonical nars, which extract to the same store contents as their
canonical counterparts but nevertheless have different nar hashes.

Change-Id: Iccd319e3bd5912d8297014c84c495edc59019bb7
This commit is contained in:
eldritch horrors 2024-05-16 03:17:03 +02:00
parent c052716edd
commit 4ec87742a1
2 changed files with 215 additions and 121 deletions

View file

@ -1,5 +1,6 @@
#include <cerrno> #include <cerrno>
#include <algorithm> #include <algorithm>
#include <string_view>
#include <vector> #include <vector>
#include <map> #include <map>
@ -13,6 +14,8 @@
#include "archive.hh" #include "archive.hh"
#include "file-system.hh" #include "file-system.hh"
#include "finally.hh"
#include "serialise.hh"
#include "config.hh" #include "config.hh"
#include "logging.hh" #include "logging.hh"
#include "signals.hh" #include "signals.hh"
@ -174,31 +177,6 @@ static void skipGeneric(Source & source)
#endif #endif
/* Copies the contents of one regular file from `source` into `sink`.
   The length prefix, every data chunk, and the trailing padding are also
   yielded through the returned generator, in the order they are read. */
static WireFormatGenerator parseContents(ParseSink & sink, Source & source, const Path & path)
{
    uint64_t total = readLongLong(source);
    co_yield total;
    sink.preallocateContents(total);

    std::array<char, 65536> chunk;
    for (uint64_t remaining = total; remaining != 0; ) {
        checkInterrupt();
        // never read more than what is left of the file or than fits the buffer
        size_t want = remaining < chunk.size() ? static_cast<size_t>(remaining) : chunk.size();
        source(chunk.data(), want);
        co_yield std::span{chunk.data(), want};
        sink.receiveContents({chunk.data(), want});
        remaining -= want;
    }

    readPadding(total, source);
    co_yield SerializingTransform::padding(total);
}
struct CaseInsensitiveCompare struct CaseInsensitiveCompare
{ {
bool operator() (const std::string & a, const std::string & b) const bool operator() (const std::string & a, const std::string & b) const
@ -207,129 +185,201 @@ struct CaseInsensitiveCompare
} }
}; };
static WireFormatGenerator parse(ParseSink & sink, Source & source, const Path & path) namespace nar {
// NOTE: this span is a side-by-side diff rendering. Where a line carries two
// statements, the first belongs to the removed `parse(ParseSink &, Source &,
// const Path &)` and the second to the added `nar::parseObject`; lines with a
// single statement exist on one side only. The notes below describe the added
// `parseObject`.
//
// parseObject reads one NAR object from `source` and yields it as a sequence
// of `Entry` values:
//  - every tag or string read is yielded as `MetadataString`; the file size
//    and the content padding are yielded as `MetadataRaw`.
//  - the object must start with "(" "type" <type> (enforced by EXPECT, which
//    throws badArchive on any other tag).
//  - "regular": an optional "executable" flag (whose value must be empty)
//    followed by a mandatory "contents" section. A `File` is yielded whose
//    `contents` generator reads the data from `source` in blocks of at most
//    65536 bytes. After that yield, `assert(left == 0)` requires the consumer
//    to have drained the reader before the padding is read.
//  - "directory": a `Directory` is yielded whose generator reads
//    "entry" "(" "name" <name> "node" <object> ")" groups until ")". Names must
//    be non-empty, not "." or "..", contain no '/' or NUL, and be strictly
//    increasing. After that yield, `assert(completed)` requires the consumer to
//    have drained the directory generator; the function then returns without
//    reading another ")".
//  - "symlink": expects "target", then yields a `Symlink`.
//  - any other type throws badArchive("unknown file type ...").
//  - regular files and symlinks are terminated by a final ")".
// `path` is only used to label the yielded entries and to build child paths.
static Generator<Entry> parseObject(Source & source, const Path & path)
{ {
std::string s; #define EXPECT(raw, kind) \
do { \
const auto s = readString(source); \
if (s != raw) { \
throw badArchive("expected " kind " tag"); \
} \
co_yield MetadataString{s}; \
} while (0)
s = readString(source); EXPECT("(", "open");
co_yield s; EXPECT("type", "type");
if (s != "(") throw badArchive("expected open tag");
enum { tpUnknown, tpRegular, tpDirectory, tpSymlink } type = tpUnknown; checkInterrupt();
std::map<Path, int, CaseInsensitiveCompare> names; const auto t = readString(source);
co_yield MetadataString{t};
while (1) { if (t == "regular") {
checkInterrupt(); auto contentsOrFlag = readString(source);
co_yield MetadataString{contentsOrFlag};
s = readString(source); const bool executable = contentsOrFlag == "executable";
co_yield s; if (executable) {
if (s == ")") {
break;
}
else if (s == "type") {
if (type != tpUnknown)
throw badArchive("multiple type fields");
std::string t = readString(source);
co_yield t;
if (t == "regular") {
type = tpRegular;
sink.createRegularFile(path);
}
else if (t == "directory") {
sink.createDirectory(path);
type = tpDirectory;
}
else if (t == "symlink") {
type = tpSymlink;
}
else throw badArchive("unknown file type " + t);
}
else if (s == "contents" && type == tpRegular) {
co_yield parseContents(sink, source, path);
sink.closeRegularFile();
}
else if (s == "executable" && type == tpRegular) {
auto s = readString(source); auto s = readString(source);
co_yield s; co_yield MetadataString{s};
if (s != "") throw badArchive("executable marker has non-empty value"); if (s != "") {
sink.isExecutable(); throw badArchive("executable marker has non-empty value");
}
contentsOrFlag = readString(source);
co_yield MetadataString{contentsOrFlag};
} }
if (contentsOrFlag == "contents") {
const uint64_t size = readLongLong(source);
co_yield MetadataRaw{SerializingTransform()(size)};
// added side: the reader lambda is captureless; `source` and `left` are
// passed as reference parameters, so `left` below observes the reader's progress.
auto makeReader = [](Source & source, uint64_t & left) -> Generator<Bytes> {
std::array<char, 65536> buf;
else if (s == "entry" && type == tpDirectory) { while (left) {
std::string name, prevName; checkInterrupt();
auto n = std::min<uint64_t>(buf.size(), left);
s = readString(source); source(buf.data(), n);
co_yield s; co_yield std::span{buf.data(), n};
if (s != "(") throw badArchive("expected open tag"); left -= n;
}
};
auto left = size;
co_yield File{path, executable, size, makeReader(source, left)};
// we could drain the remainder of the file, but coroutines being interruptible
// at any time makes this difficult. for files this is not that hard, but being
// consistent with directories is more important than handling the simple case.
assert(left == 0);
readPadding(size, source);
co_yield MetadataRaw{SerializingTransform::padding(size)};
} else {
throw badArchive("file without contents found: " + path);
}
} else if (t == "directory") {
// added side: `completed` is set by the directory reader only when it has
// consumed the closing ")" of the directory.
auto makeReader = [](Source & source, const Path & path, bool & completed
) -> Generator<Entry> {
std::map<Path, int, CaseInsensitiveCompare> names;
std::string prevName;
while (1) { while (1) {
checkInterrupt(); checkInterrupt();
s = readString(source); {
co_yield s; const auto s = readString(source);
co_yield MetadataString{s};
if (s == ")") { if (s == ")") {
break; completed = true;
} else if (s == "name") { co_return;
name = readString(source); } else if (s != "entry") {
co_yield name; throw badArchive("expected entry tag");
if (name.empty() || name == "." || name == ".." || name.find('/') != std::string::npos || name.find((char) 0) != std::string::npos)
throw Error("NAR contains invalid file name '%1%'", name);
if (name <= prevName)
throw Error("NAR directory is not sorted");
prevName = name;
if (archiveSettings.useCaseHack) {
auto i = names.find(name);
if (i != names.end()) {
debug("case collision between '%1%' and '%2%'", i->first, name);
name += caseHackSuffix;
name += std::to_string(++i->second);
} else
names[name] = 0;
} }
} else if (s == "node") { EXPECT("(", "open");
if (name.empty()) throw badArchive("entry name missing"); }
co_yield parse(sink, source, path + "/" + name);
} else EXPECT("name", "name");
throw badArchive("unknown field " + s); auto name = readString(source);
co_yield MetadataString{name};
if (name.empty() || name == "." || name == ".."
|| name.find('/') != std::string::npos
|| name.find((char) 0) != std::string::npos)
{
throw Error("NAR contains invalid file name '%1%'", name);
}
if (name <= prevName) {
throw Error("NAR directory is not sorted");
}
prevName = name;
if (archiveSettings.useCaseHack) {
auto i = names.find(name);
if (i != names.end()) {
debug("case collision between '%1%' and '%2%'", i->first, name);
name += caseHackSuffix;
name += std::to_string(++i->second);
} else {
names[name] = 0;
}
}
EXPECT("node", "node");
co_yield parseObject(source, path + "/" + name);
EXPECT(")", "close");
} }
} };
bool completed = false;
else if (s == "target" && type == tpSymlink) { co_yield Directory{path, makeReader(source, path, completed)};
std::string target = readString(source); // directories may nest, so to drain a directory properly we'd have to add a Finally
co_yield target; // argument to the generator to ensure that the draining code is always run. this is
sink.createSymlink(path, target); // usually not necessary, hard to follow, and rather error-prone on top of all that.
} assert(completed);
// directories are terminated already, don't try to read another ")"
else co_return;
throw badArchive("unknown field " + s); } else if (t == "symlink") {
EXPECT("target", "target");
std::string target = readString(source);
co_yield MetadataString{target};
co_yield Symlink{path, target};
} else {
throw badArchive("unknown file type " + t);
} }
EXPECT(")", "close");
#undef EXPECT
} }
// NOTE: side-by-side diff rendering. The first statement on a shared line is
// from the removed `parseAndCopyDump`, the second from the added `nar::parse`.
//
// nar::parse (added side) reads the version string from `source` and yields it
// as a `MetadataString`. A SerialisationError raised while reading it is
// swallowed, so that the check below reports the problem instead. If the
// version is not `narVersionMagic1`, badArchive is thrown. Otherwise the
// entries of the top-level object are yielded via parseObject, with an empty
// path.
Generator<Entry> parse(Source & source)
WireFormatGenerator parseAndCopyDump(ParseSink & sink, Source & source)
{ {
std::string version; std::string version;
try { try {
version = readString(source, narVersionMagic1.size()); version = readString(source, narVersionMagic1.size());
co_yield version; co_yield MetadataString{version};
} catch (SerialisationError & e) { } catch (SerialisationError & e) {
/* This generally means the integer at the start couldn't be /* This generally means the integer at the start couldn't be
decoded. Ignore and throw the exception below. */ decoded. Ignore and throw the exception below. */
} }
if (version != narVersionMagic1) if (version != narVersionMagic1)
throw badArchive("input doesn't look like a Nix archive"); throw badArchive("input doesn't look like a Nix archive");
co_yield parse(sink, source, ""); co_yield parseObject(source, "");
}
}
/* Drives the entry generator `nar` to completion. File, symlink and
   directory entries are applied to `sink`; metadata and file content blocks
   are yielded through the returned generator.

   Each visitor alternative produces a WireFormatGenerator, which is then
   yielded from this coroutine. */
static WireFormatGenerator restore(ParseSink & sink, Generator<nar::Entry> nar)
{
while (auto entry = nar.next()) {
co_yield std::visit(
overloaded{
// metadata is passed through unchanged
[](nar::MetadataString m) -> WireFormatGenerator {
co_yield m.data;
},
[](nar::MetadataRaw r) -> WireFormatGenerator {
co_yield r.raw;
},
// The capturing outer lambdas below only forward to a captureless inner
// lambda that receives the entry and the sink as arguments.
// NOTE(review): presumably this keeps the coroutine bodies from depending
// on a closure object that may be gone when they run; confirm.
[&](nar::File f) {
return [](auto f, auto & sink) -> WireFormatGenerator {
sink.createRegularFile(f.path);
sink.preallocateContents(f.size);
if (f.executable) {
sink.isExecutable();
}
// each block is handed to the sink before it is yielded
while (auto block = f.contents.next()) {
sink.receiveContents(std::string_view{block->data(), block->size()});
co_yield *block;
}
sink.closeRegularFile();
}(std::move(f), sink);
},
[&](nar::Symlink sl) {
return [](auto sl, auto & sink) -> WireFormatGenerator {
sink.createSymlink(sl.path, sl.target);
co_return;
}(std::move(sl), sink);
},
[&](nar::Directory d) {
// this inner lambda uses a plain `return`, so it is an ordinary function:
// createDirectory runs as soon as the visitor is invoked, and the
// generator for the directory's contents is returned to be yielded.
return [](auto d, auto & sink) -> WireFormatGenerator {
sink.createDirectory(d.path);
return restore(sink, std::move(d.contents));
}(std::move(d), sink);
},
},
std::move(*entry)
);
}
}
/* Parses a NAR from `source` and returns the generator produced by
   restore(), which applies the parsed entries to `sink`. */
WireFormatGenerator parseAndCopyDump(ParseSink & sink, Source & source)
{
    auto entries = nar::parse(source);
    return restore(sink, std::move(entries));
}
void parseDump(ParseSink & sink, Source & source) void parseDump(ParseSink & sink, Source & source)

View file

@ -1,6 +1,7 @@
#pragma once #pragma once
///@file ///@file
#include "generator.hh"
#include "types.hh" #include "types.hh"
#include "serialise.hh" #include "serialise.hh"
#include "file-system.hh" #include "file-system.hh"
@ -116,6 +117,49 @@ struct RetrieveRegularNARSink : ParseSink
} }
}; };
/**
 * Types yielded by the generator-based NAR parser.
 *
 * Several members below are non-owning (`std::string_view`, `const Path &`).
 * In the parser added alongside these declarations they refer to parameters
 * or locals of the parser coroutine, so treat them as valid only until the
 * generator that yielded the entry is resumed. Copy anything that must
 * outlive that point.
 */
namespace nar {
struct MetadataString;
struct MetadataRaw;
struct File;
struct Symlink;
struct Directory;
/** One item produced by the parser: either metadata or a file system object. */
using Entry = std::variant<MetadataString, MetadataRaw, File, Symlink, Directory>;
/** A tag or string read from the archive. Non-owning view. */
struct MetadataString
{
std::string_view data;
};
/** Metadata yielded as bytes; the parser uses it for the file size and the content padding. */
struct MetadataRaw
{
Bytes raw;
};
/** A regular file. */
struct File
{
// non-owning reference to the path the parser was given for this object
const Path & path;
bool executable;
// size of the contents as declared in the archive
uint64_t size;
// yields the file contents in blocks; the parser asserts that this
// generator has been fully consumed before the parser is resumed
Generator<Bytes> contents;
};
/** A symbolic link. Both members are non-owning references. */
struct Symlink
{
const Path & path;
const Path & target;
};
/** A directory. */
struct Directory
{
// non-owning reference to the path the parser was given for this object
const Path & path;
// yields the entries inside the directory; the parser asserts that this
// generator has been fully consumed before the parser is resumed
Generator<Entry> contents;
};
/**
 * Parses a NAR read from `source` and yields its entries.
 * Throws if the input does not start with the expected NAR version string.
 */
Generator<Entry> parse(Source & source);
}
WireFormatGenerator parseAndCopyDump(ParseSink & sink, Source & source); WireFormatGenerator parseAndCopyDump(ParseSink & sink, Source & source);
void parseDump(ParseSink & sink, Source & source); void parseDump(ParseSink & sink, Source & source);