libutil: begin porting serialization to generators

Generators are a better basis for serializers than streaming into sinks
(as we currently do), for many reasons. They can be used as sources if
one wishes to, without requiring an intermediate sink to serialize full
data sets into memory, or boost coroutines to turn sinks into sources.
They compose more naturally, as one can just yield a sub-generator
instead of being forced to wrap entire substreams into clunky functions
or even more clunky custom types to implement operator<< on. They allow
wrappers to transform data with clear ownership semantics, removing the
need for explicit memory allocations and Source wrappers. And there are
many other benefits.

Change-Id: I361d89ff556354f6930d9204f55117565f2f7f20
This commit is contained in:
eldritch horrors 2024-03-19 22:22:18 +01:00
parent c65f5dd18e
commit 5eec6418de
5 changed files with 175 additions and 91 deletions

View file

@ -160,8 +160,7 @@ struct TunnelSink : Sink
TunnelSink(Sink & to) : to(to) { } TunnelSink(Sink & to) : to(to) { }
void operator () (std::string_view data) void operator () (std::string_view data)
{ {
to << STDERR_WRITE; to << STDERR_WRITE << data;
writeString(data, to);
} }
}; };

View file

@ -897,7 +897,7 @@ std::exception_ptr RemoteStore::Connection::processStderr(Sink * sink, Source *
if (!source) throw Error("no source"); if (!source) throw Error("no source");
size_t len = readNum<size_t>(from); size_t len = readNum<size_t>(from);
auto buf = std::make_unique<char[]>(len); auto buf = std::make_unique<char[]>(len);
writeString({(const char *) buf.get(), source->read(buf.get(), len)}, to); to << std::string_view((const char *) buf.get(), source->read(buf.get(), len));
to.flush(); to.flush();
} }

View file

@ -330,55 +330,40 @@ void writePadding(size_t len, Sink & sink)
} }
void writeString(std::string_view data, Sink & sink) WireFormatGenerator SerializingTransform::operator()(std::string_view s)
{ {
sink << data.size(); co_yield s.size();
sink(data); co_yield Bytes(s.begin(), s.size());
writePadding(data.size(), sink); co_yield SerializingTransform::padding(s.size());
} }
WireFormatGenerator SerializingTransform::operator()(const Strings & ss)
Sink & operator << (Sink & sink, std::string_view s)
{ {
writeString(s, sink); co_yield ss.size();
return sink; for (const auto & s : ss)
co_yield std::string_view(s);
} }
WireFormatGenerator SerializingTransform::operator()(const StringSet & ss)
template<class T> void writeStrings(const T & ss, Sink & sink)
{ {
sink << ss.size(); co_yield ss.size();
for (auto & i : ss) for (const auto & s : ss)
sink << i; co_yield std::string_view(s);
} }
Sink & operator << (Sink & sink, const Strings & s) WireFormatGenerator SerializingTransform::operator()(const Error & ex)
{
writeStrings(s, sink);
return sink;
}
Sink & operator << (Sink & sink, const StringSet & s)
{
writeStrings(s, sink);
return sink;
}
Sink & operator << (Sink & sink, const Error & ex)
{ {
auto & info = ex.info(); auto & info = ex.info();
sink co_yield "Error";
<< "Error" co_yield info.level;
<< info.level co_yield "Error"; // removed
<< "Error" // removed co_yield info.msg.str();
<< info.msg.str() co_yield 0; // FIXME: info.errPos
<< 0 // FIXME: info.errPos co_yield info.traces.size();
<< info.traces.size();
for (auto & trace : info.traces) { for (auto & trace : info.traces) {
sink << 0; // FIXME: trace.pos co_yield 0; // FIXME: trace.pos
sink << trace.hint.str(); co_yield trace.hint.str();
} }
return sink;
} }

View file

@ -350,12 +350,15 @@ inline Sink & operator<<(Sink & sink, Generator<Bytes> && g)
return sink; return sink;
} }
void writePadding(size_t len, Sink & sink); struct SerializingTransform;
void writeString(std::string_view s, Sink & sink); using WireFormatGenerator = Generator<Bytes, SerializingTransform>;
inline Sink & operator << (Sink & sink, uint64_t n) struct SerializingTransform
{
std::array<unsigned char, 8> buf;
Bytes operator()(uint64_t n)
{ {
unsigned char buf[8];
buf[0] = n & 0xff; buf[0] = n & 0xff;
buf[1] = (n >> 8) & 0xff; buf[1] = (n >> 8) & 0xff;
buf[2] = (n >> 16) & 0xff; buf[2] = (n >> 16) & 0xff;
@ -364,19 +367,65 @@ inline Sink & operator << (Sink & sink, uint64_t n)
buf[5] = (n >> 40) & 0xff; buf[5] = (n >> 40) & 0xff;
buf[6] = (n >> 48) & 0xff; buf[6] = (n >> 48) & 0xff;
buf[7] = (unsigned char) (n >> 56) & 0xff; buf[7] = (unsigned char) (n >> 56) & 0xff;
sink({(char *) buf, sizeof(buf)}); return {reinterpret_cast<const char *>(buf.begin()), 8};
return sink;
} }
Sink & operator << (Sink & in, const Error & ex); static Bytes padding(size_t unpadded)
Sink & operator << (Sink & sink, std::string_view s); {
Sink & operator << (Sink & sink, const Strings & s); return Bytes("\0\0\0\0\0\0\0", unpadded % 8 ? 8 - unpadded % 8 : 0);
Sink & operator << (Sink & sink, const StringSet & s); }
// Opt in to generator chaining: a co_yielded Generator<Bytes> is moved
// through unchanged instead of being serialized by one of the other
// overloads. Without this overload, co_yielding another generator of
// any type will cause a type error.
// NOTE(review): how the returned generator is then drained is decided
// by the Generator machinery in generator.hh, which is not shown here.
auto operator()(Generator<Bytes> && g)
{
// `g` is a named rvalue reference, so the explicit move is what turns
// it back into an rvalue for the by-value return.
return std::move(g);
}
// Pass raw char spans through as-is: the span is returned as Bytes with
// no length prefix and no padding added by this overload.
//
// Only choose this for *exactly* char spans (std::span<char> or
// std::span<const char>); do not allow implicit conversions. Allowing
// them would cause ambiguities with string literals, and resolving
// those with more string-like overloads needs a lot.
template<typename Span>
requires std::same_as<Span, std::span<char>> || std::same_as<Span, std::span<const char>>
Bytes operator()(Span s)
{
return s;
}
// Serializes a string: its size, then its bytes, then the zero padding
// produced by padding() for that size.
WireFormatGenerator operator()(std::string_view s);
// Serializes a list of strings: the element count, then every element
// as a string_view (see the overload above).
WireFormatGenerator operator()(const Strings & s);
// Serializes a set of strings: the element count, then every element
// as a string_view, in the set's iteration order.
WireFormatGenerator operator()(const StringSet & s);
// Serializes an Error: the tag "Error", the level, a second "Error"
// (placeholder for a removed field), the message, a 0 in place of the
// error position, the number of traces, and for each trace a 0 in place
// of its position followed by the trace hint.
WireFormatGenerator operator()(const Error & s);
};
void writePadding(size_t len, Sink & sink);
// Writes `u` to `sink` by co_yielding it from a temporary
// WireFormatGenerator and streaming that generator into the sink.
// Returns the result of that stream expression so calls can be chained.
//
// NOTE(review): the lambda captures `u` by reference and is itself a
// temporary. It only stays valid until the end of this full expression,
// so the generator has to be created and consumed in this one
// statement. Do not split it into a named generator plus a separate
// `sink << gen` without also changing the capture.
inline Sink & operator<<(Sink & sink, uint64_t u)
{
return sink << [&]() -> WireFormatGenerator { co_yield u; }();
}
// Writes the string `s` to `sink` by co_yielding it from a temporary
// WireFormatGenerator and streaming that generator into the sink.
// Returns the result of that stream expression so calls can be chained.
//
// NOTE(review): the by-reference capturing lambda is a temporary that
// only lives until the end of this full expression; keep creation and
// consumption of the generator in this single statement.
inline Sink & operator<<(Sink & sink, std::string_view s)
{
return sink << [&]() -> WireFormatGenerator { co_yield s; }();
}
// Writes the string list `s` to `sink` by co_yielding it from a
// temporary WireFormatGenerator and streaming that generator into the
// sink. Returns the result of that stream expression so calls can be
// chained.
//
// NOTE(review): the by-reference capturing lambda is a temporary that
// only lives until the end of this full expression; keep creation and
// consumption of the generator in this single statement.
inline Sink & operator<<(Sink & sink, const Strings & s)
{
return sink << [&]() -> WireFormatGenerator { co_yield s; }();
}
// Writes the string set `s` to `sink` by co_yielding it from a
// temporary WireFormatGenerator and streaming that generator into the
// sink. Returns the result of that stream expression so calls can be
// chained.
//
// NOTE(review): the by-reference capturing lambda is a temporary that
// only lives until the end of this full expression; keep creation and
// consumption of the generator in this single statement.
inline Sink & operator<<(Sink & sink, const StringSet & s)
{
return sink << [&]() -> WireFormatGenerator { co_yield s; }();
}
// Writes the error `ex` to `sink` by co_yielding it from a temporary
// WireFormatGenerator and streaming that generator into the sink.
// Returns the result of that stream expression so calls can be chained.
//
// NOTE(review): the by-reference capturing lambda is a temporary that
// only lives until the end of this full expression; keep creation and
// consumption of the generator in this single statement.
inline Sink & operator<<(Sink & sink, const Error & ex)
{
return sink << [&]() -> WireFormatGenerator { co_yield ex; }();
}
MakeError(SerialisationError, Error); MakeError(SerialisationError, Error);
template<typename T> template<typename T>
T readNum(Source & source) T readNum(Source & source)
{ {

View file

@ -2,30 +2,47 @@
#include "error.hh" #include "error.hh"
#include "fmt.hh" #include "fmt.hh"
#include "pos-table.hh" #include "pos-table.hh"
#include "generator.hh"
#include "ref.hh" #include "ref.hh"
#include "types.hh" #include "types.hh"
#include <concepts>
#include <cstdint>
#include <initializer_list>
#include <limits.h> #include <limits.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <numeric> #include <numeric>
#include <stdexcept>
#include <string_view>
#include <type_traits>
namespace nix { namespace nix {
TEST(Sink, uint64_t) // don't deduce the type of `val` for added insurance.
template<typename T>
static std::string toWire(const std::type_identity_t<T> & val)
{ {
StringSink s; std::string result;
s << 42; auto g = [] (const auto & val) -> WireFormatGenerator { co_yield val; }(val);
ASSERT_EQ(s.s, std::string({42, 0, 0, 0, 0, 0, 0, 0})); while (auto buffer = g.next()) {
result.append(buffer->data(), buffer->size());
}
return result;
} }
TEST(Sink, string_view) TEST(WireFormatGenerator, uint64_t)
{ {
StringSink s; auto s = toWire<uint64_t>(42);
s << ""; ASSERT_EQ(s, std::string({42, 0, 0, 0, 0, 0, 0, 0}));
}
TEST(WireFormatGenerator, string_view)
{
auto s = toWire<std::string_view>("");
// clang-format off // clang-format off
ASSERT_EQ( ASSERT_EQ(
s.s, s,
std::string({ std::string({
// length // length
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -34,11 +51,10 @@ TEST(Sink, string_view)
); );
// clang-format on // clang-format on
s = {}; s = toWire<std::string_view>("test");
s << "test";
// clang-format off // clang-format off
ASSERT_EQ( ASSERT_EQ(
s.s, s,
std::string({ std::string({
// length // length
4, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0,
@ -50,11 +66,10 @@ TEST(Sink, string_view)
); );
// clang-format on // clang-format on
s = {}; s = toWire<std::string_view>("longer string");
s << "longer string";
// clang-format off // clang-format off
ASSERT_EQ( ASSERT_EQ(
s.s, s,
std::string({ std::string({
// length // length
13, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0,
@ -67,13 +82,12 @@ TEST(Sink, string_view)
// clang-format on // clang-format on
} }
TEST(Sink, StringSet) TEST(WireFormatGenerator, StringSet)
{ {
StringSink s; auto s = toWire<StringSet>({});
s << StringSet{};
// clang-format off // clang-format off
ASSERT_EQ( ASSERT_EQ(
s.s, s,
std::string({ std::string({
// length // length
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -82,11 +96,10 @@ TEST(Sink, StringSet)
); );
// clang-format on // clang-format on
s = {}; s = toWire<StringSet>({"a", ""});
s << StringSet{"a", ""};
// clang-format off // clang-format off
ASSERT_EQ( ASSERT_EQ(
s.s, s,
std::string({ std::string({
// length // length
2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
@ -99,13 +112,12 @@ TEST(Sink, StringSet)
// clang-format on // clang-format on
} }
TEST(Sink, Strings) TEST(WireFormatGenerator, Strings)
{ {
StringSink s; auto s = toWire<Strings>({});
s << Strings{};
// clang-format off // clang-format off
ASSERT_EQ( ASSERT_EQ(
s.s, s,
std::string({ std::string({
// length // length
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -114,11 +126,10 @@ TEST(Sink, Strings)
); );
// clang-format on // clang-format on
s = {}; s = toWire<Strings>({"a", ""});
s << Strings{"a", ""};
// clang-format off // clang-format off
ASSERT_EQ( ASSERT_EQ(
s.s, s,
std::string({ std::string({
// length // length
2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
@ -131,23 +142,22 @@ TEST(Sink, Strings)
// clang-format on // clang-format on
} }
TEST(Sink, Error) TEST(WireFormatGenerator, Error)
{ {
PosTable pt; PosTable pt;
auto o = pt.addOrigin(Pos::String{make_ref<std::string>("test")}, 4); auto o = pt.addOrigin(Pos::String{make_ref<std::string>("test")}, 4);
StringSink s; auto s = toWire<Error>(Error{ErrorInfo{
s << Error{ErrorInfo{
.level = lvlInfo, .level = lvlInfo,
.msg = HintFmt("foo"), .msg = HintFmt("foo"),
.pos = pt[pt.add(o, 1)], .pos = pt[pt.add(o, 1)],
.traces = {{.pos = pt[pt.add(o, 2)], .hint = HintFmt("b %1%", "foo")}}, .traces = {{.pos = pt[pt.add(o, 2)], .hint = HintFmt("b %1%", "foo")}},
}}; }});
// NOTE position of the error and all traces are ignored // NOTE position of the error and all traces are ignored
// by the wire format // by the wire format
// clang-format off // clang-format off
ASSERT_EQ( ASSERT_EQ(
s.s, s,
std::string({ std::string({
5, 0, 0, 0, 0, 0, 0, 0, 'E', 'r', 'r', 'o', 'r', 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 'E', 'r', 'r', 'o', 'r', 0, 0, 0,
3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0,
@ -163,4 +173,45 @@ TEST(Sink, Error)
// clang-format on // clang-format on
} }
// End-to-end example: yields an integer, a set of strings, a string and
// a bool from one WireFormatGenerator, collects every chunk the
// generator produces, and compares the concatenation with the expected
// byte sequence.
TEST(WireFormatGenerator, exampleMessage)
{
auto gen = []() -> WireFormatGenerator {
// std::set iterates in sorted order, so the elements are expected on
// the wire as "", "a", "longer string" regardless of the order here.
std::set<std::string> foo{"a", "longer string", ""};
// presumably int and bool reach the uint64_t overload via implicit
// conversion; the expected bytes below show 8-byte encodings for both.
co_yield 42;
co_yield foo;
co_yield std::string_view("test");
co_yield true;
}();
// drain the generator chunk by chunk until next() yields nothing
std::vector<char> full;
while (auto s = gen.next()) {
full.insert(full.end(), s->begin(), s->end());
}
ASSERT_EQ(
full,
(std::vector<char>{
// clang-format off
// 42
42, 0, 0, 0, 0, 0, 0, 0,
// foo (element count first)
3, 0, 0, 0, 0, 0, 0, 0,
/// "" (length 0, no data, no padding)
0, 0, 0, 0, 0, 0, 0, 0,
/// a (length, then data padded to 8 bytes)
1, 0, 0, 0, 0, 0, 0, 0,
'a', 0, 0, 0, 0, 0, 0, 0,
/// longer string (13 bytes of data padded to 16)
13, 0, 0, 0, 0, 0, 0, 0,
'l', 'o', 'n', 'g', 'e', 'r', ' ', 's', 't', 'r', 'i', 'n', 'g', 0, 0, 0,
// foo done
// test
4, 0, 0, 0, 0, 0, 0, 0,
't', 'e', 's', 't', 0, 0, 0, 0,
// true
1, 0, 0, 0, 0, 0, 0, 0,
// the marker below is spelled with a space after `//`, matching the
// `// clang-format off` marker above
// clang-format on
}));
}
} }