lix/src/libutil/serialise.hh
Robert Hensing c4d903ddb0 Fix memory corruption caused by GC-invisible coroutine stacks
Crucially this introduces BoehmGCStackAllocator, but it also
adds a bunch of wiring to avoid making libutil depend on bdw-gc.

Part of the solutions for #4178, #4200
2020-10-30 21:21:59 +01:00

515 lines
12 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#pragma once
#include <memory>
#include "types.hh"
#include "util.hh"
namespace boost::context { struct stack_context; }
namespace nix {
/* Interface for anything that consumes a stream of raw bytes. */
struct Sink
{
    virtual ~Sink() = default;

    /* Consume the `len` bytes starting at `data`. Must be provided by
       every concrete sink. */
    virtual void operator () (const unsigned char * data, size_t len) = 0;

    /* Health indicator; this base implementation always reports true. */
    virtual bool good() { return true; }

    /* Convenience overload: forwards the contents of `s` to the
       raw-buffer overload above. */
    void operator () (const std::string & s)
    {
        const auto * bytes = reinterpret_cast<const unsigned char *>(s.data());
        (*this)(bytes, s.size());
    }
};
/* A sink that silently discards everything handed to it. */
struct NullSink : Sink
{
    void operator () (const unsigned char *, size_t) override
    {
        /* intentionally empty: the data is dropped */
    }
};
/* A buffered abstract sink. Warning: a BufferedSink should not be
used from multiple threads concurrently. */
struct BufferedSink : virtual Sink
{
size_t bufSize, bufPos;
std::unique_ptr<unsigned char[]> buffer;
BufferedSink(size_t bufSize = 32 * 1024)
: bufSize(bufSize), bufPos(0), buffer(nullptr) { }
void operator () (const unsigned char * data, size_t len) override;
void operator () (const std::string & s)
{
Sink::operator()(s);
}
void flush();
virtual void write(const unsigned char * data, size_t len) = 0;
};
/* Interface for anything that produces a stream of raw bytes. */
struct Source
{
    virtual ~Source() = default;

    /* Fill `data` with exactly `len` bytes. Blocks until everything
       requested has arrived, or throws an error if the data is never
       going to become available. */
    void operator () (unsigned char * data, size_t len);

    /* Place at most `len` bytes into `data` and return how many were
       stored. Blocks until at least one byte is available. */
    virtual size_t read(unsigned char * data, size_t len) = 0;

    /* Health indicator; this base implementation always reports true. */
    virtual bool good() { return true; }

    /* Defined out of line. NOTE(review): presumably these read the
       source to exhaustion, into `sink` or a returned string -- confirm. */
    void drainInto(Sink & sink);
    std::string drain();
};
/* Abstract source with an attached buffer. Subclasses supply
   readUnbuffered(). Warning: not safe to use from several threads at
   the same time. */
struct BufferedSource : Source
{
    /* Buffer capacity plus two positions into it (NOTE(review):
       presumably fill and consume offsets -- confirm in the .cc). The
       buffer pointer starts out null. */
    size_t bufSize, bufPosIn, bufPosOut;
    std::unique_ptr<unsigned char[]> buffer;

    BufferedSource(size_t bufSize = 32 * 1024)
        : bufSize(bufSize)
        , bufPosIn(0)
        , bufPosOut(0)
        , buffer()
    { }

    size_t read(unsigned char * data, size_t len) override;

    bool hasData();

protected:
    /* The underlying read primitive that concrete sources override. */
    virtual size_t readUnbuffered(unsigned char * data, size_t len) = 0;
};
/* A sink that writes data to a file descriptor. */
struct FdSink : BufferedSink
{
int fd;
bool warn = false;
size_t written = 0;
FdSink() : fd(-1) { }
FdSink(int fd) : fd(fd) { }
FdSink(FdSink&&) = default;
FdSink& operator=(FdSink && s)
{
flush();
fd = s.fd;
s.fd = -1;
warn = s.warn;
written = s.written;
return *this;
}
~FdSink();
void write(const unsigned char * data, size_t len) override;
bool good() override;
private:
bool _good = true;
};
/* A source that reads data from a file descriptor. */
struct FdSource : BufferedSource
{
int fd;
size_t read = 0;
FdSource() : fd(-1) { }
FdSource(int fd) : fd(fd) { }
FdSource(FdSource&&) = default;
FdSource& operator=(FdSource && s)
{
fd = s.fd;
s.fd = -1;
read = s.read;
return *this;
}
bool good() override;
protected:
size_t readUnbuffered(unsigned char * data, size_t len) override;
private:
bool _good = true;
};
/* A sink that accumulates its input in a (shared) std::string. */
struct StringSink : Sink
{
    ref<std::string> s;

    /* Start with a fresh, empty string. */
    StringSink()
        : s(make_ref<std::string>())
    { }

    /* Start with a fresh string whose capacity is reserved up front. */
    explicit StringSink(const size_t reservedSize)
        : StringSink()
    {
        s->reserve(reservedSize);
    }

    /* Write into a string supplied by the caller. */
    StringSink(ref<std::string> s)
        : s(s)
    { }

    void operator () (const unsigned char * data, size_t len) override;
};
/* A source that hands out the bytes of a string. Only a reference to
   the string is stored, so it must outlive the StringSource. */
struct StringSource : Source
{
    const string & s;

    /* Current offset into `s`; starts at the beginning. */
    size_t pos;

    StringSource(const string & _s)
        : s(_s)
        , pos(0)
    { }

    size_t read(unsigned char * data, size_t len) override;
};
/* A sink that writes all incoming data to two other sinks. */
struct TeeSink : Sink
{
Sink & sink1, & sink2;
TeeSink(Sink & sink1, Sink & sink2) : sink1(sink1), sink2(sink2) { }
virtual void operator () (const unsigned char * data, size_t len)
{
sink1(data, len);
sink2(data, len);
}
};
/* Adapter class of a Source that saves all data read to a sink. */
struct TeeSource : Source
{
Source & orig;
Sink & sink;
TeeSource(Source & orig, Sink & sink)
: orig(orig), sink(sink) { }
size_t read(unsigned char * data, size_t len)
{
size_t n = orig.read(data, len);
sink(data, n);
return n;
}
};
/* A reader that consumes the original Source until 'size'. */
struct SizedSource : Source
{
Source & orig;
size_t remain;
SizedSource(Source & orig, size_t size)
: orig(orig), remain(size) { }
size_t read(unsigned char * data, size_t len)
{
if (this->remain <= 0) {
throw EndOfFile("sized: unexpected end-of-file");
}
len = std::min(len, this->remain);
size_t n = this->orig.read(data, len);
this->remain -= n;
return n;
}
/* Consume the original source until no remain data is left to consume. */
size_t drainAll()
{
std::vector<unsigned char> buf(8192);
size_t sum = 0;
while (this->remain > 0) {
size_t n = read(buf.data(), buf.size());
sum += n;
}
return sum;
}
};
/* A sink that that just counts the number of bytes given to it */
struct LengthSink : Sink
{
uint64_t length = 0;
virtual void operator () (const unsigned char * _, size_t len)
{
length += len;
}
};
/* Convert a function into a sink. */
struct LambdaSink : Sink
{
typedef std::function<void(const unsigned char *, size_t)> lambda_t;
lambda_t lambda;
LambdaSink(const lambda_t & lambda) : lambda(lambda) { }
virtual void operator () (const unsigned char * data, size_t len)
{
lambda(data, len);
}
};
/* Wrap a callable so it can be used as a source. */
struct LambdaSource : Source
{
    using lambda_t = std::function<size_t(unsigned char *, size_t)>;

    lambda_t lambda;

    LambdaSource(const lambda_t & lambda)
        : lambda(lambda)
    { }

    /* Delegate to the stored callable and return whatever it returns. */
    size_t read(unsigned char * data, size_t len) override
    {
        const size_t got = lambda(data, len);
        return got;
    }
};
/* Concatenate two sources: once the first is exhausted, reading
   continues from the second. */
struct ChainSource : Source
{
    Source & source1;
    Source & source2;

    /* Starts out false; the switch-over logic lives in read(), which
       is defined out of line. */
    bool useSecond = false;

    ChainSource(Source & s1, Source & s2)
        : source1(s1)
        , source2(s2)
    { }

    size_t read(unsigned char * data, size_t len) override;
};
/* Convert a function that feeds data into a Sink into a Source. The
Source executes the function as a coroutine.

`fun` is the producer; it is handed the Sink it should write to.
`eof` is an optional callback whose default throws
EndOfFile("coroutine has finished"). NOTE(review): presumably `eof`
is invoked when data is requested after `fun` has returned -- confirm
against the out-of-line definition. */
std::unique_ptr<Source> sinkToSource(
std::function<void(Sink &)> fun,
std::function<void()> eof = []() {
throw EndOfFile("coroutine has finished");
});
/* Emit padding for a payload of `len` bytes to `sink`.
NOTE(review): the padding rule (presumably zero bytes up to the next
8-byte boundary) lives in the out-of-line definition -- confirm. */
void writePadding(size_t len, Sink & sink);
/* Serialise the `len` bytes at `buf` as a string to `sink`.
NOTE(review): the wire layout (presumably length, bytes, padding) is
defined out of line -- confirm. */
void writeString(const unsigned char * buf, size_t len, Sink & sink);
/* Serialise `n` as eight bytes, least significant byte first, and
   pass them to `sink`. Returns `sink` so calls can be chained. */
inline Sink & operator << (Sink & sink, uint64_t n)
{
    unsigned char bytes[8];
    for (size_t i = 0; i < sizeof(bytes); ++i) {
        /* Peel off the lowest byte, then shift the next one down. */
        bytes[i] = static_cast<unsigned char>(n & 0xff);
        n >>= 8;
    }
    sink(bytes, sizeof(bytes));
    return sink;
}
/* Serialisation operators for further types; all are defined out of
line and return a Sink reference so that writes can be chained.
NOTE(review): the wire formats are not visible in this header. */
Sink & operator << (Sink & sink, const string & s);
Sink & operator << (Sink & sink, const Strings & s);
Sink & operator << (Sink & sink, const StringSet & s);
Sink & operator << (Sink & in, const Error & ex);
/* Exception type thrown by readNum() below when a decoded integer does
not fit the requested type. Declared through the project's MakeError
macro (NOTE(review): presumably as a subclass of Error -- confirm). */
MakeError(SerialisationError, Error);
/* Read an eight-byte, least-significant-byte-first integer from
   `source` and return it as a T. Throws SerialisationError if the
   decoded value is larger than T's maximum. */
template<typename T>
T readNum(Source & source)
{
    unsigned char raw[8];
    source(raw, sizeof(raw));

    /* Fold from the most significant byte (last in the buffer) down
       to the least significant one. */
    uint64_t value = 0;
    for (size_t i = sizeof(raw); i-- > 0; )
        value = (value << 8) | raw[i];

    const uint64_t limit = (uint64_t) std::numeric_limits<T>::max();
    if (value > limit)
        throw SerialisationError("serialised integer %d is too large for type '%s'", value, typeid(T).name());

    return static_cast<T>(value);
}
/* Read a serialised integer that has to fit in an unsigned int;
   range checking is done by readNum. */
inline unsigned int readInt(Source & source)
{
    const unsigned int value = readNum<unsigned int>(source);
    return value;
}
/* Read a serialised integer using the full 64-bit range. */
inline uint64_t readLongLong(Source & source)
{
    const uint64_t value = readNum<uint64_t>(source);
    return value;
}
/* Deserialisation helpers; all are defined out of line.
NOTE(review): presumably these mirror writePadding / writeString and
the `<<` operators above -- confirm the exact wire formats in the
implementation file. */
void readPadding(size_t len, Source & source);
/* Read a string into the caller's buffer `buf`; `max` is an upper
bound (presumably the buffer capacity -- confirm). Returns a size_t,
presumably the string length. */
size_t readString(unsigned char * buf, size_t max, Source & source);
/* Read a string and return it by value; `max` defaults to the largest
size_t, i.e. no practical limit. */
string readString(Source & source, size_t max = std::numeric_limits<size_t>::max());
/* Read a collection of strings into a container of type T. */
template<class T> T readStrings(Source & source);
/* Stream-style extraction of a string; returns `in` for chaining. */
Source & operator >> (Source & in, string & s);
/* Stream-style extraction of an integer: decodes a T with readNum and
   stores it in `n`. Returns `in` so reads can be chained. */
template<typename T>
Source & operator >> (Source & in, T & n)
{
    const T decoded = readNum<T>(in);
    n = decoded;
    return in;
}
/* Stream-style extraction of a boolean: reads one serialised 64-bit
   integer and treats any non-zero value as true. Returns `in` so reads
   can be chained.

   This used to be declared as a template whose parameter T appeared
   nowhere in the signature. T could therefore never be deduced, the
   overload was never selected, and `in >> b` silently went through the
   generic `operator >> (Source &, T &)` with T = bool instead. As a
   plain inline function it is now preferred over the generic template
   for bool operands. */
inline Source & operator >> (Source & in, bool & b)
{
    b = readNum<uint64_t>(in) != 0;
    return in;
}
/* Read an Error from `source` and return it by value. Defined out of
line. NOTE(review): presumably the inverse of
`operator << (Sink &, const Error &)` -- confirm. */
Error readError(Source & source);
/* Adapter that exposes a std::basic_istream<char> as a Source. */
struct StreamToSourceAdapter : Source
{
    std::shared_ptr<std::basic_istream<char>> istream;

    StreamToSourceAdapter(std::shared_ptr<std::basic_istream<char>> istream)
        : istream(istream)
    { }

    /* Try to read `len` bytes from the stream and return how many were
       actually obtained. Throws EndOfFile when the stream is at
       end-of-file and delivered nothing, and Error when the read
       failed for any reason other than end-of-file. */
    size_t read(unsigned char * data, size_t len) override
    {
        const bool ok = static_cast<bool>(istream->read(reinterpret_cast<char *>(data), len));
        if (!ok) {
            if (!istream->eof())
                throw Error("I/O error in StreamToSourceAdapter");
            /* A short read at end-of-file is fine as long as some
               bytes were delivered. */
            if (istream->gcount() == 0)
                throw EndOfFile("end of file");
        }
        return istream->gcount();
    }
};
/* A source that reads a distinct format of concatenated chunks back into its
logical form, in order to guarantee a known state to the original stream,
even in the event of errors.
Use with FramedSink, which also allows the logical stream to be terminated
in the event of an exception.
*/
struct FramedSource : Source
{
Source & from;
bool eof = false;
std::vector<unsigned char> pending;
size_t pos = 0;
FramedSource(Source & from) : from(from)
{ }
~FramedSource()
{
if (!eof) {
while (true) {
auto n = readInt(from);
if (!n) break;
std::vector<unsigned char> data(n);
from(data.data(), n);
}
}
}
size_t read(unsigned char * data, size_t len) override
{
if (eof) throw EndOfFile("reached end of FramedSource");
if (pos >= pending.size()) {
size_t len = readInt(from);
if (!len) {
eof = true;
return 0;
}
pending = std::vector<unsigned char>(len);
pos = 0;
from(pending.data(), len);
}
auto n = std::min(len, pending.size() - pos);
memcpy(data, pending.data() + pos, n);
pos += n;
return n;
}
};
/* Sink that emits its data as chunks in the format FramedSource reads:
   a length followed by that many bytes.

   The exception_ptr reference lets the owner abort the stream once it
   has detected that an error occurred on the remote end.
*/
struct FramedSink : nix::BufferedSink
{
    BufferedSink & to;
    std::exception_ptr & ex;

    FramedSink(BufferedSink & to, std::exception_ptr & ex)
        : to(to)
        , ex(ex)
    { }

    /* Terminate the framed stream with a zero length and flush the
       underlying sink. Failures are swallowed via ignoreException()
       so nothing escapes the destructor. */
    ~FramedSink()
    {
        try {
            to << 0;
            to.flush();
        } catch (...) {
            ignoreException();
        }
    }

    /* Emit one chunk: the length, then the payload. */
    void write(const unsigned char * data, size_t len) override
    {
        /* If the remote side has reported an error, stop sending:
           clear the shared slot and rethrow what it held. */
        if (ex) {
            const std::exception_ptr remoteError = ex;
            ex = nullptr;
            std::rethrow_exception(remoteError);
        }

        to << len;
        to(data, len);
    }
};
/* Stack allocation strategy for sinkToSource.
   Mutable to avoid a boehm gc dependency in libutil.

   boost::context doesn't provide a virtual class, so we define our own.
*/
struct StackAllocator {
    /* Polymorphic base: the virtual destructor makes destroying an
       implementation through a StackAllocator pointer well defined. */
    virtual ~StackAllocator() = default;

    /* Obtain a new stack. */
    virtual boost::context::stack_context allocate() = 0;

    /* Release a stack previously returned by allocate(). */
    virtual void deallocate(boost::context::stack_context sctx) = 0;

    /* The stack allocator to use in sinkToSource and potentially elsewhere.
       It is reassigned by the initGC() method in libexpr. */
    static StackAllocator *defaultAllocator;
};
}