decompress(): Use a Source and Sink

This allows decompression to happen in O(1) memory.
This commit is contained in:
Eelco Dolstra 2018-03-16 16:59:31 +01:00
parent 64441f0551
commit 3e6b194d78
No known key found for this signature in database
GPG key ID: 8170B4726D7198DE
6 changed files with 210 additions and 91 deletions

View file

@ -203,22 +203,18 @@ void BinaryCacheStore::narFromPath(const Path & storePath, Sink & sink)
stats.narRead++; stats.narRead++;
stats.narReadCompressedBytes += nar->size(); stats.narReadCompressedBytes += nar->size();
/* Decompress the NAR. FIXME: would be nice to have the remote uint64_t narSize = 0;
side do this. */
try {
nar = decompress(info->compression, *nar);
} catch (UnknownCompressionMethod &) {
throw Error(format("binary cache path '%s' uses unknown compression method '%s'")
% storePath % info->compression);
}
stats.narReadBytes += nar->size(); StringSource source(*nar);
printMsg(lvlTalkative, format("exporting path '%1%' (%2% bytes)") % storePath % nar->size()); LambdaSink wrapperSink([&](const unsigned char * data, size_t len) {
sink(data, len);
narSize += len;
});
assert(nar->size() % 8 == 0); decompress(info->compression, source, wrapperSink);
sink((unsigned char *) nar->c_str(), nar->size()); stats.narReadBytes += narSize;
} }
void BinaryCacheStore::queryPathInfoUncached(const Path & storePath, void BinaryCacheStore::queryPathInfoUncached(const Path & storePath,

View file

@ -17,7 +17,23 @@
namespace nix { namespace nix {
static ref<std::string> decompressXZ(const std::string & in) static const size_t bufSize = 32 * 1024;
/* Pass-through "decompressor" used for the `none` method: copies
   everything readable from `source` into `sink` unchanged, one
   bufSize-sized chunk at a time. Returns once `source.read()` signals
   EndOfFile. */
static void decompressNone(Source & source, Sink & sink)
{
    std::vector<unsigned char> chunk(bufSize);

    for (;;) {
        size_t got;
        /* Only the read is guarded, so an EndOfFile raised by the sink
           itself still propagates to the caller. */
        try {
            got = source.read(chunk.data(), chunk.size());
        } catch (EndOfFile &) {
            return;
        }
        sink(chunk.data(), got);
    }
}
static void decompressXZ(Source & source, Sink & sink)
{ {
lzma_stream strm(LZMA_STREAM_INIT); lzma_stream strm(LZMA_STREAM_INIT);
@ -29,36 +45,44 @@ static ref<std::string> decompressXZ(const std::string & in)
Finally free([&]() { lzma_end(&strm); }); Finally free([&]() { lzma_end(&strm); });
lzma_action action = LZMA_RUN; lzma_action action = LZMA_RUN;
uint8_t outbuf[BUFSIZ]; std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize);
ref<std::string> res = make_ref<std::string>(); strm.next_in = nullptr;
strm.next_in = (uint8_t *) in.c_str(); strm.avail_in = 0;
strm.avail_in = in.size(); strm.next_out = outbuf.data();
strm.next_out = outbuf; strm.avail_out = outbuf.size();
strm.avail_out = sizeof(outbuf); bool eof = false;
while (true) { while (true) {
checkInterrupt(); checkInterrupt();
if (strm.avail_in == 0 && !eof) {
strm.next_in = inbuf.data();
try {
strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size());
} catch (EndOfFile &) {
eof = true;
}
}
if (strm.avail_in == 0) if (strm.avail_in == 0)
action = LZMA_FINISH; action = LZMA_FINISH;
lzma_ret ret = lzma_code(&strm, action); lzma_ret ret = lzma_code(&strm, action);
if (strm.avail_out == 0 || ret == LZMA_STREAM_END) { if (strm.avail_out < outbuf.size()) {
res->append((char *) outbuf, sizeof(outbuf) - strm.avail_out); sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out);
strm.next_out = outbuf; strm.next_out = outbuf.data();
strm.avail_out = sizeof(outbuf); strm.avail_out = outbuf.size();
} }
if (ret == LZMA_STREAM_END) if (ret == LZMA_STREAM_END) return;
return res;
if (ret != LZMA_OK) if (ret != LZMA_OK)
throw CompressionError("error %d while decompressing xz file", ret); throw CompressionError("error %d while decompressing xz file", ret);
} }
} }
static ref<std::string> decompressBzip2(const std::string & in) static void decompressBzip2(Source & source, Sink & sink)
{ {
bz_stream strm; bz_stream strm;
memset(&strm, 0, sizeof(strm)); memset(&strm, 0, sizeof(strm));
@ -69,39 +93,50 @@ static ref<std::string> decompressBzip2(const std::string & in)
Finally free([&]() { BZ2_bzDecompressEnd(&strm); }); Finally free([&]() { BZ2_bzDecompressEnd(&strm); });
char outbuf[BUFSIZ]; std::vector<char> inbuf(bufSize), outbuf(bufSize);
ref<std::string> res = make_ref<std::string>(); strm.next_in = nullptr;
strm.next_in = (char *) in.c_str(); strm.avail_in = 0;
strm.avail_in = in.size(); strm.next_out = outbuf.data();
strm.next_out = outbuf; strm.avail_out = outbuf.size();
strm.avail_out = sizeof(outbuf); bool eof = false;
while (true) { while (true) {
checkInterrupt(); checkInterrupt();
int ret = BZ2_bzDecompress(&strm); if (strm.avail_in == 0 && !eof) {
strm.next_in = inbuf.data();
if (strm.avail_out == 0 || ret == BZ_STREAM_END) { try {
res->append(outbuf, sizeof(outbuf) - strm.avail_out); strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size());
strm.next_out = outbuf; } catch (EndOfFile &) {
strm.avail_out = sizeof(outbuf); eof = true;
}
} }
if (ret == BZ_STREAM_END) int ret = BZ2_bzDecompress(&strm);
return res;
if (strm.avail_in == 0 && strm.avail_out == outbuf.size() && eof)
throw CompressionError("bzip2 data ends prematurely");
if (strm.avail_out < outbuf.size()) {
sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out);
strm.next_out = outbuf.data();
strm.avail_out = outbuf.size();
}
if (ret == BZ_STREAM_END) return;
if (ret != BZ_OK) if (ret != BZ_OK)
throw CompressionError("error while decompressing bzip2 file"); throw CompressionError("error while decompressing bzip2 file");
if (strm.avail_in == 0)
throw CompressionError("bzip2 data ends prematurely");
} }
} }
static ref<std::string> decompressBrotli(const std::string & in) static void decompressBrotli(Source & source, Sink & sink)
{ {
#if !HAVE_BROTLI #if !HAVE_BROTLI
return make_ref<std::string>(runProgram(BROTLI, true, {"-d"}, {in})); RunOptions options(BROTLI, {"-d"});
options.stdin = &source;
options.stdout = &sink;
runProgram2(options);
#else #else
auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr); auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
if (!s) if (!s)
@ -109,16 +144,26 @@ static ref<std::string> decompressBrotli(const std::string & in)
Finally free([s]() { BrotliDecoderDestroyInstance(s); }); Finally free([s]() { BrotliDecoderDestroyInstance(s); });
uint8_t outbuf[BUFSIZ]; std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize);
ref<std::string> res = make_ref<std::string>(); const uint8_t * next_in = nullptr;
const uint8_t *next_in = (uint8_t *)in.c_str(); size_t avail_in = 0;
size_t avail_in = in.size(); bool eof = false;
uint8_t *next_out = outbuf;
size_t avail_out = sizeof(outbuf);
while (true) { while (true) {
checkInterrupt(); checkInterrupt();
if (avail_in == 0 && !eof) {
next_in = inbuf.data();
try {
avail_in = source.read((unsigned char *) next_in, inbuf.size());
} catch (EndOfFile &) {
eof = true;
}
}
uint8_t * next_out = outbuf.data();
size_t avail_out = outbuf.size();
auto ret = BrotliDecoderDecompressStream(s, auto ret = BrotliDecoderDecompressStream(s,
&avail_in, &next_in, &avail_in, &next_in,
&avail_out, &next_out, &avail_out, &next_out,
@ -128,51 +173,49 @@ static ref<std::string> decompressBrotli(const std::string & in)
case BROTLI_DECODER_RESULT_ERROR: case BROTLI_DECODER_RESULT_ERROR:
throw CompressionError("error while decompressing brotli file"); throw CompressionError("error while decompressing brotli file");
case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT: case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
if (eof)
throw CompressionError("incomplete or corrupt brotli file"); throw CompressionError("incomplete or corrupt brotli file");
break;
case BROTLI_DECODER_RESULT_SUCCESS: case BROTLI_DECODER_RESULT_SUCCESS:
if (avail_in != 0) if (avail_in != 0)
throw CompressionError("unexpected input after brotli decompression"); throw CompressionError("unexpected input after brotli decompression");
break; break;
case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT: case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT:
// I'm not sure if this can happen, but abort if this happens with empty buffer // I'm not sure if this can happen, but abort if this happens with empty buffer
if (avail_out == sizeof(outbuf)) if (avail_out == outbuf.size())
throw CompressionError("brotli decompression requires larger buffer"); throw CompressionError("brotli decompression requires larger buffer");
break; break;
} }
// Always ensure we have full buffer for next invocation // Always ensure we have full buffer for next invocation
if (avail_out < sizeof(outbuf)) { if (avail_out < outbuf.size())
res->append((char*)outbuf, sizeof(outbuf) - avail_out); sink((unsigned char *) outbuf.data(), outbuf.size() - avail_out);
next_out = outbuf;
avail_out = sizeof(outbuf);
}
if (ret == BROTLI_DECODER_RESULT_SUCCESS) return res; if (ret == BROTLI_DECODER_RESULT_SUCCESS) return;
} }
#endif // HAVE_BROTLI #endif // HAVE_BROTLI
} }
/* Compress the string `in` with the named `method` and return whatever
   the compression sink wrote out. `parallel` is passed through
   unchanged to makeCompressionSink(). */
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel)
{
    StringSink collected;
    auto compressor = makeCompressionSink(method, collected, parallel);

    /* Feed the entire input in one go, then finish() the sink before
       handing back what accumulated in the StringSink. */
    (*compressor)(in);
    compressor->finish();

    return collected.s;
}
ref<std::string> decompress(const std::string & method, const std::string & in) ref<std::string> decompress(const std::string & method, const std::string & in)
{
StringSource source(in);
StringSink sink;
decompress(method, source, sink);
return sink.s;
}
void decompress(const std::string & method, Source & source, Sink & sink)
{ {
if (method == "none") if (method == "none")
return make_ref<std::string>(in); return decompressNone(source, sink);
else if (method == "xz") else if (method == "xz")
return decompressXZ(in); return decompressXZ(source, sink);
else if (method == "bzip2") else if (method == "bzip2")
return decompressBzip2(in); return decompressBzip2(source, sink);
else if (method == "br") else if (method == "br")
return decompressBrotli(in); return decompressBrotli(source, sink);
else else
throw UnknownCompressionMethod(format("unknown compression method '%s'") % method); throw UnknownCompressionMethod("unknown compression method '%s'", method);
} }
struct NoneSink : CompressionSink struct NoneSink : CompressionSink
@ -499,4 +542,13 @@ ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & next
throw UnknownCompressionMethod(format("unknown compression method '%s'") % method); throw UnknownCompressionMethod(format("unknown compression method '%s'") % method);
} }
/* Compress the string `in` with the named `method` and return whatever
   the compression sink wrote out. `parallel` is passed through
   unchanged to makeCompressionSink(). */
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel)
{
    StringSink collected;
    auto compressor = makeCompressionSink(method, collected, parallel);

    /* Feed the entire input in one go, then finish() the sink before
       handing back what accumulated in the StringSink. */
    (*compressor)(in);
    compressor->finish();

    return collected.s;
}
} }

View file

@ -8,10 +8,12 @@
namespace nix { namespace nix {
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel = false);
ref<std::string> decompress(const std::string & method, const std::string & in); ref<std::string> decompress(const std::string & method, const std::string & in);
void decompress(const std::string & method, Source & source, Sink & sink);
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel = false);
struct CompressionSink : BufferedSink struct CompressionSink : BufferedSink
{ {
virtual void finish() = 0; virtual void finish() = 0;

View file

@ -56,7 +56,7 @@ struct Source
void operator () (unsigned char * data, size_t len); void operator () (unsigned char * data, size_t len);
/* Store up to len in the buffer pointed to by data, and /* Store up to len in the buffer pointed to by data, and
return the number of bytes stored. If blocks until at least return the number of bytes stored. It blocks until at least
one byte is available. */ one byte is available. */
virtual size_t read(unsigned char * data, size_t len) = 0; virtual size_t read(unsigned char * data, size_t len) = 0;
@ -175,6 +175,22 @@ struct TeeSource : Source
}; };
/* Adapter that turns a callable taking (data, len) into a Sink: every
   chunk written to the sink is forwarded to the stored function. */
struct LambdaSink : Sink
{
    using lambda_t = std::function<void(const unsigned char *, size_t)>;

    /* The wrapped callable; invoked once per operator() call. */
    lambda_t lambda;

    LambdaSink(const lambda_t & fun) : lambda(fun) { }

    virtual void operator () (const unsigned char * data, size_t len)
    {
        lambda(data, len);
    }
};
void writePadding(size_t len, Sink & sink); void writePadding(size_t len, Sink & sink);
void writeString(const unsigned char * buf, size_t len, Sink & sink); void writeString(const unsigned char * buf, size_t len, Sink & sink);

View file

@ -3,6 +3,7 @@
#include "affinity.hh" #include "affinity.hh"
#include "sync.hh" #include "sync.hh"
#include "finally.hh" #include "finally.hh"
#include "serialise.hh"
#include <cctype> #include <cctype>
#include <cerrno> #include <cerrno>
@ -568,19 +569,25 @@ void writeFull(int fd, const string & s, bool allowInterrupts)
string drainFD(int fd) string drainFD(int fd)
{ {
string result; StringSink sink;
unsigned char buffer[4096]; drainFD(fd, sink);
return std::move(*sink.s);
}
void drainFD(int fd, Sink & sink)
{
std::vector<unsigned char> buf(4096);
while (1) { while (1) {
checkInterrupt(); checkInterrupt();
ssize_t rd = read(fd, buffer, sizeof buffer); ssize_t rd = read(fd, buf.data(), buf.size());
if (rd == -1) { if (rd == -1) {
if (errno != EINTR) if (errno != EINTR)
throw SysError("reading from file"); throw SysError("reading from file");
} }
else if (rd == 0) break; else if (rd == 0) break;
else result.append((char *) buffer, rd); else sink(buf.data(), rd);
} }
return result;
} }
@ -920,20 +927,47 @@ string runProgram(Path program, bool searchPath, const Strings & args,
return res.second; return res.second;
} }
std::pair<int, std::string> runProgram(const RunOptions & options) std::pair<int, std::string> runProgram(const RunOptions & options_)
{
RunOptions options(options_);
StringSink sink;
options.stdout = &sink;
int status = 0;
try {
runProgram2(options);
} catch (ExecError & e) {
status = e.status;
}
return {status, std::move(*sink.s)};
}
void runProgram2(const RunOptions & options)
{ {
checkInterrupt(); checkInterrupt();
assert(!(options.stdin && options.input));
std::unique_ptr<Source> source_;
Source * source = options.stdin;
if (options.input) {
source_ = std::make_unique<StringSource>(*options.input);
source = source_.get();
}
/* Create a pipe. */ /* Create a pipe. */
Pipe out, in; Pipe out, in;
out.create(); if (options.stdout) out.create();
if (options.input) in.create(); if (source) in.create();
/* Fork. */ /* Fork. */
Pid pid = startProcess([&]() { Pid pid = startProcess([&]() {
if (dup2(out.writeSide.get(), STDOUT_FILENO) == -1) if (options.stdout && dup2(out.writeSide.get(), STDOUT_FILENO) == -1)
throw SysError("dupping stdout"); throw SysError("dupping stdout");
if (options.input && dup2(in.readSide.get(), STDIN_FILENO) == -1) if (source && dup2(in.readSide.get(), STDIN_FILENO) == -1)
throw SysError("dupping stdin"); throw SysError("dupping stdin");
Strings args_(options.args); Strings args_(options.args);
@ -961,11 +995,20 @@ std::pair<int, std::string> runProgram(const RunOptions & options)
}); });
if (options.input) { if (source) {
in.readSide = -1; in.readSide = -1;
writerThread = std::thread([&]() { writerThread = std::thread([&]() {
try { try {
writeFull(in.writeSide.get(), *options.input); std::vector<unsigned char> buf(8 * 1024);
while (true) {
size_t n;
try {
n = source->read(buf.data(), buf.size());
} catch (EndOfFile &) {
break;
}
writeFull(in.writeSide.get(), buf.data(), n);
}
promise.set_value(); promise.set_value();
} catch (...) { } catch (...) {
promise.set_exception(std::current_exception()); promise.set_exception(std::current_exception());
@ -974,15 +1017,17 @@ std::pair<int, std::string> runProgram(const RunOptions & options)
}); });
} }
string result = drainFD(out.readSide.get()); if (options.stdout)
drainFD(out.readSide.get(), *options.stdout);
/* Wait for the child to finish. */ /* Wait for the child to finish. */
int status = pid.wait(); int status = pid.wait();
/* Wait for the writer thread to finish. */ /* Wait for the writer thread to finish. */
if (options.input) promise.get_future().get(); if (source) promise.get_future().get();
return {status, result}; if (status)
throw ExecError(status, fmt("program '%1%' %2%", options.program, statusToString(status)));
} }

View file

@ -25,6 +25,9 @@
namespace nix { namespace nix {
struct Sink;
struct Source;
/* Return an environment variable. */ /* Return an environment variable. */
string getEnv(const string & key, const string & def = ""); string getEnv(const string & key, const string & def = "");
@ -150,6 +153,7 @@ MakeError(EndOfFile, Error)
/* Read a file descriptor until EOF occurs. */ /* Read a file descriptor until EOF occurs. */
string drainFD(int fd); string drainFD(int fd);
void drainFD(int fd, Sink & sink);
/* Automatic cleanup of resources. */ /* Automatic cleanup of resources. */
@ -256,6 +260,8 @@ struct RunOptions
bool searchPath = true; bool searchPath = true;
Strings args; Strings args;
std::experimental::optional<std::string> input; std::experimental::optional<std::string> input;
Source * stdin = nullptr;
Sink * stdout = nullptr;
bool _killStderr = false; bool _killStderr = false;
RunOptions(const Path & program, const Strings & args) RunOptions(const Path & program, const Strings & args)
@ -266,6 +272,8 @@ struct RunOptions
std::pair<int, std::string> runProgram(const RunOptions & options); std::pair<int, std::string> runProgram(const RunOptions & options);
void runProgram2(const RunOptions & options);
class ExecError : public Error class ExecError : public Error
{ {