Fix Brotli decompression in 'nix log'

This didn't work anymore since decompression was only done in the
non-coroutine case.

Decompressors are now sinks, just like compressors.

Also fixed a bug in bzip2 API handling (we have to handle BZ_RUN_OK
rather than BZ_OK), which we didn't notice because there was a missing
'throw':

  if (ret != BZ_OK)
      CompressionError("error while compressing bzip2 file");
This commit is contained in:
Eelco Dolstra 2018-08-06 15:40:29 +02:00
parent fa4def3d46
commit d3761f5f8b
No known key found for this signature in database
GPG key ID: 8170B4726D7198DE
7 changed files with 339 additions and 411 deletions

View file

@ -217,17 +217,6 @@ void BinaryCacheStore::narFromPath(const Path & storePath, Sink & sink)
{ {
auto info = queryPathInfo(storePath).cast<const NarInfo>(); auto info = queryPathInfo(storePath).cast<const NarInfo>();
auto source = sinkToSource([this, url{info->url}](Sink & sink) {
try {
getFile(url, sink);
} catch (NoSuchBinaryCacheFile & e) {
throw SubstituteGone(e.what());
}
});
stats.narRead++;
//stats.narReadCompressedBytes += nar->size(); // FIXME
uint64_t narSize = 0; uint64_t narSize = 0;
LambdaSink wrapperSink([&](const unsigned char * data, size_t len) { LambdaSink wrapperSink([&](const unsigned char * data, size_t len) {
@ -235,8 +224,18 @@ void BinaryCacheStore::narFromPath(const Path & storePath, Sink & sink)
narSize += len; narSize += len;
}); });
decompress(info->compression, *source, wrapperSink); auto decompressor = makeDecompressionSink(info->compression, wrapperSink);
try {
getFile(info->url, *decompressor);
} catch (NoSuchBinaryCacheFile & e) {
throw SubstituteGone(e.what());
}
decompressor->flush();
stats.narRead++;
//stats.narReadCompressedBytes += nar->size(); // FIXME
stats.narReadBytes += narSize; stats.narReadBytes += narSize;
} }

View file

@ -39,20 +39,15 @@ void builtinFetchurl(const BasicDerivation & drv, const std::string & netrcData)
request.verifyTLS = false; request.verifyTLS = false;
request.decompress = false; request.decompress = false;
downloader->download(std::move(request), sink); auto decompressor = makeDecompressionSink(
hasSuffix(mainUrl, ".xz") ? "xz" : "none", sink);
downloader->download(std::move(request), *decompressor);
decompressor->finish();
}); });
if (get(drv.env, "unpack", "") == "1") { if (get(drv.env, "unpack", "") == "1")
if (hasSuffix(mainUrl, ".xz")) {
auto source2 = sinkToSource([&](Sink & sink) {
decompress("xz", *source, sink);
});
restorePath(storePath, *source2);
} else
restorePath(storePath, *source); restorePath(storePath, *source);
else
} else
writeFile(storePath, *source); writeFile(storePath, *source);
auto executable = drv.env.find("executable"); auto executable = drv.env.find("executable");

View file

@ -58,16 +58,6 @@ std::string resolveUri(const std::string & uri)
return uri; return uri;
} }
ref<std::string> decodeContent(const std::string & encoding, ref<std::string> data)
{
if (encoding == "")
return data;
else if (encoding == "br")
return decompress(encoding, *data);
else
throw Error("unsupported Content-Encoding '%s'", encoding);
}
struct CurlDownloader : public Downloader struct CurlDownloader : public Downloader
{ {
CURLM * curlm = 0; CURLM * curlm = 0;
@ -106,6 +96,12 @@ struct CurlDownloader : public Downloader
fmt(request.data ? "uploading '%s'" : "downloading '%s'", request.uri), fmt(request.data ? "uploading '%s'" : "downloading '%s'", request.uri),
{request.uri}, request.parentAct) {request.uri}, request.parentAct)
, callback(callback) , callback(callback)
, finalSink([this](const unsigned char * data, size_t len) {
if (this->request.dataCallback)
this->request.dataCallback((char *) data, len);
else
this->result.data->append((char *) data, len);
})
{ {
if (!request.expectedETag.empty()) if (!request.expectedETag.empty())
requestHeaders = curl_slist_append(requestHeaders, ("If-None-Match: " + request.expectedETag).c_str()); requestHeaders = curl_slist_append(requestHeaders, ("If-None-Match: " + request.expectedETag).c_str());
@ -129,23 +125,40 @@ struct CurlDownloader : public Downloader
} }
} }
template<class T> void failEx(std::exception_ptr ex)
void fail(const T & e)
{ {
assert(!done); assert(!done);
done = true; done = true;
callback.rethrow(std::make_exception_ptr(e)); callback.rethrow(ex);
} }
template<class T>
void fail(const T & e)
{
failEx(std::make_exception_ptr(e));
}
LambdaSink finalSink;
std::shared_ptr<CompressionSink> decompressionSink;
std::exception_ptr writeException;
size_t writeCallback(void * contents, size_t size, size_t nmemb) size_t writeCallback(void * contents, size_t size, size_t nmemb)
{ {
try {
size_t realSize = size * nmemb; size_t realSize = size * nmemb;
result.bodySize += realSize; result.bodySize += realSize;
if (request.dataCallback)
request.dataCallback((char *) contents, realSize); if (!decompressionSink)
else decompressionSink = makeDecompressionSink(encoding, finalSink);
result.data->append((char *) contents, realSize);
(*decompressionSink)((unsigned char *) contents, realSize);
return realSize; return realSize;
} catch (...) {
writeException = std::current_exception();
return 0;
}
} }
static size_t writeCallbackWrapper(void * contents, size_t size, size_t nmemb, void * userp) static size_t writeCallbackWrapper(void * contents, size_t size, size_t nmemb, void * userp)
@ -314,27 +327,33 @@ struct CurlDownloader : public Downloader
debug("finished %s of '%s'; curl status = %d, HTTP status = %d, body = %d bytes", debug("finished %s of '%s'; curl status = %d, HTTP status = %d, body = %d bytes",
request.verb(), request.uri, code, httpStatus, result.bodySize); request.verb(), request.uri, code, httpStatus, result.bodySize);
if (decompressionSink)
decompressionSink->finish();
if (code == CURLE_WRITE_ERROR && result.etag == request.expectedETag) { if (code == CURLE_WRITE_ERROR && result.etag == request.expectedETag) {
code = CURLE_OK; code = CURLE_OK;
httpStatus = 304; httpStatus = 304;
} }
if (code == CURLE_OK && if (writeException)
failEx(writeException);
else if (code == CURLE_OK &&
(httpStatus == 200 || httpStatus == 201 || httpStatus == 204 || httpStatus == 304 || httpStatus == 226 /* FTP */ || httpStatus == 0 /* other protocol */)) (httpStatus == 200 || httpStatus == 201 || httpStatus == 204 || httpStatus == 304 || httpStatus == 226 /* FTP */ || httpStatus == 0 /* other protocol */))
{ {
result.cached = httpStatus == 304; result.cached = httpStatus == 304;
done = true; done = true;
try { try {
if (request.decompress)
result.data = decodeContent(encoding, ref<std::string>(result.data));
act.progress(result.data->size(), result.data->size()); act.progress(result.data->size(), result.data->size());
callback(std::move(result)); callback(std::move(result));
} catch (...) { } catch (...) {
done = true; done = true;
callback.rethrow(); callback.rethrow();
} }
} else { }
else {
// We treat most errors as transient, but won't retry when hopeless // We treat most errors as transient, but won't retry when hopeless
Error err = Transient; Error err = Transient;
@ -369,6 +388,7 @@ struct CurlDownloader : public Downloader
case CURLE_UNKNOWN_OPTION: case CURLE_UNKNOWN_OPTION:
case CURLE_SSL_CACERT_BADFILE: case CURLE_SSL_CACERT_BADFILE:
case CURLE_TOO_MANY_REDIRECTS: case CURLE_TOO_MANY_REDIRECTS:
case CURLE_WRITE_ERROR:
err = Misc; err = Misc;
break; break;
default: // Shut up warnings default: // Shut up warnings

View file

@ -88,7 +88,4 @@ public:
bool isUri(const string & s); bool isUri(const string & s);
/* Decode data according to the Content-Encoding header. */
ref<std::string> decodeContent(const std::string & encoding, ref<std::string> data);
} }

View file

@ -153,10 +153,8 @@ S3Helper::DownloadResult S3Helper::getObject(
auto result = checkAws(fmt("AWS error fetching '%s'", key), auto result = checkAws(fmt("AWS error fetching '%s'", key),
client->GetObject(request)); client->GetObject(request));
res.data = decodeContent( res.data = decompress(result.GetContentEncoding(),
result.GetContentEncoding(), dynamic_cast<std::stringstream &>(result.GetBody()).str());
make_ref<std::string>(
dynamic_cast<std::stringstream &>(result.GetBody()).str()));
} catch (S3Error & e) { } catch (S3Error & e) {
if (e.err != Aws::S3::S3Errors::NO_SUCH_KEY) throw; if (e.err != Aws::S3::S3Errors::NO_SUCH_KEY) throw;

View file

@ -17,197 +17,24 @@ namespace nix {
static const size_t bufSize = 32 * 1024; static const size_t bufSize = 32 * 1024;
static void decompressNone(Source & source, Sink & sink) // Don't feed brotli too much at once.
struct ChunkedCompressionSink : CompressionSink
{ {
std::vector<unsigned char> buf(bufSize); uint8_t outbuf[BUFSIZ];
while (true) {
size_t n;
try {
n = source.read(buf.data(), buf.size());
} catch (EndOfFile &) {
break;
}
sink(buf.data(), n);
}
}
static void decompressXZ(Source & source, Sink & sink) void write(const unsigned char * data, size_t len) override
{ {
lzma_stream strm(LZMA_STREAM_INIT); const size_t CHUNK_SIZE = sizeof(outbuf) << 2;
while (len) {
lzma_ret ret = lzma_stream_decoder( size_t n = std::min(CHUNK_SIZE, len);
&strm, UINT64_MAX, LZMA_CONCATENATED); writeInternal(data, n);
if (ret != LZMA_OK) data += n;
throw CompressionError("unable to initialise lzma decoder"); len -= n;
Finally free([&]() { lzma_end(&strm); });
lzma_action action = LZMA_RUN;
std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize);
strm.next_in = nullptr;
strm.avail_in = 0;
strm.next_out = outbuf.data();
strm.avail_out = outbuf.size();
bool eof = false;
while (true) {
checkInterrupt();
if (strm.avail_in == 0 && !eof) {
strm.next_in = inbuf.data();
try {
strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size());
} catch (EndOfFile &) {
eof = true;
} }
} }
if (strm.avail_in == 0) virtual void writeInternal(const unsigned char * data, size_t len) = 0;
action = LZMA_FINISH; };
lzma_ret ret = lzma_code(&strm, action);
if (strm.avail_out < outbuf.size()) {
sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out);
strm.next_out = outbuf.data();
strm.avail_out = outbuf.size();
}
if (ret == LZMA_STREAM_END) return;
if (ret != LZMA_OK)
throw CompressionError("error %d while decompressing xz file", ret);
}
}
static void decompressBzip2(Source & source, Sink & sink)
{
bz_stream strm;
memset(&strm, 0, sizeof(strm));
int ret = BZ2_bzDecompressInit(&strm, 0, 0);
if (ret != BZ_OK)
throw CompressionError("unable to initialise bzip2 decoder");
Finally free([&]() { BZ2_bzDecompressEnd(&strm); });
std::vector<char> inbuf(bufSize), outbuf(bufSize);
strm.next_in = nullptr;
strm.avail_in = 0;
strm.next_out = outbuf.data();
strm.avail_out = outbuf.size();
bool eof = false;
while (true) {
checkInterrupt();
if (strm.avail_in == 0 && !eof) {
strm.next_in = inbuf.data();
try {
strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size());
} catch (EndOfFile &) {
eof = true;
}
}
int ret = BZ2_bzDecompress(&strm);
if (strm.avail_in == 0 && strm.avail_out == outbuf.size() && eof)
throw CompressionError("bzip2 data ends prematurely");
if (strm.avail_out < outbuf.size()) {
sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out);
strm.next_out = outbuf.data();
strm.avail_out = outbuf.size();
}
if (ret == BZ_STREAM_END) return;
if (ret != BZ_OK)
throw CompressionError("error while decompressing bzip2 file");
}
}
static void decompressBrotli(Source & source, Sink & sink)
{
auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
if (!s)
throw CompressionError("unable to initialize brotli decoder");
Finally free([s]() { BrotliDecoderDestroyInstance(s); });
std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize);
const uint8_t * next_in = nullptr;
size_t avail_in = 0;
bool eof = false;
while (true) {
checkInterrupt();
if (avail_in == 0 && !eof) {
next_in = inbuf.data();
try {
avail_in = source.read((unsigned char *) next_in, inbuf.size());
} catch (EndOfFile &) {
eof = true;
}
}
uint8_t * next_out = outbuf.data();
size_t avail_out = outbuf.size();
auto ret = BrotliDecoderDecompressStream(s,
&avail_in, &next_in,
&avail_out, &next_out,
nullptr);
switch (ret) {
case BROTLI_DECODER_RESULT_ERROR:
throw CompressionError("error while decompressing brotli file");
case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
if (eof)
throw CompressionError("incomplete or corrupt brotli file");
break;
case BROTLI_DECODER_RESULT_SUCCESS:
if (avail_in != 0)
throw CompressionError("unexpected input after brotli decompression");
break;
case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT:
// I'm not sure if this can happen, but abort if this happens with empty buffer
if (avail_out == outbuf.size())
throw CompressionError("brotli decompression requires larger buffer");
break;
}
// Always ensure we have full buffer for next invocation
if (avail_out < outbuf.size())
sink((unsigned char *) outbuf.data(), outbuf.size() - avail_out);
if (ret == BROTLI_DECODER_RESULT_SUCCESS) return;
}
}
ref<std::string> decompress(const std::string & method, const std::string & in)
{
StringSource source(in);
StringSink sink;
decompress(method, source, sink);
return sink.s;
}
void decompress(const std::string & method, Source & source, Sink & sink)
{
if (method == "none")
return decompressNone(source, sink);
else if (method == "xz")
return decompressXZ(source, sink);
else if (method == "bzip2")
return decompressBzip2(source, sink);
else if (method == "br")
return decompressBrotli(source, sink);
else
throw UnknownCompressionMethod("unknown compression method '%s'", method);
}
struct NoneSink : CompressionSink struct NoneSink : CompressionSink
{ {
@ -217,28 +44,25 @@ struct NoneSink : CompressionSink
void write(const unsigned char * data, size_t len) override { nextSink(data, len); } void write(const unsigned char * data, size_t len) override { nextSink(data, len); }
}; };
struct XzSink : CompressionSink struct XzDecompressionSink : CompressionSink
{ {
Sink & nextSink; Sink & nextSink;
uint8_t outbuf[BUFSIZ]; uint8_t outbuf[BUFSIZ];
lzma_stream strm = LZMA_STREAM_INIT; lzma_stream strm = LZMA_STREAM_INIT;
bool finished = false; bool finished = false;
template <typename F> XzDecompressionSink(Sink & nextSink) : nextSink(nextSink)
XzSink(Sink & nextSink, F&& initEncoder) : nextSink(nextSink) { {
lzma_ret ret = initEncoder(); lzma_ret ret = lzma_stream_decoder(
&strm, UINT64_MAX, LZMA_CONCATENATED);
if (ret != LZMA_OK) if (ret != LZMA_OK)
throw CompressionError("unable to initialise lzma encoder"); throw CompressionError("unable to initialise lzma decoder");
// FIXME: apply the x86 BCJ filter?
strm.next_out = outbuf; strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf); strm.avail_out = sizeof(outbuf);
} }
XzSink(Sink & nextSink) : XzSink(nextSink, [this]() {
return lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64);
}) {}
~XzSink() ~XzDecompressionSink()
{ {
lzma_end(&strm); lzma_end(&strm);
} }
@ -246,43 +70,25 @@ struct XzSink : CompressionSink
void finish() override void finish() override
{ {
CompressionSink::flush(); CompressionSink::flush();
write(nullptr, 0);
assert(!finished);
finished = true;
while (true) {
checkInterrupt();
lzma_ret ret = lzma_code(&strm, LZMA_FINISH);
if (ret != LZMA_OK && ret != LZMA_STREAM_END)
throw CompressionError("error while flushing xz file");
if (strm.avail_out == 0 || ret == LZMA_STREAM_END) {
nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf);
}
if (ret == LZMA_STREAM_END) break;
}
} }
void write(const unsigned char * data, size_t len) override void write(const unsigned char * data, size_t len) override
{ {
assert(!finished);
strm.next_in = data; strm.next_in = data;
strm.avail_in = len; strm.avail_in = len;
while (strm.avail_in) { while (!finished && (!data || strm.avail_in)) {
checkInterrupt(); checkInterrupt();
lzma_ret ret = lzma_code(&strm, LZMA_RUN); lzma_ret ret = lzma_code(&strm, data ? LZMA_RUN : LZMA_FINISH);
if (ret != LZMA_OK) if (ret != LZMA_OK && ret != LZMA_STREAM_END)
throw CompressionError("error while compressing xz file"); throw CompressionError("error %d while decompressing xz file", ret);
if (strm.avail_out == 0) { finished = ret == LZMA_STREAM_END;
nextSink(outbuf, sizeof(outbuf));
if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
strm.next_out = outbuf; strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf); strm.avail_out = sizeof(outbuf);
} }
@ -290,10 +96,147 @@ struct XzSink : CompressionSink
} }
}; };
#ifdef HAVE_LZMA_MT struct BzipDecompressionSink : ChunkedCompressionSink
struct ParallelXzSink : public XzSink
{ {
ParallelXzSink(Sink &nextSink) : XzSink(nextSink, [this]() { Sink & nextSink;
bz_stream strm;
bool finished = false;
BzipDecompressionSink(Sink & nextSink) : nextSink(nextSink)
{
memset(&strm, 0, sizeof(strm));
int ret = BZ2_bzDecompressInit(&strm, 0, 0);
if (ret != BZ_OK)
throw CompressionError("unable to initialise bzip2 decoder");
strm.next_out = (char *) outbuf;
strm.avail_out = sizeof(outbuf);
}
~BzipDecompressionSink()
{
BZ2_bzDecompressEnd(&strm);
}
void finish() override
{
flush();
write(nullptr, 0);
}
void writeInternal(const unsigned char * data, size_t len)
{
assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max());
strm.next_in = (char *) data;
strm.avail_in = len;
while (strm.avail_in) {
checkInterrupt();
int ret = BZ2_bzDecompress(&strm);
if (ret != BZ_OK && ret != BZ_STREAM_END)
throw CompressionError("error while decompressing bzip2 file");
finished = ret == BZ_STREAM_END;
if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
strm.next_out = (char *) outbuf;
strm.avail_out = sizeof(outbuf);
}
}
}
};
struct BrotliDecompressionSink : ChunkedCompressionSink
{
Sink & nextSink;
BrotliDecoderState * state;
bool finished = false;
BrotliDecompressionSink(Sink & nextSink) : nextSink(nextSink)
{
state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
if (!state)
throw CompressionError("unable to initialize brotli decoder");
}
~BrotliDecompressionSink()
{
BrotliDecoderDestroyInstance(state);
}
void finish() override
{
flush();
writeInternal(nullptr, 0);
}
void writeInternal(const unsigned char * data, size_t len)
{
const uint8_t * next_in = data;
size_t avail_in = len;
uint8_t * next_out = outbuf;
size_t avail_out = sizeof(outbuf);
while (!finished && (!data || avail_in)) {
checkInterrupt();
if (!BrotliDecoderDecompressStream(state,
&avail_in, &next_in,
&avail_out, &next_out,
nullptr))
throw CompressionError("error while decompressing brotli file");
if (avail_out < sizeof(outbuf) || avail_in == 0) {
nextSink(outbuf, sizeof(outbuf) - avail_out);
next_out = outbuf;
avail_out = sizeof(outbuf);
}
finished = BrotliDecoderIsFinished(state);
}
}
};
ref<std::string> decompress(const std::string & method, const std::string & in)
{
StringSink ssink;
auto sink = makeDecompressionSink(method, ssink);
(*sink)(in);
sink->finish();
return ssink.s;
}
ref<CompressionSink> makeDecompressionSink(const std::string & method, Sink & nextSink)
{
if (method == "none" || method == "")
return make_ref<NoneSink>(nextSink);
else if (method == "xz")
return make_ref<XzDecompressionSink>(nextSink);
else if (method == "bzip2")
return make_ref<BzipDecompressionSink>(nextSink);
else if (method == "br")
return make_ref<BrotliDecompressionSink>(nextSink);
else
throw UnknownCompressionMethod("unknown compression method '%s'", method);
}
struct XzCompressionSink : CompressionSink
{
Sink & nextSink;
uint8_t outbuf[BUFSIZ];
lzma_stream strm = LZMA_STREAM_INIT;
bool finished = false;
XzCompressionSink(Sink & nextSink, bool parallel) : nextSink(nextSink)
{
lzma_ret ret;
bool done = false;
if (parallel) {
#ifdef HAVE_LZMA_MT
lzma_mt mt_options = {}; lzma_mt mt_options = {};
mt_options.flags = 0; mt_options.flags = 0;
mt_options.timeout = 300; // Using the same setting as the xz cmd line mt_options.timeout = 300; // Using the same setting as the xz cmd line
@ -306,30 +249,77 @@ struct ParallelXzSink : public XzSink
mt_options.threads = 1; mt_options.threads = 1;
// FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the
// number of threads. // number of threads.
return lzma_stream_encoder_mt(&strm, &mt_options); ret = lzma_stream_encoder_mt(&strm, &mt_options);
}) {} done = true;
}; #else
printMsg(lvlError, "warning: parallel compression requested but not supported for metho d '%1%', falling back to single-threaded compression", method);
#endif #endif
}
struct BzipSink : CompressionSink if (!done)
ret = lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64);
if (ret != LZMA_OK)
throw CompressionError("unable to initialise lzma encoder");
// FIXME: apply the x86 BCJ filter?
strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf);
}
~XzCompressionSink()
{
lzma_end(&strm);
}
void finish() override
{
CompressionSink::flush();
write(nullptr, 0);
}
void write(const unsigned char * data, size_t len) override
{
strm.next_in = data;
strm.avail_in = len;
while (!finished && (!data || strm.avail_in)) {
checkInterrupt();
lzma_ret ret = lzma_code(&strm, data ? LZMA_RUN : LZMA_FINISH);
if (ret != LZMA_OK && ret != LZMA_STREAM_END)
throw CompressionError("error %d while compressing xz file", ret);
finished = ret == LZMA_STREAM_END;
if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf);
}
}
}
};
struct BzipCompressionSink : ChunkedCompressionSink
{ {
Sink & nextSink; Sink & nextSink;
char outbuf[BUFSIZ];
bz_stream strm; bz_stream strm;
bool finished = false; bool finished = false;
BzipSink(Sink & nextSink) : nextSink(nextSink) BzipCompressionSink(Sink & nextSink) : nextSink(nextSink)
{ {
memset(&strm, 0, sizeof(strm)); memset(&strm, 0, sizeof(strm));
int ret = BZ2_bzCompressInit(&strm, 9, 0, 30); int ret = BZ2_bzCompressInit(&strm, 9, 0, 30);
if (ret != BZ_OK) if (ret != BZ_OK)
throw CompressionError("unable to initialise bzip2 encoder"); throw CompressionError("unable to initialise bzip2 encoder");
strm.next_out = outbuf; strm.next_out = (char *) outbuf;
strm.avail_out = sizeof(outbuf); strm.avail_out = sizeof(outbuf);
} }
~BzipSink() ~BzipCompressionSink()
{ {
BZ2_bzCompressEnd(&strm); BZ2_bzCompressEnd(&strm);
} }
@ -337,78 +327,49 @@ struct BzipSink : CompressionSink
void finish() override void finish() override
{ {
flush(); flush();
writeInternal(nullptr, 0);
assert(!finished);
finished = true;
while (true) {
checkInterrupt();
int ret = BZ2_bzCompress(&strm, BZ_FINISH);
if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
throw CompressionError("error while flushing bzip2 file");
if (strm.avail_out == 0 || ret == BZ_STREAM_END) {
nextSink((unsigned char *) outbuf, sizeof(outbuf) - strm.avail_out);
strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf);
}
if (ret == BZ_STREAM_END) break;
}
}
void write(const unsigned char * data, size_t len) override
{
/* Bzip2's 'avail_in' parameter is an unsigned int, so we need
to split the input into chunks of at most 4 GiB. */
while (len) {
auto n = std::min((size_t) std::numeric_limits<decltype(strm.avail_in)>::max(), len);
writeInternal(data, n);
data += n;
len -= n;
}
} }
void writeInternal(const unsigned char * data, size_t len) void writeInternal(const unsigned char * data, size_t len)
{ {
assert(!finished);
assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max()); assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max());
strm.next_in = (char *) data; strm.next_in = (char *) data;
strm.avail_in = len; strm.avail_in = len;
while (strm.avail_in) { while (!finished && (!data || strm.avail_in)) {
checkInterrupt(); checkInterrupt();
int ret = BZ2_bzCompress(&strm, BZ_RUN); int ret = BZ2_bzCompress(&strm, data ? BZ_RUN : BZ_FINISH);
if (ret != BZ_OK) if (ret != BZ_RUN_OK && ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
CompressionError("error while compressing bzip2 file"); throw CompressionError("error %d while compressing bzip2 file", ret);
if (strm.avail_out == 0) { finished = ret == BZ_STREAM_END;
nextSink((unsigned char *) outbuf, sizeof(outbuf));
strm.next_out = outbuf; if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
strm.next_out = (char *) outbuf;
strm.avail_out = sizeof(outbuf); strm.avail_out = sizeof(outbuf);
} }
} }
} }
}; };
struct BrotliSink : CompressionSink struct BrotliCompressionSink : ChunkedCompressionSink
{ {
Sink & nextSink; Sink & nextSink;
uint8_t outbuf[BUFSIZ]; uint8_t outbuf[BUFSIZ];
BrotliEncoderState *state; BrotliEncoderState *state;
bool finished = false; bool finished = false;
BrotliSink(Sink & nextSink) : nextSink(nextSink) BrotliCompressionSink(Sink & nextSink) : nextSink(nextSink)
{ {
state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr); state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
if (!state) if (!state)
throw CompressionError("unable to initialise brotli encoder"); throw CompressionError("unable to initialise brotli encoder");
} }
~BrotliSink() ~BrotliCompressionSink()
{ {
BrotliEncoderDestroyInstance(state); BrotliEncoderDestroyInstance(state);
} }
@ -416,89 +377,47 @@ struct BrotliSink : CompressionSink
void finish() override void finish() override
{ {
flush(); flush();
assert(!finished); writeInternal(nullptr, 0);
const uint8_t *next_in = nullptr;
size_t avail_in = 0;
uint8_t *next_out = outbuf;
size_t avail_out = sizeof(outbuf);
while (!finished) {
checkInterrupt();
if (!BrotliEncoderCompressStream(state,
BROTLI_OPERATION_FINISH,
&avail_in, &next_in,
&avail_out, &next_out,
nullptr))
throw CompressionError("error while finishing brotli file");
finished = BrotliEncoderIsFinished(state);
if (avail_out == 0 || finished) {
nextSink(outbuf, sizeof(outbuf) - avail_out);
next_out = outbuf;
avail_out = sizeof(outbuf);
}
}
}
void write(const unsigned char * data, size_t len) override
{
// Don't feed brotli too much at once
const size_t CHUNK_SIZE = sizeof(outbuf) << 2;
while (len) {
size_t n = std::min(CHUNK_SIZE, len);
writeInternal(data, n);
data += n;
len -= n;
}
} }
void writeInternal(const unsigned char * data, size_t len) void writeInternal(const unsigned char * data, size_t len)
{ {
assert(!finished);
const uint8_t * next_in = data; const uint8_t * next_in = data;
size_t avail_in = len; size_t avail_in = len;
uint8_t * next_out = outbuf; uint8_t * next_out = outbuf;
size_t avail_out = sizeof(outbuf); size_t avail_out = sizeof(outbuf);
while (avail_in > 0) { while (!finished && (!data || avail_in)) {
checkInterrupt(); checkInterrupt();
if (!BrotliEncoderCompressStream(state, if (!BrotliEncoderCompressStream(state,
BROTLI_OPERATION_PROCESS, data ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH,
&avail_in, &next_in, &avail_in, &next_in,
&avail_out, &next_out, &avail_out, &next_out,
nullptr)) nullptr))
throw CompressionError("error while compressing brotli file"); throw CompressionError("error while compressing brotli compression");
if (avail_out < sizeof(outbuf) || avail_in == 0) { if (avail_out < sizeof(outbuf) || avail_in == 0) {
nextSink(outbuf, sizeof(outbuf) - avail_out); nextSink(outbuf, sizeof(outbuf) - avail_out);
next_out = outbuf; next_out = outbuf;
avail_out = sizeof(outbuf); avail_out = sizeof(outbuf);
} }
finished = BrotliEncoderIsFinished(state);
} }
} }
}; };
ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel) ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel)
{ {
if (parallel) {
#ifdef HAVE_LZMA_MT
if (method == "xz")
return make_ref<ParallelXzSink>(nextSink);
#endif
printMsg(lvlError, format("Warning: parallel compression requested but not supported for method '%1%', falling back to single-threaded compression") % method);
}
if (method == "none") if (method == "none")
return make_ref<NoneSink>(nextSink); return make_ref<NoneSink>(nextSink);
else if (method == "xz") else if (method == "xz")
return make_ref<XzSink>(nextSink); return make_ref<XzCompressionSink>(nextSink, parallel);
else if (method == "bzip2") else if (method == "bzip2")
return make_ref<BzipSink>(nextSink); return make_ref<BzipCompressionSink>(nextSink);
else if (method == "br") else if (method == "br")
return make_ref<BrotliSink>(nextSink); return make_ref<BrotliCompressionSink>(nextSink);
else else
throw UnknownCompressionMethod(format("unknown compression method '%s'") % method); throw UnknownCompressionMethod(format("unknown compression method '%s'") % method);
} }

View file

@ -8,17 +8,17 @@
namespace nix { namespace nix {
ref<std::string> decompress(const std::string & method, const std::string & in);
void decompress(const std::string & method, Source & source, Sink & sink);
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel = false);
struct CompressionSink : BufferedSink struct CompressionSink : BufferedSink
{ {
virtual void finish() = 0; virtual void finish() = 0;
}; };
ref<std::string> decompress(const std::string & method, const std::string & in);
ref<CompressionSink> makeDecompressionSink(const std::string & method, Sink & nextSink);
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel = false);
ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel = false); ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel = false);
MakeError(UnknownCompressionMethod, Error); MakeError(UnknownCompressionMethod, Error);