From d3761f5f8bce1e4c8dcfdff3fa77c173157c0346 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 6 Aug 2018 15:40:29 +0200 Subject: [PATCH] Fix Brotli decompression in 'nix log' This didn't work anymore since decompression was only done in the non-coroutine case. Decompressors are now sinks, just like compressors. Also fixed a bug in bzip2 API handling (we have to handle BZ_RUN_OK rather than BZ_OK), which we didn't notice because there was a missing 'throw': if (ret != BZ_OK) CompressionError("error while compressing bzip2 file"); --- src/libstore/binary-cache-store.cc | 23 +- src/libstore/builtins/fetchurl.cc | 21 +- src/libstore/download.cc | 68 ++- src/libstore/download.hh | 3 - src/libstore/s3-binary-cache-store.cc | 6 +- src/libutil/compression.cc | 617 +++++++++++--------------- src/libutil/compression.hh | 12 +- 7 files changed, 339 insertions(+), 411 deletions(-) diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 76c0a1a89..9c75c8599 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -217,17 +217,6 @@ void BinaryCacheStore::narFromPath(const Path & storePath, Sink & sink) { auto info = queryPathInfo(storePath).cast(); - auto source = sinkToSource([this, url{info->url}](Sink & sink) { - try { - getFile(url, sink); - } catch (NoSuchBinaryCacheFile & e) { - throw SubstituteGone(e.what()); - } - }); - - stats.narRead++; - //stats.narReadCompressedBytes += nar->size(); // FIXME - uint64_t narSize = 0; LambdaSink wrapperSink([&](const unsigned char * data, size_t len) { @@ -235,8 +224,18 @@ void BinaryCacheStore::narFromPath(const Path & storePath, Sink & sink) narSize += len; }); - decompress(info->compression, *source, wrapperSink); + auto decompressor = makeDecompressionSink(info->compression, wrapperSink); + try { + getFile(info->url, *decompressor); + } catch (NoSuchBinaryCacheFile & e) { + throw SubstituteGone(e.what()); + } + + decompressor->flush(); + + stats.narRead++; + //stats.narReadCompressedBytes += nar->size(); // FIXME stats.narReadBytes += narSize; } diff --git a/src/libstore/builtins/fetchurl.cc b/src/libstore/builtins/fetchurl.cc index 1f4abd374..b4dcb35f9 100644 --- a/src/libstore/builtins/fetchurl.cc +++ b/src/libstore/builtins/fetchurl.cc @@ -39,21 +39,16 @@ void builtinFetchurl(const BasicDerivation & drv, const std::string & netrcData) request.verifyTLS = false; request.decompress = false; - downloader->download(std::move(request), sink); + auto decompressor = makeDecompressionSink( + hasSuffix(mainUrl, ".xz") ? "xz" : "none", sink); + downloader->download(std::move(request), *decompressor); + decompressor->finish(); }); - if (get(drv.env, "unpack", "") == "1") { - - if (hasSuffix(mainUrl, ".xz")) { - auto source2 = sinkToSource([&](Sink & sink) { - decompress("xz", *source, sink); - }); - restorePath(storePath, *source2); - } else - restorePath(storePath, *source); - - } else - writeFile(storePath, *source); + if (get(drv.env, "unpack", "") == "1") + restorePath(storePath, *source); + else + writeFile(storePath, *source); auto executable = drv.env.find("executable"); if (executable != drv.env.end() && executable->second == "1") { diff --git a/src/libstore/download.cc b/src/libstore/download.cc index f0ea1995a..973fca0b1 100644 --- a/src/libstore/download.cc +++ b/src/libstore/download.cc @@ -58,16 +58,6 @@ std::string resolveUri(const std::string & uri) return uri; } -ref decodeContent(const std::string & encoding, ref data) -{ - if (encoding == "") - return data; - else if (encoding == "br") - return decompress(encoding, *data); - else - throw Error("unsupported Content-Encoding '%s'", encoding); -} - struct CurlDownloader : public Downloader { CURLM * curlm = 0; @@ -106,6 +96,12 @@ struct CurlDownloader : public Downloader fmt(request.data ? "uploading '%s'" : "downloading '%s'", request.uri), {request.uri}, request.parentAct) , callback(callback) + , finalSink([this](const unsigned char * data, size_t len) { + if (this->request.dataCallback) + this->request.dataCallback((char *) data, len); + else + this->result.data->append((char *) data, len); + }) { if (!request.expectedETag.empty()) requestHeaders = curl_slist_append(requestHeaders, ("If-None-Match: " + request.expectedETag).c_str()); @@ -129,23 +125,40 @@ struct CurlDownloader : public Downloader } } - template - void fail(const T & e) + void failEx(std::exception_ptr ex) { assert(!done); done = true; - callback.rethrow(std::make_exception_ptr(e)); + callback.rethrow(ex); } + template + void fail(const T & e) + { + failEx(std::make_exception_ptr(e)); + } + + LambdaSink finalSink; + std::shared_ptr decompressionSink; + + std::exception_ptr writeException; + size_t writeCallback(void * contents, size_t size, size_t nmemb) { - size_t realSize = size * nmemb; - result.bodySize += realSize; - if (request.dataCallback) - request.dataCallback((char *) contents, realSize); - else - result.data->append((char *) contents, realSize); - return realSize; + try { + size_t realSize = size * nmemb; + result.bodySize += realSize; + + if (!decompressionSink) + decompressionSink = makeDecompressionSink(encoding, finalSink); + + (*decompressionSink)((unsigned char *) contents, realSize); + + return realSize; + } catch (...) { + writeException = std::current_exception(); + return 0; + } } static size_t writeCallbackWrapper(void * contents, size_t size, size_t nmemb, void * userp) @@ -314,27 +327,33 @@ struct CurlDownloader : public Downloader debug("finished %s of '%s'; curl status = %d, HTTP status = %d, body = %d bytes", request.verb(), request.uri, code, httpStatus, result.bodySize); + if (decompressionSink) + decompressionSink->finish(); + if (code == CURLE_WRITE_ERROR && result.etag == request.expectedETag) { code = CURLE_OK; httpStatus = 304; } - if (code == CURLE_OK && + if (writeException) + failEx(writeException); + + else if (code == CURLE_OK && (httpStatus == 200 || httpStatus == 201 || httpStatus == 204 || httpStatus == 304 || httpStatus == 226 /* FTP */ || httpStatus == 0 /* other protocol */)) { result.cached = httpStatus == 304; done = true; try { - if (request.decompress) - result.data = decodeContent(encoding, ref(result.data)); act.progress(result.data->size(), result.data->size()); callback(std::move(result)); } catch (...) { done = true; callback.rethrow(); } - } else { + } + + else { // We treat most errors as transient, but won't retry when hopeless Error err = Transient; @@ -369,6 +388,7 @@ struct CurlDownloader : public Downloader case CURLE_UNKNOWN_OPTION: case CURLE_SSL_CACERT_BADFILE: case CURLE_TOO_MANY_REDIRECTS: + case CURLE_WRITE_ERROR: err = Misc; break; default: // Shut up warnings diff --git a/src/libstore/download.hh b/src/libstore/download.hh index ff38a2870..f0228f7d0 100644 --- a/src/libstore/download.hh +++ b/src/libstore/download.hh @@ -88,7 +88,4 @@ public: bool isUri(const string & s); -/* Decode data according to the Content-Encoding header. */ -ref decodeContent(const std::string & encoding, ref data); - } diff --git a/src/libstore/s3-binary-cache-store.cc b/src/libstore/s3-binary-cache-store.cc index 2f18e3f38..660583d31 100644 --- a/src/libstore/s3-binary-cache-store.cc +++ b/src/libstore/s3-binary-cache-store.cc @@ -153,10 +153,8 @@ S3Helper::DownloadResult S3Helper::getObject( auto result = checkAws(fmt("AWS error fetching '%s'", key), client->GetObject(request)); - res.data = decodeContent( - result.GetContentEncoding(), - make_ref( - dynamic_cast(result.GetBody()).str())); + res.data = decompress(result.GetContentEncoding(), + dynamic_cast(result.GetBody()).str()); } catch (S3Error & e) { if (e.err != Aws::S3::S3Errors::NO_SUCH_KEY) throw; diff --git a/src/libutil/compression.cc b/src/libutil/compression.cc index 5de85ede1..53b62f62a 100644 --- a/src/libutil/compression.cc +++ b/src/libutil/compression.cc @@ -17,197 +17,24 @@ namespace nix { static const size_t bufSize = 32 * 1024; -static void decompressNone(Source & source, Sink & sink) +// Don't feed brotli too much at once. +struct ChunkedCompressionSink : CompressionSink { - std::vector buf(bufSize); - while (true) { - size_t n; - try { - n = source.read(buf.data(), buf.size()); - } catch (EndOfFile &) { - break; + uint8_t outbuf[BUFSIZ]; + + void write(const unsigned char * data, size_t len) override + { + const size_t CHUNK_SIZE = sizeof(outbuf) << 2; + while (len) { + size_t n = std::min(CHUNK_SIZE, len); + writeInternal(data, n); + data += n; + len -= n; } - sink(buf.data(), n); } -} -static void decompressXZ(Source & source, Sink & sink) -{ - lzma_stream strm(LZMA_STREAM_INIT); - - lzma_ret ret = lzma_stream_decoder( - &strm, UINT64_MAX, LZMA_CONCATENATED); - if (ret != LZMA_OK) - throw CompressionError("unable to initialise lzma decoder"); - - Finally free([&]() { lzma_end(&strm); }); - - lzma_action action = LZMA_RUN; - std::vector inbuf(bufSize), outbuf(bufSize); - strm.next_in = nullptr; - strm.avail_in = 0; - strm.next_out = outbuf.data(); - strm.avail_out = outbuf.size(); - bool eof = false; - - while (true) { - checkInterrupt(); - - if (strm.avail_in == 0 && !eof) { - strm.next_in = inbuf.data(); - try { - strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size()); - } catch (EndOfFile &) { - eof = true; - } - } - - if (strm.avail_in == 0) - action = LZMA_FINISH; - - lzma_ret ret = lzma_code(&strm, action); - - if (strm.avail_out < outbuf.size()) { - sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out); - strm.next_out = outbuf.data(); - strm.avail_out = outbuf.size(); - } - - if (ret == LZMA_STREAM_END) return; - - if (ret != LZMA_OK) - throw CompressionError("error %d while decompressing xz file", ret); - } -} - -static void decompressBzip2(Source & source, Sink & sink) -{ - bz_stream strm; - memset(&strm, 0, sizeof(strm)); - - int ret = BZ2_bzDecompressInit(&strm, 0, 0); - if (ret != BZ_OK) - throw CompressionError("unable to initialise bzip2 decoder"); - - Finally free([&]() { BZ2_bzDecompressEnd(&strm); }); - - std::vector inbuf(bufSize), outbuf(bufSize); - strm.next_in = nullptr; - strm.avail_in = 0; - strm.next_out = outbuf.data(); - strm.avail_out = outbuf.size(); - bool eof = false; - - while (true) { - checkInterrupt(); - - if (strm.avail_in == 0 && !eof) { - strm.next_in = inbuf.data(); - try { - strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size()); - } catch (EndOfFile &) { - eof = true; - } - } - - int ret = BZ2_bzDecompress(&strm); - - if (strm.avail_in == 0 && strm.avail_out == outbuf.size() && eof) - throw CompressionError("bzip2 data ends prematurely"); - - if (strm.avail_out < outbuf.size()) { - sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out); - strm.next_out = outbuf.data(); - strm.avail_out = outbuf.size(); - } - - if (ret == BZ_STREAM_END) return; - - if (ret != BZ_OK) - throw CompressionError("error while decompressing bzip2 file"); - } -} - -static void decompressBrotli(Source & source, Sink & sink) -{ - auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr); - if (!s) - throw CompressionError("unable to initialize brotli decoder"); - - Finally free([s]() { BrotliDecoderDestroyInstance(s); }); - - std::vector inbuf(bufSize), outbuf(bufSize); - const uint8_t * next_in = nullptr; - size_t avail_in = 0; - bool eof = false; - - while (true) { - checkInterrupt(); - - if (avail_in == 0 && !eof) { - next_in = inbuf.data(); - try { - avail_in = source.read((unsigned char *) next_in, inbuf.size()); - } catch (EndOfFile &) { - eof = true; - } - } - - uint8_t * next_out = outbuf.data(); - size_t avail_out = outbuf.size(); - - auto ret = BrotliDecoderDecompressStream(s, - &avail_in, &next_in, - &avail_out, &next_out, - nullptr); - - switch (ret) { - case BROTLI_DECODER_RESULT_ERROR: - throw CompressionError("error while decompressing brotli file"); - case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT: - if (eof) - throw CompressionError("incomplete or corrupt brotli file"); - break; - case BROTLI_DECODER_RESULT_SUCCESS: - if (avail_in != 0) - throw CompressionError("unexpected input after brotli decompression"); - break; - case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT: - // I'm not sure if this can happen, but abort if this happens with empty buffer - if (avail_out == outbuf.size()) - throw CompressionError("brotli decompression requires larger buffer"); - break; - } - - // Always ensure we have full buffer for next invocation - if (avail_out < outbuf.size()) - sink((unsigned char *) outbuf.data(), outbuf.size() - avail_out); - - if (ret == BROTLI_DECODER_RESULT_SUCCESS) return; - } -} - -ref decompress(const std::string & method, const std::string & in) -{ - StringSource source(in); - StringSink sink; - decompress(method, source, sink); - return sink.s; -} - -void decompress(const std::string & method, Source & source, Sink & sink) -{ - if (method == "none") - return decompressNone(source, sink); - else if (method == "xz") - return decompressXZ(source, sink); - else if (method == "bzip2") - return decompressBzip2(source, sink); - else if (method == "br") - return decompressBrotli(source, sink); - else - throw UnknownCompressionMethod("unknown compression method '%s'", method); -} + virtual void writeInternal(const unsigned char * data, size_t len) = 0; +}; struct NoneSink : CompressionSink { @@ -217,28 +44,25 @@ struct NoneSink : CompressionSink void write(const unsigned char * data, size_t len) override { nextSink(data, len); } }; -struct XzSink : CompressionSink +struct XzDecompressionSink : CompressionSink { Sink & nextSink; uint8_t outbuf[BUFSIZ]; lzma_stream strm = LZMA_STREAM_INIT; bool finished = false; - template - XzSink(Sink & nextSink, F&& initEncoder) : nextSink(nextSink) { - lzma_ret ret = initEncoder(); + XzDecompressionSink(Sink & nextSink) : nextSink(nextSink) + { + lzma_ret ret = lzma_stream_decoder( + &strm, UINT64_MAX, LZMA_CONCATENATED); if (ret != LZMA_OK) - throw CompressionError("unable to initialise lzma encoder"); - // FIXME: apply the x86 BCJ filter? + throw CompressionError("unable to initialise lzma decoder"); strm.next_out = outbuf; strm.avail_out = sizeof(outbuf); } - XzSink(Sink & nextSink) : XzSink(nextSink, [this]() { - return lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64); - }) {} - ~XzSink() + ~XzDecompressionSink() { lzma_end(&strm); } @@ -246,43 +70,25 @@ struct XzSink : CompressionSink void finish() override { CompressionSink::flush(); - - assert(!finished); - finished = true; - - while (true) { - checkInterrupt(); - - lzma_ret ret = lzma_code(&strm, LZMA_FINISH); - if (ret != LZMA_OK && ret != LZMA_STREAM_END) - throw CompressionError("error while flushing xz file"); - - if (strm.avail_out == 0 || ret == LZMA_STREAM_END) { - nextSink(outbuf, sizeof(outbuf) - strm.avail_out); - strm.next_out = outbuf; - strm.avail_out = sizeof(outbuf); - } - - if (ret == LZMA_STREAM_END) break; - } + write(nullptr, 0); } void write(const unsigned char * data, size_t len) override { - assert(!finished); - strm.next_in = data; strm.avail_in = len; - while (strm.avail_in) { + while (!finished && (!data || strm.avail_in)) { checkInterrupt(); - lzma_ret ret = lzma_code(&strm, LZMA_RUN); - if (ret != LZMA_OK) - throw CompressionError("error while compressing xz file"); + lzma_ret ret = lzma_code(&strm, data ? LZMA_RUN : LZMA_FINISH); + if (ret != LZMA_OK && ret != LZMA_STREAM_END) + throw CompressionError("error %d while decompressing xz file", ret); - if (strm.avail_out == 0) { - nextSink(outbuf, sizeof(outbuf)); + finished = ret == LZMA_STREAM_END; + + if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - strm.avail_out); strm.next_out = outbuf; strm.avail_out = sizeof(outbuf); } @@ -290,89 +96,36 @@ struct XzSink : CompressionSink } }; -#ifdef HAVE_LZMA_MT -struct ParallelXzSink : public XzSink -{ - ParallelXzSink(Sink &nextSink) : XzSink(nextSink, [this]() { - lzma_mt mt_options = {}; - mt_options.flags = 0; - mt_options.timeout = 300; // Using the same setting as the xz cmd line - mt_options.preset = LZMA_PRESET_DEFAULT; - mt_options.filters = NULL; - mt_options.check = LZMA_CHECK_CRC64; - mt_options.threads = lzma_cputhreads(); - mt_options.block_size = 0; - if (mt_options.threads == 0) - mt_options.threads = 1; - // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the - // number of threads. - return lzma_stream_encoder_mt(&strm, &mt_options); - }) {} -}; -#endif - -struct BzipSink : CompressionSink +struct BzipDecompressionSink : ChunkedCompressionSink { Sink & nextSink; - char outbuf[BUFSIZ]; bz_stream strm; bool finished = false; - BzipSink(Sink & nextSink) : nextSink(nextSink) + BzipDecompressionSink(Sink & nextSink) : nextSink(nextSink) { memset(&strm, 0, sizeof(strm)); - int ret = BZ2_bzCompressInit(&strm, 9, 0, 30); + int ret = BZ2_bzDecompressInit(&strm, 0, 0); if (ret != BZ_OK) - throw CompressionError("unable to initialise bzip2 encoder"); + throw CompressionError("unable to initialise bzip2 decoder"); - strm.next_out = outbuf; + strm.next_out = (char *) outbuf; strm.avail_out = sizeof(outbuf); } - ~BzipSink() + ~BzipDecompressionSink() { - BZ2_bzCompressEnd(&strm); + BZ2_bzDecompressEnd(&strm); } void finish() override { flush(); - - assert(!finished); - finished = true; - - while (true) { - checkInterrupt(); - - int ret = BZ2_bzCompress(&strm, BZ_FINISH); - if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END) - throw CompressionError("error while flushing bzip2 file"); - - if (strm.avail_out == 0 || ret == BZ_STREAM_END) { - nextSink((unsigned char *) outbuf, sizeof(outbuf) - strm.avail_out); - strm.next_out = outbuf; - strm.avail_out = sizeof(outbuf); - } - - if (ret == BZ_STREAM_END) break; - } - } - - void write(const unsigned char * data, size_t len) override - { - /* Bzip2's 'avail_in' parameter is an unsigned int, so we need - to split the input into chunks of at most 4 GiB. */ - while (len) { - auto n = std::min((size_t) std::numeric_limits::max(), len); - writeInternal(data, n); - data += n; - len -= n; - } + write(nullptr, 0); } void writeInternal(const unsigned char * data, size_t len) { - assert(!finished); assert(len <= std::numeric_limits::max()); strm.next_in = (char *) data; @@ -381,12 +134,167 @@ struct BzipSink : CompressionSink while (strm.avail_in) { checkInterrupt(); - int ret = BZ2_bzCompress(&strm, BZ_RUN); - if (ret != BZ_OK) - CompressionError("error while compressing bzip2 file"); + int ret = BZ2_bzDecompress(&strm); + if (ret != BZ_OK && ret != BZ_STREAM_END) + throw CompressionError("error while decompressing bzip2 file"); - if (strm.avail_out == 0) { - nextSink((unsigned char *) outbuf, sizeof(outbuf)); + finished = ret == BZ_STREAM_END; + + if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - strm.avail_out); + strm.next_out = (char *) outbuf; + strm.avail_out = sizeof(outbuf); + } + } + } +}; + +struct BrotliDecompressionSink : ChunkedCompressionSink +{ + Sink & nextSink; + BrotliDecoderState * state; + bool finished = false; + + BrotliDecompressionSink(Sink & nextSink) : nextSink(nextSink) + { + state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr); + if (!state) + throw CompressionError("unable to initialize brotli decoder"); + } + + ~BrotliDecompressionSink() + { + BrotliDecoderDestroyInstance(state); + } + + void finish() override + { + flush(); + writeInternal(nullptr, 0); + } + + void writeInternal(const unsigned char * data, size_t len) + { + const uint8_t * next_in = data; + size_t avail_in = len; + uint8_t * next_out = outbuf; + size_t avail_out = sizeof(outbuf); + + while (!finished && (!data || avail_in)) { + checkInterrupt(); + + if (!BrotliDecoderDecompressStream(state, + &avail_in, &next_in, + &avail_out, &next_out, + nullptr)) + throw CompressionError("error while decompressing brotli file"); + + if (avail_out < sizeof(outbuf) || avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - avail_out); + next_out = outbuf; + avail_out = sizeof(outbuf); + } + + finished = BrotliDecoderIsFinished(state); + } + } +}; + +ref decompress(const std::string & method, const std::string & in) +{ + StringSink ssink; + auto sink = makeDecompressionSink(method, ssink); + (*sink)(in); + sink->finish(); + return ssink.s; +} + +ref makeDecompressionSink(const std::string & method, Sink & nextSink) +{ + if (method == "none" || method == "") + return make_ref(nextSink); + else if (method == "xz") + return make_ref(nextSink); + else if (method == "bzip2") + return make_ref(nextSink); + else if (method == "br") + return make_ref(nextSink); + else + throw UnknownCompressionMethod("unknown compression method '%s'", method); +} + +struct XzCompressionSink : CompressionSink +{ + Sink & nextSink; + uint8_t outbuf[BUFSIZ]; + lzma_stream strm = LZMA_STREAM_INIT; + bool finished = false; + + XzCompressionSink(Sink & nextSink, bool parallel) : nextSink(nextSink) + { + lzma_ret ret; + bool done = false; + + if (parallel) { +#ifdef HAVE_LZMA_MT + lzma_mt mt_options = {}; + mt_options.flags = 0; + mt_options.timeout = 300; // Using the same setting as the xz cmd line + mt_options.preset = LZMA_PRESET_DEFAULT; + mt_options.filters = NULL; + mt_options.check = LZMA_CHECK_CRC64; + mt_options.threads = lzma_cputhreads(); + mt_options.block_size = 0; + if (mt_options.threads == 0) + mt_options.threads = 1; + // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the + // number of threads. + ret = lzma_stream_encoder_mt(&strm, &mt_options); + done = true; +#else + printMsg(lvlError, "warning: parallel compression requested but not supported for metho d '%1%', falling back to single-threaded compression", method); +#endif + } + + if (!done) + ret = lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64); + + if (ret != LZMA_OK) + throw CompressionError("unable to initialise lzma encoder"); + + // FIXME: apply the x86 BCJ filter? + + strm.next_out = outbuf; + strm.avail_out = sizeof(outbuf); + } + + ~XzCompressionSink() + { + lzma_end(&strm); + } + + void finish() override + { + CompressionSink::flush(); + write(nullptr, 0); + } + + void write(const unsigned char * data, size_t len) override + { + strm.next_in = data; + strm.avail_in = len; + + while (!finished && (!data || strm.avail_in)) { + checkInterrupt(); + + lzma_ret ret = lzma_code(&strm, data ? LZMA_RUN : LZMA_FINISH); + if (ret != LZMA_OK && ret != LZMA_STREAM_END) + throw CompressionError("error %d while compressing xz file", ret); + + finished = ret == LZMA_STREAM_END; + + if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - strm.avail_out); strm.next_out = outbuf; strm.avail_out = sizeof(outbuf); } @@ -394,21 +302,74 @@ struct BzipSink : CompressionSink } }; -struct BrotliSink : CompressionSink +struct BzipCompressionSink : ChunkedCompressionSink +{ + Sink & nextSink; + bz_stream strm; + bool finished = false; + + BzipCompressionSink(Sink & nextSink) : nextSink(nextSink) + { + memset(&strm, 0, sizeof(strm)); + int ret = BZ2_bzCompressInit(&strm, 9, 0, 30); + if (ret != BZ_OK) + throw CompressionError("unable to initialise bzip2 encoder"); + + strm.next_out = (char *) outbuf; + strm.avail_out = sizeof(outbuf); + } + + ~BzipCompressionSink() + { + BZ2_bzCompressEnd(&strm); + } + + void finish() override + { + flush(); + writeInternal(nullptr, 0); + } + + void writeInternal(const unsigned char * data, size_t len) + { + assert(len <= std::numeric_limits::max()); + + strm.next_in = (char *) data; + strm.avail_in = len; + + while (!finished && (!data || strm.avail_in)) { + checkInterrupt(); + + int ret = BZ2_bzCompress(&strm, data ? BZ_RUN : BZ_FINISH); + if (ret != BZ_RUN_OK && ret != BZ_FINISH_OK && ret != BZ_STREAM_END) + throw CompressionError("error %d while compressing bzip2 file", ret); + + finished = ret == BZ_STREAM_END; + + if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - strm.avail_out); + strm.next_out = (char *) outbuf; + strm.avail_out = sizeof(outbuf); + } + } + } +}; + +struct BrotliCompressionSink : ChunkedCompressionSink { Sink & nextSink; uint8_t outbuf[BUFSIZ]; BrotliEncoderState *state; bool finished = false; - BrotliSink(Sink & nextSink) : nextSink(nextSink) + BrotliCompressionSink(Sink & nextSink) : nextSink(nextSink) { state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr); if (!state) throw CompressionError("unable to initialise brotli encoder"); } - ~BrotliSink() + ~BrotliCompressionSink() { BrotliEncoderDestroyInstance(state); } @@ -416,89 +377,47 @@ struct BrotliSink : CompressionSink void finish() override { flush(); - assert(!finished); - - const uint8_t *next_in = nullptr; - size_t avail_in = 0; - uint8_t *next_out = outbuf; - size_t avail_out = sizeof(outbuf); - while (!finished) { - checkInterrupt(); - - if (!BrotliEncoderCompressStream(state, - BROTLI_OPERATION_FINISH, - &avail_in, &next_in, - &avail_out, &next_out, - nullptr)) - throw CompressionError("error while finishing brotli file"); - - finished = BrotliEncoderIsFinished(state); - if (avail_out == 0 || finished) { - nextSink(outbuf, sizeof(outbuf) - avail_out); - next_out = outbuf; - avail_out = sizeof(outbuf); - } - } - } - - void write(const unsigned char * data, size_t len) override - { - // Don't feed brotli too much at once - const size_t CHUNK_SIZE = sizeof(outbuf) << 2; - while (len) { - size_t n = std::min(CHUNK_SIZE, len); - writeInternal(data, n); - data += n; - len -= n; - } + writeInternal(nullptr, 0); } void writeInternal(const unsigned char * data, size_t len) { - assert(!finished); - - const uint8_t *next_in = data; + const uint8_t * next_in = data; size_t avail_in = len; - uint8_t *next_out = outbuf; + uint8_t * next_out = outbuf; size_t avail_out = sizeof(outbuf); - while (avail_in > 0) { + while (!finished && (!data || avail_in)) { checkInterrupt(); if (!BrotliEncoderCompressStream(state, - BROTLI_OPERATION_PROCESS, - &avail_in, &next_in, - &avail_out, &next_out, - nullptr)) - throw CompressionError("error while compressing brotli file"); + data ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH, + &avail_in, &next_in, + &avail_out, &next_out, + nullptr)) + throw CompressionError("error while compressing brotli compression"); if (avail_out < sizeof(outbuf) || avail_in == 0) { nextSink(outbuf, sizeof(outbuf) - avail_out); next_out = outbuf; avail_out = sizeof(outbuf); } + + finished = BrotliEncoderIsFinished(state); } } }; ref makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel) { - if (parallel) { -#ifdef HAVE_LZMA_MT - if (method == "xz") - return make_ref(nextSink); -#endif - printMsg(lvlError, format("Warning: parallel compression requested but not supported for method '%1%', falling back to single-threaded compression") % method); - } - if (method == "none") return make_ref(nextSink); else if (method == "xz") - return make_ref(nextSink); + return make_ref(nextSink, parallel); else if (method == "bzip2") - return make_ref(nextSink); + return make_ref(nextSink); else if (method == "br") - return make_ref(nextSink); + return make_ref(nextSink); else throw UnknownCompressionMethod(format("unknown compression method '%s'") % method); } diff --git a/src/libutil/compression.hh b/src/libutil/compression.hh index f7a3e3fbd..dd666a4e1 100644 --- a/src/libutil/compression.hh +++ b/src/libutil/compression.hh @@ -8,17 +8,17 @@ namespace nix { -ref decompress(const std::string & method, const std::string & in); - -void decompress(const std::string & method, Source & source, Sink & sink); - -ref compress(const std::string & method, const std::string & in, const bool parallel = false); - struct CompressionSink : BufferedSink { virtual void finish() = 0; }; +ref decompress(const std::string & method, const std::string & in); + +ref makeDecompressionSink(const std::string & method, Sink & nextSink); + +ref compress(const std::string & method, const std::string & in, const bool parallel = false); + ref makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel = false); MakeError(UnknownCompressionMethod, Error);