lix/src/libutil/compression.cc

301 lines
9 KiB
C++
Raw Normal View History

#include "compression.hh"
2019-12-10 08:47:38 +00:00
#include "tarfile.hh"
2016-04-22 16:15:02 +00:00
#include "util.hh"
#include "finally.hh"
#include "logging.hh"
2019-12-10 08:47:38 +00:00
#include <archive.h>
#include <archive_entry.h>
#include <cstdio>
#include <cstring>
#include <brotli/decode.h>
#include <brotli/encode.h>
2019-12-13 08:29:33 +00:00
#include <zlib.h>
2016-05-04 13:46:25 +00:00
#include <iostream>
2016-05-04 13:46:25 +00:00
namespace nix {
// Don't feed brotli too much at once.
struct ChunkedCompressionSink : CompressionSink
{
2018-08-21 13:20:23 +00:00
uint8_t outbuf[32 * 1024];
2020-12-02 13:00:43 +00:00
void write(std::string_view data) override
{
const size_t CHUNK_SIZE = sizeof(outbuf) << 2;
2020-12-02 13:00:43 +00:00
while (!data.empty()) {
size_t n = std::min(CHUNK_SIZE, data.size());
writeInternal(data.substr(0, n));
2020-12-02 13:00:43 +00:00
data.remove_prefix(n);
}
}
2020-12-02 13:00:43 +00:00
virtual void writeInternal(std::string_view data) = 0;
};
2019-12-10 08:47:38 +00:00
struct ArchiveDecompressionSource : Source
2019-12-13 08:29:33 +00:00
{
2019-12-10 08:47:38 +00:00
std::unique_ptr<TarArchive> archive = 0;
Source & src;
ArchiveDecompressionSource(Source & src) : src(src) {}
~ArchiveDecompressionSource() override {}
size_t read(char * data, size_t len) override {
struct archive_entry * ae;
2019-12-10 08:47:38 +00:00
if (!archive) {
archive = std::make_unique<TarArchive>(src, true);
this->archive->check(archive_read_next_header(this->archive->archive, &ae),
"failed to read header (%s)");
2019-12-10 08:47:38 +00:00
if (archive_filter_count(this->archive->archive) < 2) {
throw CompressionError("input compression not recognized");
2019-12-13 08:29:33 +00:00
}
}
2019-12-10 08:47:38 +00:00
ssize_t result = archive_read_data(this->archive->archive, data, len);
if (result > 0) return result;
if (result == 0) {
throw EndOfFile("reached end of compressed file");
}
this->archive->check(result, "failed to read compressed data (%s)");
2019-12-10 08:47:38 +00:00
return result;
2019-12-13 08:29:33 +00:00
}
};
2019-12-10 08:47:38 +00:00
struct ArchiveCompressionSink : CompressionSink
{
Sink & nextSink;
struct archive * archive;
2019-12-10 08:47:38 +00:00
ArchiveCompressionSink(Sink & nextSink, std::string format, bool parallel) : nextSink(nextSink) {
archive = archive_write_new();
if (!archive) throw Error("failed to initialize libarchive");
check(archive_write_add_filter_by_name(archive, format.c_str()), "couldn't initialize compression (%s)");
2019-12-10 08:47:38 +00:00
check(archive_write_set_format_raw(archive));
if (format == "xz" && parallel) {
check(archive_write_set_filter_option(archive, format.c_str(), "threads", "0"));
}
// disable internal buffering
check(archive_write_set_bytes_per_block(archive, 0));
// disable output padding
check(archive_write_set_bytes_in_last_block(archive, 1));
open();
}
~ArchiveCompressionSink() override
{
2019-12-10 08:47:38 +00:00
if (archive) archive_write_free(archive);
}
void finish() override
{
2019-12-10 08:47:38 +00:00
flush();
check(archive_write_close(archive));
}
void check(int err, const std::string & reason = "failed to compress (%s)")
{
2019-12-10 08:47:38 +00:00
if (err == ARCHIVE_EOF)
throw EndOfFile("reached end of archive");
else if (err != ARCHIVE_OK)
throw Error(reason, archive_error_string(this->archive));
}
void write(std::string_view data) override
{
2019-12-10 08:47:38 +00:00
ssize_t result = archive_write_data(archive, data.data(), data.length());
if (result <= 0) check(result);
}
2019-12-10 08:47:38 +00:00
private:
void open()
{
check(archive_write_open(archive, this, nullptr, ArchiveCompressionSink::callback_write, nullptr));
auto ae = archive_entry_new();
2019-12-10 08:47:38 +00:00
archive_entry_set_filetype(ae, AE_IFREG);
check(archive_write_header(archive, ae));
archive_entry_free(ae);
}
static ssize_t callback_write(struct archive * archive, void * _self, const void * buffer, size_t length)
{
auto self = (ArchiveCompressionSink *) _self;
self->nextSink({(const char *) buffer, length});
2019-12-10 08:47:38 +00:00
return length;
}
2019-12-10 08:47:38 +00:00
};
2019-12-10 08:47:38 +00:00
/**
 * Pass-through "compression" sink: every write is forwarded to
 * `nextSink` unchanged.
 */
struct NoneSink : CompressionSink
{
    /* Destination for the unmodified data. */
    Sink & nextSink;

    NoneSink(Sink & nextSink)
        : nextSink(nextSink)
    {
    }

    /* Nothing to finalise beyond flushing. */
    void finish() override
    {
        flush();
    }

    void write(std::string_view data) override
    {
        nextSink(data);
    }
};
struct BrotliDecompressionSink : ChunkedCompressionSink
{
Sink & nextSink;
BrotliDecoderState * state;
bool finished = false;
BrotliDecompressionSink(Sink & nextSink) : nextSink(nextSink)
{
state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
if (!state)
throw CompressionError("unable to initialize brotli decoder");
}
~BrotliDecompressionSink()
{
BrotliDecoderDestroyInstance(state);
}
void finish() override
{
flush();
2020-12-02 13:00:43 +00:00
writeInternal({});
}
2020-12-02 13:00:43 +00:00
void writeInternal(std::string_view data) override
{
2020-12-02 13:00:43 +00:00
auto next_in = (const uint8_t *) data.data();
size_t avail_in = data.size();
uint8_t * next_out = outbuf;
size_t avail_out = sizeof(outbuf);
2020-12-02 13:00:43 +00:00
while (!finished && (!data.data() || avail_in)) {
checkInterrupt();
if (!BrotliDecoderDecompressStream(state,
&avail_in, &next_in,
&avail_out, &next_out,
nullptr))
throw CompressionError("error while decompressing brotli file");
if (avail_out < sizeof(outbuf) || avail_in == 0) {
2020-12-02 13:00:43 +00:00
nextSink({(char *) outbuf, sizeof(outbuf) - avail_out});
next_out = outbuf;
avail_out = sizeof(outbuf);
}
finished = BrotliDecoderIsFinished(state);
}
}
};
/**
 * Decompress the string `in` and return the result.
 *
 * - "none" or "": returns a copy of `in`.
 * - "br": decompresses through the sink from makeDecompressionSink().
 * - anything else: decompresses through makeDecompressionSource(),
 *   which does not look at `method`.
 */
ref<std::string> decompress(const std::string & method, const std::string & in)
{
    if (method == "none" || method == "")
        return make_ref<std::string>(in);

    if (method != "br") {
        StringSource input(in);
        auto decompressed = makeDecompressionSource(input);
        return make_ref<std::string>(decompressed->drain());
    }

    StringSink output;
    auto brotli = makeDecompressionSink(method, output);
    (*brotli)(in);
    brotli->finish();
    return output.s;
}
2019-12-10 08:47:38 +00:00
std::unique_ptr<FinishSink> makeDecompressionSink(const std::string & method, Sink & nextSink)
{
if (method == "none" || method == "")
2019-12-10 08:47:38 +00:00
return std::make_unique<NoneSink>(nextSink);
else if (method == "br")
2019-12-10 08:47:38 +00:00
return std::make_unique<BrotliDecompressionSink>(nextSink);
else
return sourceToSink([&](Source & source) {
auto decompressionSource = makeDecompressionSource(source);
decompressionSource->drainInto(nextSink);
});
}
struct BrotliCompressionSink : ChunkedCompressionSink
{
Sink & nextSink;
uint8_t outbuf[BUFSIZ];
BrotliEncoderState * state;
bool finished = false;
BrotliCompressionSink(Sink & nextSink) : nextSink(nextSink)
{
state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
if (!state)
throw CompressionError("unable to initialise brotli encoder");
}
~BrotliCompressionSink()
{
BrotliEncoderDestroyInstance(state);
}
void finish() override
{
flush();
2020-12-02 13:00:43 +00:00
writeInternal({});
}
2020-12-02 13:00:43 +00:00
void writeInternal(std::string_view data) override
{
2020-12-02 13:00:43 +00:00
auto next_in = (const uint8_t *) data.data();
size_t avail_in = data.size();
uint8_t * next_out = outbuf;
size_t avail_out = sizeof(outbuf);
2020-12-02 13:00:43 +00:00
while (!finished && (!data.data() || avail_in)) {
checkInterrupt();
if (!BrotliEncoderCompressStream(state,
2020-12-02 13:00:43 +00:00
data.data() ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH,
&avail_in, &next_in,
&avail_out, &next_out,
nullptr))
throw CompressionError("error while compressing brotli compression");
if (avail_out < sizeof(outbuf) || avail_in == 0) {
2020-12-02 13:00:43 +00:00
nextSink({(const char *) outbuf, sizeof(outbuf) - avail_out});
next_out = outbuf;
avail_out = sizeof(outbuf);
}
finished = BrotliEncoderIsFinished(state);
}
}
};
/**
 * Wrap `prev` in a source that yields its decompressed contents,
 * using an ArchiveDecompressionSource.
 */
std::unique_ptr<Source> makeDecompressionSource(Source & prev)
{
    return std::make_unique<ArchiveDecompressionSource>(prev);
}
/**
 * Create a sink that compresses what is written to it with `method`
 * and forwards the result to `nextSink`.
 *
 * Methods in the libarchive list below use ArchiveCompressionSink
 * (which receives `parallel`); "none" is a pass-through; "br" uses
 * brotli.
 *
 * @throws UnknownCompressionMethod for any other method name.
 */
ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel)
{
    // Filter names that are delegated to libarchive.
    static constexpr const char * la_supports[] = {
        "bzip2", "compress", "grzip", "gzip", "lrzip", "lz4", "lzip", "lzma", "lzop", "xz", "zstd"
    };

    for (const char * supported : la_supports) {
        if (method == supported)
            return make_ref<ArchiveCompressionSink>(nextSink, method, parallel);
    }

    if (method == "none")
        return make_ref<NoneSink>(nextSink);

    if (method == "br")
        return make_ref<BrotliCompressionSink>(nextSink);

    throw UnknownCompressionMethod("unknown compression method '%s'", method);
}
/**
 * Compress the string `in` with `method` and return the result.
 *
 * `method` and `parallel` are passed on to makeCompressionSink(),
 * which throws for unknown methods.
 */
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel)
{
    StringSink collected;

    auto compressor = makeCompressionSink(method, collected, parallel);
    (*compressor)(in);
    compressor->finish();

    return collected.s;
}
}