use libbrotli directly when available

* Look for both 'brotli' and 'bro' as external command,
  since upstream has renamed it in newer versions.
  If neither is found, current runtime behavior
  is preserved: try to find 'bro' on PATH.
* Limit amount handed to BrotliEncoderCompressStream
  to ensure interrupts are processed in a timely manner.
  Testing shows negligible performance impact.
  (Other compression sinks don't seem to require this)
This commit is contained in:
Will Dietz 2017-12-29 14:42:14 -06:00
parent 6a0dd63508
commit 9dd2b8ac7b
7 changed files with 212 additions and 18 deletions

View file

@ -6,6 +6,7 @@ CXXFLAGS = @CXXFLAGS@
ENABLE_S3 = @ENABLE_S3@ ENABLE_S3 = @ENABLE_S3@
HAVE_SODIUM = @HAVE_SODIUM@ HAVE_SODIUM = @HAVE_SODIUM@
HAVE_READLINE = @HAVE_READLINE@ HAVE_READLINE = @HAVE_READLINE@
HAVE_BROTLI = @HAVE_BROTLI@
LIBCURL_LIBS = @LIBCURL_LIBS@ LIBCURL_LIBS = @LIBCURL_LIBS@
OPENSSL_LIBS = @OPENSSL_LIBS@ OPENSSL_LIBS = @OPENSSL_LIBS@
PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_NAME = @PACKAGE_NAME@
@ -13,9 +14,10 @@ PACKAGE_VERSION = @PACKAGE_VERSION@
SODIUM_LIBS = @SODIUM_LIBS@ SODIUM_LIBS = @SODIUM_LIBS@
LIBLZMA_LIBS = @LIBLZMA_LIBS@ LIBLZMA_LIBS = @LIBLZMA_LIBS@
SQLITE3_LIBS = @SQLITE3_LIBS@ SQLITE3_LIBS = @SQLITE3_LIBS@
LIBBROTLI_LIBS = @LIBBROTLI_LIBS@
bash = @bash@ bash = @bash@
bindir = @bindir@ bindir = @bindir@
bro = @bro@ brotli = @brotli@
lsof = @lsof@ lsof = @lsof@
datadir = @datadir@ datadir = @datadir@
datarootdir = @datarootdir@ datarootdir = @datarootdir@

View file

@ -127,7 +127,7 @@ NEED_PROG(gzip, gzip)
NEED_PROG(xz, xz) NEED_PROG(xz, xz)
AC_PATH_PROG(dot, dot) AC_PATH_PROG(dot, dot)
AC_PATH_PROG(pv, pv, pv) AC_PATH_PROG(pv, pv, pv)
AC_PATH_PROG(bro, bro, bro) AC_PATH_PROGS(brotli, brotli bro, bro)
AC_PATH_PROG(lsof, lsof, lsof) AC_PATH_PROG(lsof, lsof, lsof)
@ -176,6 +176,13 @@ AC_SUBST(HAVE_SODIUM, [$have_sodium])
PKG_CHECK_MODULES([LIBLZMA], [liblzma], [CXXFLAGS="$LIBLZMA_CFLAGS $CXXFLAGS"]) PKG_CHECK_MODULES([LIBLZMA], [liblzma], [CXXFLAGS="$LIBLZMA_CFLAGS $CXXFLAGS"])
# Look for libbrotli{enc,dec}, optional dependencies.
# When both modules are found: define HAVE_BROTLI for the C++ sources, prepend
# the library's compile flags to CXXFLAGS and record have_brotli=1.
# Otherwise have_brotli is left empty, so dependents can test it with `-n`.
# Both shell statements must stay inside the single quoted action-if-found
# argument; closing the bracket early leaves an unbalanced `]` in the input.
PKG_CHECK_MODULES([LIBBROTLI], [libbrotlienc libbrotlidec],
  [AC_DEFINE([HAVE_BROTLI], [1], [Whether to use libbrotli.])
   CXXFLAGS="$LIBBROTLI_CFLAGS $CXXFLAGS"
   have_brotli=1], [have_brotli=])
AC_SUBST(HAVE_BROTLI, [$have_brotli])
# Look for libseccomp, required for Linux sandboxing. # Look for libseccomp, required for Linux sandboxing.
if test "$sys_name" = linux; then if test "$sys_name" = linux; then
PKG_CHECK_MODULES([LIBSECCOMP], [libseccomp], PKG_CHECK_MODULES([LIBSECCOMP], [libseccomp],

View file

@ -7,6 +7,11 @@
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#if HAVE_BROTLI
#include <brotli/decode.h>
#include <brotli/encode.h>
#endif // HAVE_BROTLI
#include <iostream> #include <iostream>
namespace nix { namespace nix {
@ -94,8 +99,56 @@ static ref<std::string> decompressBzip2(const std::string & in)
// Decompress the complete brotli stream held in `in` and return the decoded
// bytes as a string. Two implementations are selected at compile time:
//  - without HAVE_BROTLI, the data is piped through the external BROTLI
//    program invoked with "-d";
//  - with HAVE_BROTLI, the data is decoded in-process with libbrotli's
//    streaming decoder.
// The libbrotli path throws CompressionError when the decoder cannot be
// created, reports an error, wants more input than was supplied, or leaves
// input unconsumed after signalling success.
static ref<std::string> decompressBrotli(const std::string & in) static ref<std::string> decompressBrotli(const std::string & in)
{ {
// FIXME: use libbrotli #if !HAVE_BROTLI
return make_ref<std::string>(runProgram(BRO, true, {"-d"}, {in})); return make_ref<std::string>(runProgram(BROTLI, true, {"-d"}, {in}));
#else
auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
if (!s)
throw CompressionError("unable to initialize brotli decoder");
// NOTE(review): `Finally` is presumably a scope guard that runs the lambda
// (destroying the decoder) when `free` goes out of scope -- confirm.
Finally free([s]() { BrotliDecoderDestroyInstance(s); });
// Fixed-size scratch buffer; whatever the decoder writes here is appended
// to `res` after each decoder call.
uint8_t outbuf[BUFSIZ];
ref<std::string> res = make_ref<std::string>();
// The entire input is handed to the decoder up front.
const uint8_t *next_in = (uint8_t *)in.c_str();
size_t avail_in = in.size();
uint8_t *next_out = outbuf;
size_t avail_out = sizeof(outbuf);
while (true) {
checkInterrupt();
auto ret = BrotliDecoderDecompressStream(s,
&avail_in, &next_in,
&avail_out, &next_out,
nullptr);
switch (ret) {
case BROTLI_DECODER_RESULT_ERROR:
throw CompressionError("error while decompressing brotli file");
case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
// All input was already supplied, so there is nothing more to feed.
throw CompressionError("incomplete or corrupt brotli file");
case BROTLI_DECODER_RESULT_SUCCESS:
// Success with bytes left over means data follows the brotli stream.
if (avail_in != 0)
throw CompressionError("unexpected input after brotli decompression");
break;
case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT:
// I'm not sure if this can happen, but abort if this happens with empty buffer
if (avail_out == sizeof(outbuf))
throw CompressionError("brotli decompression requires larger buffer");
break;
}
// Always ensure we have full buffer for next invocation
if (avail_out < sizeof(outbuf)) {
res->append((char*)outbuf, sizeof(outbuf) - avail_out);
next_out = outbuf;
avail_out = sizeof(outbuf);
}
// Return only after the final chunk of output has been appended above.
if (ret == BROTLI_DECODER_RESULT_SUCCESS) return res;
}
#endif // HAVE_BROTLI
} }
ref<std::string> compress(const std::string & method, const std::string & in) ref<std::string> compress(const std::string & method, const std::string & in)
@ -270,25 +323,22 @@ struct BzipSink : CompressionSink
} }
}; };
struct BrotliSink : CompressionSink struct LambdaCompressionSink : CompressionSink
{ {
Sink & nextSink; Sink & nextSink;
std::string data; std::string data;
using CompressFnTy = std::function<std::string(const std::string&)>;
BrotliSink(Sink & nextSink) : nextSink(nextSink) CompressFnTy compressFn;
LambdaCompressionSink(Sink& nextSink, CompressFnTy compressFn)
: nextSink(nextSink)
, compressFn(std::move(compressFn))
{ {
} };
~BrotliSink()
{
}
// FIXME: use libbrotli
void finish() override void finish() override
{ {
flush(); flush();
nextSink(runProgram(BRO, true, {}, data)); nextSink(compressFn(data));
} }
void write(const unsigned char * data, size_t len) override void write(const unsigned char * data, size_t len) override
@ -298,6 +348,107 @@ struct BrotliSink : CompressionSink
} }
}; };
struct BrotliCmdSink : LambdaCompressionSink
{
BrotliCmdSink(Sink& nextSink)
: LambdaCompressionSink(nextSink, [](const std::string& data) {
return runProgram(BROTLI, true, {}, data);
})
{
}
};
#if HAVE_BROTLI
struct BrotliSink : CompressionSink
{
Sink & nextSink;
uint8_t outbuf[BUFSIZ];
BrotliEncoderState *state;
bool finished = false;
BrotliSink(Sink & nextSink) : nextSink(nextSink)
{
state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
if (!state)
throw CompressionError("unable to initialise brotli encoder");
}
~BrotliSink()
{
BrotliEncoderDestroyInstance(state);
}
void finish() override
{
flush();
assert(!finished);
const uint8_t *next_in = nullptr;
size_t avail_in = 0;
uint8_t *next_out = outbuf;
size_t avail_out = sizeof(outbuf);
while (!finished) {
checkInterrupt();
if (!BrotliEncoderCompressStream(state,
BROTLI_OPERATION_FINISH,
&avail_in, &next_in,
&avail_out, &next_out,
nullptr))
throw CompressionError("error while finishing brotli file");
finished = BrotliEncoderIsFinished(state);
if (avail_out == 0 || finished) {
nextSink(outbuf, sizeof(outbuf) - avail_out);
next_out = outbuf;
avail_out = sizeof(outbuf);
}
}
}
void write(const unsigned char * data, size_t len) override
{
assert(!finished);
// Don't feed brotli too much at once
const size_t CHUNK_SIZE = sizeof(outbuf) << 2;
while (len) {
size_t n = std::min(CHUNK_SIZE, len);
writeInternal(data, n);
data += n;
len -= n;
}
}
private:
void writeInternal(const unsigned char * data, size_t len)
{
assert(!finished);
const uint8_t *next_in = data;
size_t avail_in = len;
uint8_t *next_out = outbuf;
size_t avail_out = sizeof(outbuf);
while (avail_in > 0) {
checkInterrupt();
if (!BrotliEncoderCompressStream(state,
BROTLI_OPERATION_PROCESS,
&avail_in, &next_in,
&avail_out, &next_out,
nullptr))
throw CompressionError("error while compressing brotli file");
if (avail_out < sizeof(outbuf) || avail_in == 0) {
nextSink(outbuf, sizeof(outbuf) - avail_out);
next_out = outbuf;
avail_out = sizeof(outbuf);
}
}
}
};
#endif // HAVE_BROTLI
ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink) ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink)
{ {
if (method == "none") if (method == "none")
@ -307,7 +458,11 @@ ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & next
else if (method == "bzip2") else if (method == "bzip2")
return make_ref<BzipSink>(nextSink); return make_ref<BzipSink>(nextSink);
else if (method == "br") else if (method == "br")
#if HAVE_BROTLI
return make_ref<BrotliSink>(nextSink); return make_ref<BrotliSink>(nextSink);
#else
return make_ref<BrotliCmdSink>(nextSink);
#endif
else else
throw UnknownCompressionMethod(format("unknown compression method '%s'") % method); throw UnknownCompressionMethod(format("unknown compression method '%s'") % method);
} }

View file

@ -6,8 +6,8 @@ libutil_DIR := $(d)
libutil_SOURCES := $(wildcard $(d)/*.cc) libutil_SOURCES := $(wildcard $(d)/*.cc)
libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) $(LIBBROTLI_LIBS)
libutil_LIBS = libformat libutil_LIBS = libformat
libutil_CXXFLAGS = -DBRO=\"$(bro)\" libutil_CXXFLAGS = -DBROTLI=\"$(brotli)\"

28
tests/brotli.sh Normal file
View file

@ -0,0 +1,28 @@
source common.sh

# Round-trip a store path through a brotli-compressed ("compression=br")
# file:// binary cache and check that `nix hash-path` reports the same hash
# before and after.

# Only test if we found brotli libraries
# (CLI tool is likely unavailable if libraries are missing)
if [ -n "$HAVE_BROTLI" ]; then

    clearStore
    clearCache

    cacheURI="file://$cacheDir?compression=br"

    outPath=$(nix-build dependencies.nix --no-out-link)
    # Expansions are quoted so paths containing whitespace or glob
    # characters are passed through as single, literal arguments.
    nix copy --to "$cacheURI" "$outPath"

    HASH=$(nix hash-path "$outPath")

    clearStore
    clearCacheCache

    nix copy --from "$cacheURI" "$outPath" --no-check-sigs

    HASH2=$(nix hash-path "$outPath")

    # Quote the right-hand side: inside [[ ]] an unquoted operand of `=` is
    # treated as a pattern rather than compared literally.
    [[ "$HASH" = "$HASH2" ]]

fi # HAVE_BROTLI

View file

@ -32,6 +32,7 @@ export xmllint="@xmllint@"
export SHELL="@bash@" export SHELL="@bash@"
export PAGER=cat export PAGER=cat
export HAVE_SODIUM="@HAVE_SODIUM@" export HAVE_SODIUM="@HAVE_SODIUM@"
export HAVE_BROTLI="@HAVE_BROTLI@"
export version=@PACKAGE_VERSION@ export version=@PACKAGE_VERSION@
export system=@system@ export system=@system@

View file

@ -19,7 +19,8 @@ nix_tests = \
fetchGit.sh \ fetchGit.sh \
fetchMercurial.sh \ fetchMercurial.sh \
signing.sh \ signing.sh \
run.sh run.sh \
brotli.sh
# parallel.sh # parallel.sh
install-tests += $(foreach x, $(nix_tests), tests/$(x)) install-tests += $(foreach x, $(nix_tests), tests/$(x))