2015-10-30 11:33:40 +00:00
# include "compression.hh"
2016-04-22 16:15:02 +00:00
# include "util.hh"
2016-04-29 15:43:37 +00:00
# include "finally.hh"
2018-02-11 18:47:42 +00:00
# include "logging.hh"
2015-10-30 11:33:40 +00:00
# include <lzma.h>
2016-04-29 15:43:37 +00:00
# include <bzlib.h>
2015-12-31 13:18:20 +00:00
# include <cstdio>
2016-04-29 15:43:37 +00:00
# include <cstring>
2015-10-30 11:33:40 +00:00
2017-12-29 20:42:14 +00:00
# if HAVE_BROTLI
# include <brotli/decode.h>
# include <brotli/encode.h>
# endif // HAVE_BROTLI
2016-05-04 13:46:25 +00:00
# include <iostream>
2016-02-15 20:45:56 +00:00
2016-05-04 13:46:25 +00:00
namespace nix {
2016-02-15 20:45:56 +00:00
2018-03-16 15:59:31 +00:00
/* Size, in bytes, of the scratch buffers allocated by the streaming
   decompression functions in this file (32 KiB). */
static const size_t bufSize = 32 * 1024 ;
/* Pass-through for the "none" method: forwards everything 'source'
   yields to 'sink' unchanged, one buffer at a time, and stops once the
   source throws EndOfFile. */
static void decompressNone(Source & source, Sink & sink)
{
    std::vector<unsigned char> chunk(bufSize);

    for (;;) {
        size_t got;
        try {
            got = source.read(chunk.data(), chunk.size());
        } catch (EndOfFile &) {
            /* Nothing follows the loop, so leaving the function here is
               the same as breaking out of it. */
            return;
        }
        sink(chunk.data(), got);
    }
}
/* Streams xz-compressed data from 'source' through liblzma and writes
   the decoded bytes to 'sink'.

   The decoder is created with UINT64_MAX as its memory limit and with
   the LZMA_CONCATENATED flag. Input is pulled in chunks of at most
   'bufSize' bytes; once the source throws EndOfFile and no input is
   left, the decoder is driven with LZMA_FINISH.

   Throws CompressionError if the decoder cannot be initialised or if
   lzma_code() reports anything other than LZMA_OK / LZMA_STREAM_END. */
static void decompressXZ(Source & source, Sink & sink)
{
    lzma_stream strm(LZMA_STREAM_INIT);

    lzma_ret initStatus = lzma_stream_decoder(
        &strm, UINT64_MAX, LZMA_CONCATENATED);
    if (initStatus != LZMA_OK)
        throw CompressionError("unable to initialise lzma decoder");

    /* Release the decoder on every exit path, including exceptions. */
    Finally free([&]() { lzma_end(&strm); });

    lzma_action action = LZMA_RUN;
    std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize);
    strm.next_in = nullptr;
    strm.avail_in = 0;
    strm.next_out = outbuf.data();
    strm.avail_out = outbuf.size();
    bool eof = false;

    while (true) {
        checkInterrupt();

        /* Refill the input buffer only when the decoder has consumed
           everything and the source has not yet signalled EndOfFile. */
        if (strm.avail_in == 0 && !eof) {
            strm.next_in = inbuf.data();
            try {
                /* Read through the vector's own (non-const) pointer rather
                   than casting constness away from strm.next_in. */
                strm.avail_in = source.read(inbuf.data(), inbuf.size());
            } catch (EndOfFile &) {
                eof = true;
            }
        }

        if (strm.avail_in == 0)
            action = LZMA_FINISH;

        lzma_ret status = lzma_code(&strm, action);

        /* Hand over whatever was produced and reset the output window. */
        if (strm.avail_out < outbuf.size()) {
            sink(outbuf.data(), outbuf.size() - strm.avail_out);
            strm.next_out = outbuf.data();
            strm.avail_out = outbuf.size();
        }

        if (status == LZMA_STREAM_END) return;

        if (status != LZMA_OK)
            throw CompressionError("error %d while decompressing xz file", status);
    }
}
2018-03-16 15:59:31 +00:00
/* Streams bzip2-compressed data from 'source' through libbz2 and writes
   the decoded bytes to 'sink'.

   Input is pulled in chunks of at most 'bufSize' bytes. If the source
   has signalled EndOfFile and a call to BZ2_bzDecompress() neither has
   input left nor produced any output, the data is reported as
   truncated.

   Throws CompressionError on initialisation failure, on premature end
   of input, or when BZ2_bzDecompress() returns anything other than
   BZ_OK / BZ_STREAM_END. */
static void decompressBzip2(Source & source, Sink & sink)
{
    bz_stream strm;
    memset(&strm, 0, sizeof(strm));

    int initStatus = BZ2_bzDecompressInit(&strm, 0, 0);
    if (initStatus != BZ_OK)
        throw CompressionError("unable to initialise bzip2 decoder");

    /* Release the decoder on every exit path, including exceptions. */
    Finally free([&]() { BZ2_bzDecompressEnd(&strm); });

    std::vector<char> inbuf(bufSize), outbuf(bufSize);
    strm.next_in = nullptr;
    strm.avail_in = 0;
    strm.next_out = outbuf.data();
    strm.avail_out = outbuf.size();
    bool eof = false;

    while (true) {
        checkInterrupt();

        /* Refill the input buffer only when the decoder has consumed
           everything and the source has not yet signalled EndOfFile. */
        if (strm.avail_in == 0 && !eof) {
            strm.next_in = inbuf.data();
            try {
                strm.avail_in = source.read(
                    reinterpret_cast<unsigned char *>(inbuf.data()), inbuf.size());
            } catch (EndOfFile &) {
                eof = true;
            }
        }

        int status = BZ2_bzDecompress(&strm);

        /* No input left, nothing produced, and the source is exhausted:
           the decoder cannot make further progress. */
        if (strm.avail_in == 0 && strm.avail_out == outbuf.size() && eof)
            throw CompressionError("bzip2 data ends prematurely");

        /* Hand over whatever was produced and reset the output window. */
        if (strm.avail_out < outbuf.size()) {
            sink(reinterpret_cast<unsigned char *>(outbuf.data()),
                outbuf.size() - strm.avail_out);
            strm.next_out = outbuf.data();
            strm.avail_out = outbuf.size();
        }

        if (status == BZ_STREAM_END) return;

        if (status != BZ_OK)
            throw CompressionError("error while decompressing bzip2 file");
    }
}
2018-03-16 15:59:31 +00:00
/* Decompresses brotli data from 'source' into 'sink'.

   Without HAVE_BROTLI the work is delegated to the external program
   named by BROTLI, invoked with the single argument "-d", with 'source'
   as its standard input and 'sink' as its standard output.

   With HAVE_BROTLI the brotli decoder library is used directly: input
   is pulled in chunks of at most 'bufSize' bytes and every call to
   BrotliDecoderDecompressStream() starts with an empty output buffer.

   Throws CompressionError if the decoder cannot be created, reports an
   error, needs more input after the source hit EndOfFile, leaves
   unconsumed input after finishing, or asks for more output space
   without having produced anything. */
static void decompressBrotli(Source & source, Sink & sink)
{
#if !HAVE_BROTLI
    RunOptions options(BROTLI, {"-d"});
    options.standardIn = &source;
    options.standardOut = &sink;
    runProgram2(options);
#else
    auto * s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
    if (!s)
        throw CompressionError("unable to initialize brotli decoder");

    /* Destroy the decoder on every exit path, including exceptions. */
    Finally free([s]() { BrotliDecoderDestroyInstance(s); });

    std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize);
    const uint8_t * next_in = nullptr;
    size_t avail_in = 0;
    bool eof = false;

    while (true) {
        checkInterrupt();

        /* Refill the input buffer only when the decoder has consumed
           everything and the source has not yet signalled EndOfFile. */
        if (avail_in == 0 && !eof) {
            next_in = inbuf.data();
            try {
                /* Read through the vector's own (non-const) pointer rather
                   than casting constness away from next_in. */
                avail_in = source.read(inbuf.data(), inbuf.size());
            } catch (EndOfFile &) {
                eof = true;
            }
        }

        uint8_t * next_out = outbuf.data();
        size_t avail_out = outbuf.size();

        auto ret = BrotliDecoderDecompressStream(s,
            &avail_in, &next_in,
            &avail_out, &next_out,
            nullptr);

        switch (ret) {
        case BROTLI_DECODER_RESULT_ERROR:
            throw CompressionError("error while decompressing brotli file");
        case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
            if (eof)
                throw CompressionError("incomplete or corrupt brotli file");
            break;
        case BROTLI_DECODER_RESULT_SUCCESS:
            if (avail_in != 0)
                throw CompressionError("unexpected input after brotli decompression");
            break;
        case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT:
            // I'm not sure if this can happen, but abort if this happens with empty buffer
            if (avail_out == outbuf.size())
                throw CompressionError("brotli decompression requires larger buffer");
            break;
        }

        // Always ensure we have full buffer for next invocation
        if (avail_out < outbuf.size())
            sink(outbuf.data(), outbuf.size() - avail_out);

        if (ret == BROTLI_DECODER_RESULT_SUCCESS) return;
    }
#endif // HAVE_BROTLI
}
2018-03-16 15:59:31 +00:00
/* Convenience overload: decompresses the in-memory string 'in' with
   'method' via the Source/Sink overload of decompress() and returns the
   string collected by the sink. */
ref<std::string> decompress(const std::string & method, const std::string & in)
{
    StringSink collected;
    StringSource input(in);
    decompress(method, input, collected);
    return collected.s;
}
2018-03-16 15:59:31 +00:00
void decompress ( const std : : string & method , Source & source , Sink & sink )
2016-04-29 15:02:57 +00:00
{
if ( method = = " none " )
2018-03-16 15:59:31 +00:00
return decompressNone ( source , sink ) ;
2016-04-29 15:02:57 +00:00
else if ( method = = " xz " )
2018-03-16 15:59:31 +00:00
return decompressXZ ( source , sink ) ;
2016-04-29 15:43:37 +00:00
else if ( method = = " bzip2 " )
2018-03-16 15:59:31 +00:00
return decompressBzip2 ( source , sink ) ;
2017-03-13 13:40:15 +00:00
else if ( method = = " br " )
2018-03-16 15:59:31 +00:00
return decompressBrotli ( source , sink ) ;
2016-04-29 15:02:57 +00:00
else
2018-03-16 15:59:31 +00:00
throw UnknownCompressionMethod ( " unknown compression method '%s' " , method ) ;
2016-04-29 15:02:57 +00:00
}
2016-05-04 13:46:25 +00:00
/* Compression sink for the "none" method: every write is forwarded to
   'nextSink' unmodified. */
struct NoneSink : CompressionSink
{
    Sink & nextSink;

    NoneSink(Sink & nextSink)
        : nextSink(nextSink)
    {
    }

    /* There is no encoder state to drain; just flush this sink. */
    void finish() override
    {
        flush();
    }

    void write(const unsigned char * data, size_t len) override
    {
        nextSink(data, len);
    }
};
struct XzSink : CompressionSink
{
Sink & nextSink ;
uint8_t outbuf [ BUFSIZ ] ;
lzma_stream strm = LZMA_STREAM_INIT ;
bool finished = false ;
2018-02-11 19:23:31 +00:00
template < typename F >
XzSink ( Sink & nextSink , F & & initEncoder ) : nextSink ( nextSink ) {
lzma_ret ret = initEncoder ( ) ;
2016-05-04 13:46:25 +00:00
if ( ret ! = LZMA_OK )
2017-03-21 18:23:07 +00:00
throw CompressionError ( " unable to initialise lzma encoder " ) ;
2016-05-04 13:46:25 +00:00
// FIXME: apply the x86 BCJ filter?
strm . next_out = outbuf ;
strm . avail_out = sizeof ( outbuf ) ;
}
2018-02-11 19:23:31 +00:00
XzSink ( Sink & nextSink ) : XzSink ( nextSink , [ this ] ( ) {
return lzma_easy_encoder ( & strm , 6 , LZMA_CHECK_CRC64 ) ;
} ) { }
2016-05-04 13:46:25 +00:00
~ XzSink ( )
{
lzma_end ( & strm ) ;
}
void finish ( ) override
{
CompressionSink : : flush ( ) ;
assert ( ! finished ) ;
finished = true ;
while ( true ) {
checkInterrupt ( ) ;
lzma_ret ret = lzma_code ( & strm , LZMA_FINISH ) ;
if ( ret ! = LZMA_OK & & ret ! = LZMA_STREAM_END )
2017-03-21 18:23:07 +00:00
throw CompressionError ( " error while flushing xz file " ) ;
2016-05-04 13:46:25 +00:00
if ( strm . avail_out = = 0 | | ret = = LZMA_STREAM_END ) {
nextSink ( outbuf , sizeof ( outbuf ) - strm . avail_out ) ;
strm . next_out = outbuf ;
strm . avail_out = sizeof ( outbuf ) ;
}
if ( ret = = LZMA_STREAM_END ) break ;
}
}
void write ( const unsigned char * data , size_t len ) override
{
assert ( ! finished ) ;
strm . next_in = data ;
strm . avail_in = len ;
while ( strm . avail_in ) {
checkInterrupt ( ) ;
lzma_ret ret = lzma_code ( & strm , LZMA_RUN ) ;
if ( ret ! = LZMA_OK )
2017-03-21 18:23:07 +00:00
throw CompressionError ( " error while compressing xz file " ) ;
2016-05-04 13:46:25 +00:00
if ( strm . avail_out = = 0 ) {
nextSink ( outbuf , sizeof ( outbuf ) ) ;
strm . next_out = outbuf ;
strm . avail_out = sizeof ( outbuf ) ;
}
}
}
} ;
2018-02-11 19:23:31 +00:00
# ifdef HAVE_LZMA_MT
/* XzSink variant whose encoder is created with lzma_stream_encoder_mt(),
   using as many threads as lzma_cputhreads() reports (at least one). */
struct ParallelXzSink : public XzSink
{
    ParallelXzSink(Sink & nextSink) : XzSink(nextSink, [this]() {
        lzma_mt mt = {};
        mt.flags = 0;
        mt.block_size = 0;
        mt.timeout = 300; // Using the same setting as the xz cmd line
        mt.preset = LZMA_PRESET_DEFAULT;
        mt.filters = nullptr;
        mt.check = LZMA_CHECK_CRC64;

        /* lzma_cputhreads() may report 0; always run at least one thread. */
        const auto detected = lzma_cputhreads();
        mt.threads = detected == 0 ? 1 : detected;

        // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the
        // number of threads.
        return lzma_stream_encoder_mt(&strm, &mt);
    }) {}
};
# endif
2016-05-04 13:46:25 +00:00
struct BzipSink : CompressionSink
{
Sink & nextSink ;
char outbuf [ BUFSIZ ] ;
bz_stream strm ;
bool finished = false ;
BzipSink ( Sink & nextSink ) : nextSink ( nextSink )
{
memset ( & strm , 0 , sizeof ( strm ) ) ;
int ret = BZ2_bzCompressInit ( & strm , 9 , 0 , 30 ) ;
if ( ret ! = BZ_OK )
2017-03-21 18:23:07 +00:00
throw CompressionError ( " unable to initialise bzip2 encoder " ) ;
2016-05-04 13:46:25 +00:00
strm . next_out = outbuf ;
strm . avail_out = sizeof ( outbuf ) ;
}
~ BzipSink ( )
{
BZ2_bzCompressEnd ( & strm ) ;
}
void finish ( ) override
{
flush ( ) ;
assert ( ! finished ) ;
finished = true ;
while ( true ) {
checkInterrupt ( ) ;
int ret = BZ2_bzCompress ( & strm , BZ_FINISH ) ;
if ( ret ! = BZ_FINISH_OK & & ret ! = BZ_STREAM_END )
2017-03-21 18:23:07 +00:00
throw CompressionError ( " error while flushing bzip2 file " ) ;
2016-05-04 13:46:25 +00:00
if ( strm . avail_out = = 0 | | ret = = BZ_STREAM_END ) {
nextSink ( ( unsigned char * ) outbuf , sizeof ( outbuf ) - strm . avail_out ) ;
strm . next_out = outbuf ;
strm . avail_out = sizeof ( outbuf ) ;
}
if ( ret = = BZ_STREAM_END ) break ;
}
}
void write ( const unsigned char * data , size_t len ) override
2018-05-02 10:54:30 +00:00
{
/* Bzip2's 'avail_in' parameter is an unsigned int, so we need
to split the input into chunks of at most 4 GiB . */
while ( len ) {
auto n = std : : min ( ( size_t ) std : : numeric_limits < decltype ( strm . avail_in ) > : : max ( ) , len ) ;
writeInternal ( data , n ) ;
data + = n ;
len - = n ;
}
}
void writeInternal ( const unsigned char * data , size_t len )
2016-05-04 13:46:25 +00:00
{
assert ( ! finished ) ;
2018-05-02 10:54:30 +00:00
assert ( len < = std : : numeric_limits < decltype ( strm . avail_in ) > : : max ( ) ) ;
2016-05-04 13:46:25 +00:00
strm . next_in = ( char * ) data ;
strm . avail_in = len ;
while ( strm . avail_in ) {
checkInterrupt ( ) ;
int ret = BZ2_bzCompress ( & strm , BZ_RUN ) ;
if ( ret ! = BZ_OK )
2017-03-21 18:23:07 +00:00
CompressionError ( " error while compressing bzip2 file " ) ;
2016-05-04 13:46:25 +00:00
if ( strm . avail_out = = 0 ) {
nextSink ( ( unsigned char * ) outbuf , sizeof ( outbuf ) ) ;
strm . next_out = outbuf ;
strm . avail_out = sizeof ( outbuf ) ;
}
}
}
} ;
2017-12-29 20:42:14 +00:00
struct LambdaCompressionSink : CompressionSink
2017-03-14 14:03:53 +00:00
{
Sink & nextSink ;
std : : string data ;
2017-12-29 20:42:14 +00:00
using CompressFnTy = std : : function < std : : string ( const std : : string & ) > ;
CompressFnTy compressFn ;
LambdaCompressionSink ( Sink & nextSink , CompressFnTy compressFn )
: nextSink ( nextSink )
, compressFn ( std : : move ( compressFn ) )
{
} ;
void finish ( ) override
{
flush ( ) ;
nextSink ( compressFn ( data ) ) ;
}
void write ( const unsigned char * data , size_t len ) override
{
checkInterrupt ( ) ;
this - > data . append ( ( const char * ) data , len ) ;
}
} ;
/* LambdaCompressionSink whose compression function hands the buffered
   data to runProgram() for the program named by BROTLI (no arguments)
   and returns that call's result. */
struct BrotliCmdSink : LambdaCompressionSink
{
    BrotliCmdSink(Sink & nextSink)
        : LambdaCompressionSink(
            nextSink,
            [](const std::string & input) {
                return runProgram(BROTLI, true, {}, input);
            })
    {
    }
};
# if HAVE_BROTLI
struct BrotliSink : CompressionSink
{
Sink & nextSink ;
uint8_t outbuf [ BUFSIZ ] ;
BrotliEncoderState * state ;
bool finished = false ;
2017-03-14 14:03:53 +00:00
BrotliSink ( Sink & nextSink ) : nextSink ( nextSink )
{
2017-12-29 20:42:14 +00:00
state = BrotliEncoderCreateInstance ( nullptr , nullptr , nullptr ) ;
if ( ! state )
throw CompressionError ( " unable to initialise brotli encoder " ) ;
2017-03-14 14:03:53 +00:00
}
~ BrotliSink ( )
{
2017-12-29 20:42:14 +00:00
BrotliEncoderDestroyInstance ( state ) ;
2017-03-14 14:03:53 +00:00
}
void finish ( ) override
{
flush ( ) ;
2017-12-29 20:42:14 +00:00
assert ( ! finished ) ;
const uint8_t * next_in = nullptr ;
size_t avail_in = 0 ;
uint8_t * next_out = outbuf ;
size_t avail_out = sizeof ( outbuf ) ;
while ( ! finished ) {
checkInterrupt ( ) ;
if ( ! BrotliEncoderCompressStream ( state ,
BROTLI_OPERATION_FINISH ,
& avail_in , & next_in ,
& avail_out , & next_out ,
nullptr ) )
throw CompressionError ( " error while finishing brotli file " ) ;
finished = BrotliEncoderIsFinished ( state ) ;
if ( avail_out = = 0 | | finished ) {
nextSink ( outbuf , sizeof ( outbuf ) - avail_out ) ;
next_out = outbuf ;
avail_out = sizeof ( outbuf ) ;
}
}
2017-03-14 14:03:53 +00:00
}
void write ( const unsigned char * data , size_t len ) override
{
2017-12-29 20:42:14 +00:00
// Don't feed brotli too much at once
const size_t CHUNK_SIZE = sizeof ( outbuf ) < < 2 ;
while ( len ) {
size_t n = std : : min ( CHUNK_SIZE , len ) ;
writeInternal ( data , n ) ;
data + = n ;
len - = n ;
}
}
2018-05-02 10:54:30 +00:00
2017-12-29 20:42:14 +00:00
void writeInternal ( const unsigned char * data , size_t len )
{
assert ( ! finished ) ;
const uint8_t * next_in = data ;
size_t avail_in = len ;
uint8_t * next_out = outbuf ;
size_t avail_out = sizeof ( outbuf ) ;
while ( avail_in > 0 ) {
checkInterrupt ( ) ;
if ( ! BrotliEncoderCompressStream ( state ,
BROTLI_OPERATION_PROCESS ,
& avail_in , & next_in ,
& avail_out , & next_out ,
nullptr ) )
throw CompressionError ( " error while compressing brotli file " ) ;
if ( avail_out < sizeof ( outbuf ) | | avail_in = = 0 ) {
nextSink ( outbuf , sizeof ( outbuf ) - avail_out ) ;
next_out = outbuf ;
avail_out = sizeof ( outbuf ) ;
}
}
2017-03-14 14:03:53 +00:00
}
} ;
2017-12-29 20:42:14 +00:00
# endif // HAVE_BROTLI
2017-03-14 14:03:53 +00:00
2018-02-07 16:54:08 +00:00
/* Creates a compression sink for 'method' that writes its encoded
   output to 'nextSink'.

   Recognised method names are "none", "xz", "bzip2" and "br"; any other
   name raises UnknownCompressionMethod. For "br" the library-backed
   BrotliSink is used when HAVE_BROTLI is set, otherwise BrotliCmdSink.

   If 'parallel' is true and the method is "xz" in a build with
   HAVE_LZMA_MT, a ParallelXzSink is returned. In every other case where
   'parallel' is true, a warning is printed and the single-threaded sink
   for the method is returned. */
ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel)
{
    if (parallel) {
#ifdef HAVE_LZMA_MT
        if (method == "xz")
            return make_ref<ParallelXzSink>(nextSink);
#endif
        printMsg(lvlError, format("Warning: parallel compression requested but not supported for method '%1%', falling back to single-threaded compression") % method);
    }

    /* The names are compared exactly, without surrounding whitespace. */
    if (method == "none")
        return make_ref<NoneSink>(nextSink);

    if (method == "xz")
        return make_ref<XzSink>(nextSink);

    if (method == "bzip2")
        return make_ref<BzipSink>(nextSink);

    if (method == "br") {
#if HAVE_BROTLI
        return make_ref<BrotliSink>(nextSink);
#else
        return make_ref<BrotliCmdSink>(nextSink);
#endif
    }

    throw UnknownCompressionMethod(format("unknown compression method '%s'") % method);
}
2018-03-16 15:59:31 +00:00
/* Convenience wrapper: compresses the in-memory string 'in' with
   'method' (forwarding 'parallel' to makeCompressionSink()) and returns
   the string collected by the underlying StringSink. */
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel)
{
    StringSink collected;
    auto compressor = makeCompressionSink(method, collected, parallel);

    (*compressor)(in);
    /* finish() is what makes the compressor emit its remaining output. */
    compressor->finish();

    return collected.s;
}
2015-10-30 11:33:40 +00:00
}