From 4911a10a4e51102a21a5d123a852c75d2ec92dbc Mon Sep 17 00:00:00 2001
From: Eelco Dolstra <eelco.dolstra@logicblox.com>
Date: Fri, 29 Jun 2012 14:26:31 -0400
Subject: [PATCH] Use XZ compression in binary caches

XZ compresses significantly better than bzip2.  Here are the
compression ratios and execution times (using 4 cores in parallel) on
my /var/run/current-system (3.1 GiB):

  bzip2: total compressed size 849.56 MiB, 30.8% [2m08]
  xz -6: total compressed size 641.84 MiB, 23.4% [6m53]
  xz -7: total compressed size 621.82 MiB, 22.6% [7m19]
  xz -8: total compressed size 599.33 MiB, 21.8% [7m18]
  xz -9: total compressed size 588.18 MiB, 21.4% [7m40]

Note that compression takes much longer.  More importantly, however,
decompression is much faster:

  bzip2: 1m47.274s
  xz -6: 0m55.446s
  xz -7: 0m54.119s
  xz -8: 0m52.388s
  xz -9: 0m51.842s

The only downside to using -9 is that decompression takes a fair
amount (~65 MB) of memory.
---
 configure.ac              |  1 +
 corepkgs/config.nix.in    |  3 ++-
 corepkgs/nar.nix          |  9 +++++----
 perl/lib/Nix/Config.pm.in |  3 ++-
 scripts/nix-push.in       | 26 +++++++++++++-------------
 substitute.mk             |  1 +
 6 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/configure.ac b/configure.ac
index 7b814dedc..28959198d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -161,6 +161,7 @@ NEED_PROG(perl, perl)
 NEED_PROG(sed, sed)
 NEED_PROG(tar, tar)
 NEED_PROG(bzip2, bzip2)
+NEED_PROG(xz, xz)
 AC_PATH_PROG(dot, dot)
 AC_PATH_PROG(dblatex, dblatex)
 AC_PATH_PROG(gzip, gzip)
diff --git a/corepkgs/config.nix.in b/corepkgs/config.nix.in
index b324d732a..1be4bd090 100644
--- a/corepkgs/config.nix.in
+++ b/corepkgs/config.nix.in
@@ -6,7 +6,8 @@ in {
   perl = "@perl@";
   shell = "@shell@";
   coreutils = "@coreutils@";
-  bzip2 = fromEnv "NIX_BZIP2" "@bzip2@";
+  bzip2 = "@bzip2@";
+  xz = "@xz@";
   tar = "@tar@";
   tr = "@tr@";
   nixBinDir = fromEnv "NIX_BIN_DIR" "@bindir@";
diff --git a/corepkgs/nar.nix b/corepkgs/nar.nix
index 4747dc31d..c5155dcc9 100644
--- a/corepkgs/nar.nix
+++ b/corepkgs/nar.nix
@@ -8,14 +8,15 @@ let
 
       echo "packing ‘$storePath’..."
       mkdir $out
-      dst=$out/tmp.nar.bz2
+      dst=$out/tmp.nar.xz
 
       set -o pipefail
-      nix-store --dump "$storePath" | ${bzip2} > $dst
+      nix-store --dump "$storePath" | ${xz} -9 > $dst
 
-      nix-hash --flat --type $hashAlgo --base32 $dst > $out/narbz2-hash
+      hash=$(nix-hash --flat --type $hashAlgo --base32 $dst)
+      echo -n $hash > $out/nar-compressed-hash
 
-      mv $out/tmp.nar.bz2 $out/$(cat $out/narbz2-hash).nar.bz2
+      mv $dst $out/$hash.nar.xz
     '';
 
 in
diff --git a/perl/lib/Nix/Config.pm.in b/perl/lib/Nix/Config.pm.in
index b6d346651..5adc1ffba 100644
--- a/perl/lib/Nix/Config.pm.in
+++ b/perl/lib/Nix/Config.pm.in
@@ -7,7 +7,8 @@ $manifestDir = $ENV{"NIX_MANIFESTS_DIR"} || "@localstatedir@/nix/manifests";
 $logDir = $ENV{"NIX_LOG_DIR"} || "@localstatedir@/log/nix";
 $confDir = $ENV{"NIX_CONF_DIR"} || "@sysconfdir@/nix";
 
-$bzip2 = $ENV{"NIX_BZIP2"} || "@bzip2@";
+$bzip2 = "@bzip2@";
+$xz = "@xz@";
 $curl = "@curl@";
 
 $useBindings = "@perlbindings@" eq "yes";
diff --git a/scripts/nix-push.in b/scripts/nix-push.in
index db94b51fd..9edd87319 100755
--- a/scripts/nix-push.in
+++ b/scripts/nix-push.in
@@ -118,7 +118,7 @@ close READ or die "nix-build failed: $?";
 print STDERR "uploading/copying archives...\n";
 
 my $totalNarSize = 0;
-my $totalNarBz2Size = 0;
+my $totalCompressedSize = 0;
 
 for (my $n = 0; $n < scalar @storePaths; $n++) {
     my $storePath = $storePaths[$n];
@@ -146,22 +146,22 @@ for (my $n = 0; $n < scalar @storePaths; $n++) {
     $totalNarSize += $narSize;
     
     # Get info about the compressed NAR.
-    open HASH, "$narDir/narbz2-hash" or die "cannot open narbz2-hash";
-    my $narBz2Hash = <HASH>;
-    chomp $narBz2Hash;
-    $narBz2Hash =~ /^[0-9a-z]+$/ or die "invalid hash";
+    open HASH, "$narDir/nar-compressed-hash" or die "cannot open nar-compressed-hash";
+    my $compressedHash = <HASH>;
+    chomp $compressedHash;
+    $compressedHash =~ /^[0-9a-z]+$/ or die "invalid hash";
     close HASH;
 
-    my $narName = "$narBz2Hash.nar.bz2";
+    my $narName = "$compressedHash.nar.xz";
 
     my $narFile = "$narDir/$narName";
     (-f $narFile) or die "NAR file for $storePath not found";
 
-    my $narBz2Size = stat($narFile)->size;    
-    $totalNarBz2Size += $narBz2Size;
+    my $compressedSize = stat($narFile)->size;    
+    $totalCompressedSize += $compressedSize;
 
     printf STDERR "%s [%.2f MiB, %.1f%%]\n", $storePath,
-        $narBz2Size / (1024 * 1024), $narBz2Size / $narSize * 100;
+        $compressedSize / (1024 * 1024), $compressedSize / $narSize * 100;
 
     # Upload the compressed NAR.
     if ($localCopy) {
@@ -184,13 +184,13 @@ for (my $n = 0; $n < scalar @storePaths; $n++) {
     my $info;
     $info .= "StorePath: $storePath\n";
     $info .= "URL: $narName\n";
-    $info .= "CompressedHash: sha256:$narBz2Hash\n";
-    $info .= "CompressedSize: $narBz2Size\n";
+    $info .= "CompressedHash: sha256:$compressedHash\n";
+    $info .= "CompressedSize: $compressedSize\n";
     $info .= "NarHash: $narHash\n";
     $info .= "NarSize: $narSize\n";
     $info .= "References: " . join(" ", map { basename $_ } @{$refs}) . "\n";
     if (defined $deriver) {
-        $info .= "Deriver: " . basename $deriver, "\n";
+        $info .= "Deriver: " . basename($deriver) . "\n";
         if (isValidPath($deriver)) {
             my $drv = derivationFromPath($deriver);
             $info .= "System: $drv->{platform}\n";
@@ -214,4 +214,4 @@ for (my $n = 0; $n < scalar @storePaths; $n++) {
 }
 
 printf STDERR "total compressed size %.2f MiB, %.1f%%\n",
-    $totalNarBz2Size / (1024 * 1024), $totalNarBz2Size / $totalNarSize * 100;
+    $totalCompressedSize / (1024 * 1024), $totalCompressedSize / $totalNarSize * 100;
diff --git a/substitute.mk b/substitute.mk
index eb489c97a..77c5afc28 100644
--- a/substitute.mk
+++ b/substitute.mk
@@ -16,6 +16,7 @@
 	 -e "s^@shell\@^$(bash)^g" \
 	 -e "s^@curl\@^$(curl)^g" \
 	 -e "s^@bzip2\@^$(bzip2)^g" \
+	 -e "s^@xz\@^$(xz)^g" \
 	 -e "s^@perl\@^$(perl)^g" \
 	 -e "s^@perlFlags\@^$(perlFlags)^g" \
 	 -e "s^@coreutils\@^$(coreutils)^g" \