bae75ca5a1
path can be created by copying it from another location in the file system. This is useful in the NixOS installation.
403 lines
11 KiB
Perl
Executable file
403 lines
11 KiB
Perl
Executable file
#! @perl@ -w -I@libexecdir@/nix
|
|
|
|
use strict;
|
|
use File::Temp qw(tempdir);
|
|
use readmanifest;
|
|
|
|
|
|
# Some hard-coded options.
|
|
my $maxNarSize = 100 * 1024 * 1024; # max size of NAR archives to generate patches for
|
|
my $maxPatchFraction = 0.60; # if patch is bigger than this fraction of full archive, reject
|
|
|
|
|
|
die unless scalar @ARGV == 5;
|
|
|
|
my $hashAlgo = "sha256";
|
|
|
|
my $cacheDir = $ARGV[0];
|
|
my $patchesDir = $ARGV[1];
|
|
my $patchesURL = $ARGV[2];
|
|
my $srcDir = $ARGV[3];
|
|
my $dstDir = $ARGV[4];
|
|
|
|
my $tmpDir = tempdir("nix-generate-patches.XXXXXX", CLEANUP => 1, TMPDIR => 1)
|
|
or die "cannot create a temporary directory";
|
|
|
|
print "TEMP = $tmpDir\n";
|
|
|
|
#END { rmdir $tmpDir; }
|
|
|
|
my %srcNarFiles;
|
|
my %srcLocalPaths;
|
|
my %srcPatches;
|
|
|
|
my %dstNarFiles;
|
|
my %dstLocalPaths;
|
|
my %dstPatches;
|
|
|
|
readManifest "$srcDir/MANIFEST",
|
|
\%srcNarFiles, \%srcLocalPaths, \%srcPatches;
|
|
|
|
readManifest "$dstDir/MANIFEST",
|
|
\%dstNarFiles, \%dstLocalPaths, \%dstPatches;
|
|
|
|
|
|
sub findOutputPaths {
|
|
my $narFiles = shift;
|
|
|
|
my %outPaths;
|
|
|
|
foreach my $p (keys %{$narFiles}) {
|
|
|
|
# Ignore store expressions.
|
|
next if ($p =~ /\.store$/);
|
|
next if ($p =~ /\.drv$/);
|
|
|
|
# Ignore builders (too much ambiguity -- they're all called
|
|
# `builder.sh').
|
|
next if ($p =~ /\.sh$/);
|
|
next if ($p =~ /\.patch$/);
|
|
|
|
# Don't bother including tar files etc.
|
|
next if ($p =~ /\.tar\.(gz|bz2)$/ || $p =~ /\.zip$/ || $p =~ /\.bin$/);
|
|
|
|
$outPaths{$p} = 1;
|
|
}
|
|
|
|
return %outPaths;
|
|
}
|
|
|
|
print "finding src output paths...\n";
|
|
my %srcOutPaths = findOutputPaths \%srcNarFiles;
|
|
|
|
print "finding dst output paths...\n";
|
|
my %dstOutPaths = findOutputPaths \%dstNarFiles;
|
|
|
|
|
|
sub getNameVersion {
|
|
my $p = shift;
|
|
$p =~ /\/[0-9a-z]+((?:-[a-zA-Z][^\/-]*)+)([^\/]*)$/;
|
|
my $name = $1;
|
|
my $version = $2;
|
|
$name =~ s/^-//;
|
|
$version =~ s/^-//;
|
|
return ($name, $version);
|
|
}
|
|
|
|
|
|
# A quick hack to get a measure of the `distance' between two
|
|
# versions: it's just the position of the first character that differs
|
|
# (or 999 if they are the same).
|
|
sub versionDiff {
|
|
my $s = shift;
|
|
my $t = shift;
|
|
my $i;
|
|
return 999 if $s eq $t;
|
|
for ($i = 0; $i < length $s; $i++) {
|
|
return $i if $i >= length $t or
|
|
substr($s, $i, 1) ne substr($t, $i, 1);
|
|
}
|
|
return $i;
|
|
}
|
|
|
|
|
|
sub getNarBz2 {
|
|
my $narFiles = shift;
|
|
my $storePath = shift;
|
|
|
|
my $narFileList = $$narFiles{$storePath};
|
|
die "missing store expression $storePath" unless defined $narFileList;
|
|
|
|
my $narFile = @{$narFileList}[0];
|
|
die unless defined $narFile;
|
|
|
|
$narFile->{url} =~ /\/([^\/]+)$/;
|
|
die unless defined $1;
|
|
return "$cacheDir/$1";
|
|
}
|
|
|
|
|
|
sub containsPatch {
|
|
my $patches = shift;
|
|
my $storePath = shift;
|
|
my $basePath = shift;
|
|
my $patchList = $$patches{$storePath};
|
|
return 0 if !defined $patchList;
|
|
my $found = 0;
|
|
foreach my $patch (@{$patchList}) {
|
|
# !!! baseHash might differ
|
|
return 1 if $patch->{basePath} eq $basePath;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
# Compute the "weighted" number of uses of a path in the build graph.
|
|
sub computeUses {
|
|
my $narFiles = shift;
|
|
my $path = shift;
|
|
|
|
# Find the deriver of $path.
|
|
return 1 unless defined $$narFiles{$path};
|
|
my $deriver = @{$$narFiles{$path}}[0]->{deriver};
|
|
return 1 unless defined $deriver && $deriver ne "";
|
|
|
|
# print " DERIVER $deriver\n";
|
|
|
|
# Optimisation: build the referrers graph from the references
|
|
# graph.
|
|
my %referrers;
|
|
foreach my $q (keys %{$narFiles}) {
|
|
my @refs = split " ", @{$$narFiles{$q}}[0]->{references};
|
|
foreach my $r (@refs) {
|
|
$referrers{$r} = [] unless defined $referrers{$r};
|
|
push @{$referrers{$r}}, $q;
|
|
}
|
|
}
|
|
|
|
# Determine the shortest path from $deriver to all other reachable
|
|
# paths in the `referrers' graph.
|
|
|
|
my %dist;
|
|
$dist{$deriver} = 0;
|
|
|
|
my @queue = ($deriver);
|
|
my $pos = 0;
|
|
|
|
while ($pos < scalar @queue) {
|
|
my $p = $queue[$pos];
|
|
$pos++;
|
|
|
|
foreach my $q (@{$referrers{$p}}) {
|
|
if (!defined $dist{$q}) {
|
|
$dist{$q} = $dist{$p} + 1;
|
|
# print " $q $dist{$q}\n";
|
|
push @queue, $q;
|
|
}
|
|
}
|
|
}
|
|
|
|
my $wuse = 1.0;
|
|
foreach my $user (keys %dist) {
|
|
next if $user eq $deriver;
|
|
# print " $user $dist{$user}\n";
|
|
$wuse += 1.0 / 2.0**$dist{$user};
|
|
}
|
|
|
|
# print " XXX $path $wuse\n";
|
|
|
|
return $wuse;
|
|
}
|
|
|
|
|
|
# For each output path in the destination, see if we need to / can
|
|
# create a patch.
|
|
|
|
print "creating patches...\n";
|
|
|
|
foreach my $p (keys %dstOutPaths) {
|
|
|
|
# If exactly the same path already exists in the source, skip it.
|
|
next if defined $srcOutPaths{$p};
|
|
|
|
print " $p\n";
|
|
|
|
# If not, then we should find the paths in the source that are
|
|
# `most' likely to be present on a system that wants to install
|
|
# this path.
|
|
|
|
(my $name, my $version) = getNameVersion $p;
|
|
|
|
my @closest = ();
|
|
my $closestVersion;
|
|
my $minDist = -1; # actually, larger means closer
|
|
|
|
# Find all source paths with the same name.
|
|
|
|
foreach my $q (keys %srcOutPaths) {
|
|
(my $name2, my $version2) = getNameVersion $q;
|
|
if ($name eq $name2) {
|
|
|
|
# If the sizes differ too much, then skip. This
|
|
# disambiguates between, e.g., a real component and a
|
|
# wrapper component (cf. Firefox in Nixpkgs).
|
|
my $srcSize = @{$srcNarFiles{$q}}[0]->{size};
|
|
my $dstSize = @{$dstNarFiles{$p}}[0]->{size};
|
|
my $ratio = $srcSize / $dstSize;
|
|
$ratio = 1 / $ratio if $ratio < 1;
|
|
# print " SIZE $srcSize $dstSize $ratio $q\n";
|
|
|
|
if ($ratio >= 3) {
|
|
print " SKIPPING $q due to size ratio $ratio ($srcSize $dstSize)\n";
|
|
next;
|
|
}
|
|
|
|
# If the numbers of weighted uses differ too much, then
|
|
# skip. This disambiguates between, e.g., the bootstrap
|
|
# GCC and the final GCC in Nixpkgs.
|
|
my $srcUses = computeUses \%srcNarFiles, $q;
|
|
my $dstUses = computeUses \%dstNarFiles, $p;
|
|
$ratio = $srcUses / $dstUses;
|
|
$ratio = 1 / $ratio if $ratio < 1;
|
|
print " USE $srcUses $dstUses $ratio $q\n";
|
|
|
|
# if ($ratio >= 2) {
|
|
# print " SKIPPING $q due to use ratio $ratio ($srcUses $dstUses)\n";
|
|
# next;
|
|
# }
|
|
|
|
# If there are multiple matching names, include the ones
|
|
# with the closest version numbers.
|
|
my $dist = versionDiff $version, $version2;
|
|
if ($dist > $minDist) {
|
|
$minDist = $dist;
|
|
@closest = ($q);
|
|
$closestVersion = $version2;
|
|
} elsif ($dist == $minDist) {
|
|
push @closest, $q;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (scalar(@closest) == 0) {
|
|
print " NO BASE: $p\n";
|
|
next;
|
|
}
|
|
|
|
foreach my $closest (@closest) {
|
|
|
|
# Generate a patch between $closest and $p.
|
|
print " $p <- $closest\n";
|
|
|
|
# If the patch already exists, skip it.
|
|
if (containsPatch(\%srcPatches, $p, $closest) ||
|
|
containsPatch(\%dstPatches, $p, $closest))
|
|
{
|
|
print " skipping, already exists\n";
|
|
next;
|
|
}
|
|
|
|
# next;
|
|
|
|
my $srcNarBz2 = getNarBz2 \%srcNarFiles, $closest;
|
|
my $dstNarBz2 = getNarBz2 \%dstNarFiles, $p;
|
|
|
|
system("@bunzip2@ < $srcNarBz2 > $tmpDir/A") == 0
|
|
or die "cannot unpack $srcNarBz2";
|
|
|
|
if ((stat "$tmpDir/A")[7] >= $maxNarSize) {
|
|
print " skipping, source is too large\n";
|
|
next;
|
|
}
|
|
|
|
system("@bunzip2@ < $dstNarBz2 > $tmpDir/B") == 0
|
|
or die "cannot unpack $dstNarBz2";
|
|
|
|
if ((stat "$tmpDir/B")[7] >= $maxNarSize) {
|
|
print " skipping, destination is too large\n";
|
|
next;
|
|
}
|
|
|
|
system("@libexecdir@/bsdiff $tmpDir/A $tmpDir/B $tmpDir/DIFF") == 0
|
|
or die "cannot compute binary diff";
|
|
|
|
my $baseHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/A` or die;
|
|
chomp $baseHash;
|
|
|
|
my $narHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/B` or die;
|
|
chomp $narHash;
|
|
|
|
my $narDiffHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/DIFF` or die;
|
|
chomp $narDiffHash;
|
|
|
|
my $narDiffSize = (stat "$tmpDir/DIFF")[7];
|
|
my $dstNarBz2Size = (stat $dstNarBz2)[7];
|
|
|
|
print " size $narDiffSize; full size $dstNarBz2Size\n";
|
|
|
|
if ($narDiffSize >= $dstNarBz2Size) {
|
|
print " rejecting; patch bigger than full archive\n";
|
|
next;
|
|
}
|
|
|
|
if ($narDiffSize / $dstNarBz2Size >= $maxPatchFraction) {
|
|
print " rejecting; patch too large relative to full archive\n";
|
|
next;
|
|
}
|
|
|
|
my $finalName =
|
|
"$narDiffHash.nar-bsdiff";
|
|
|
|
if (-e "$patchesDir/$finalName") {
|
|
print " not copying, already exists\n";
|
|
}
|
|
|
|
else {
|
|
|
|
system("cp '$tmpDir/DIFF' '$patchesDir/$finalName.tmp'") == 0
|
|
or die "cannot copy diff";
|
|
|
|
rename("$patchesDir/$finalName.tmp", "$patchesDir/$finalName")
|
|
or die "cannot rename $patchesDir/$finalName.tmp";
|
|
|
|
}
|
|
|
|
# Add the patch to the manifest.
|
|
addPatch \%dstPatches, $p,
|
|
{ url => "$patchesURL/$finalName", hash => "$hashAlgo:$narDiffHash"
|
|
, size => $narDiffSize, basePath => $closest, baseHash => "$hashAlgo:$baseHash"
|
|
, narHash => "$hashAlgo:$narHash", patchType => "nar-bsdiff"
|
|
}, 0;
|
|
}
|
|
}
|
|
|
|
|
|
# Add in any potentially useful patches in the source (namely, those
|
|
# patches that produce either paths in the destination or paths that
|
|
# can be used as the base for other useful patches).
|
|
|
|
print "propagating patches...\n";
|
|
|
|
my $changed;
|
|
do {
|
|
# !!! we repeat this to reach the transitive closure; inefficient
|
|
$changed = 0;
|
|
|
|
print "loop\n";
|
|
|
|
my %dstBasePaths;
|
|
foreach my $q (keys %dstPatches) {
|
|
foreach my $patch (@{$dstPatches{$q}}) {
|
|
$dstBasePaths{$patch->{basePath}} = 1;
|
|
}
|
|
}
|
|
|
|
foreach my $p (keys %srcPatches) {
|
|
my $patchList = $srcPatches{$p};
|
|
|
|
my $include = 0;
|
|
|
|
# Is path $p included in the destination? If so, include
|
|
# patches that produce it.
|
|
$include = 1 if defined $dstNarFiles{$p};
|
|
|
|
# Is path $p a path that serves as a base for paths in the
|
|
# destination? If so, include patches that produce it.
|
|
# !!! check baseHash
|
|
$include = 1 if defined $dstBasePaths{$p};
|
|
|
|
if ($include) {
|
|
foreach my $patch (@{$patchList}) {
|
|
$changed = 1 if addPatch \%dstPatches, $p, $patch;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
} while $changed;
|
|
|
|
|
|
# Rewrite the manifest of the destination (with the new patches).
|
|
writeManifest "$dstDir/MANIFEST",
|
|
\%dstNarFiles, \%dstPatches;
|