forked from lix-project/lix
* Move the patch generator into a module.
This commit is contained in:
parent
9737a7eba0
commit
d3bba0c2d8
4 changed files with 345 additions and 437 deletions
335
scripts/GeneratePatches.pm.in
Executable file
335
scripts/GeneratePatches.pm.in
Executable file
|
@ -0,0 +1,335 @@
|
||||||
|
#! @perl@ -w -I@libexecdir@/nix
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use File::Temp qw(tempdir);
|
||||||
|
use readmanifest;
|
||||||
|
|
||||||
|
|
||||||
|
# Some patch generations options.
|
||||||
|
|
||||||
|
# Max size of NAR archives to generate patches for.
|
||||||
|
my $maxNarSize = $ENV{"NIX_MAX_NAR_SIZE"};
|
||||||
|
$maxNarSize = 160 * 1024 * 1024 if !defined $maxNarSize;
|
||||||
|
|
||||||
|
# If patch is bigger than this fraction of full archive, reject.
|
||||||
|
my $maxPatchFraction = $ENV{"NIX_PATCH_FRACTION"};
|
||||||
|
$maxPatchFraction = 0.60 if !defined $maxPatchFraction;
|
||||||
|
|
||||||
|
my $timeLimit = $ENV{"NIX_BSDIFF_TIME_LIMIT"};
|
||||||
|
$timeLimit = 180 if !defined $timeLimit;
|
||||||
|
|
||||||
|
my $hashAlgo = "sha256";
|
||||||
|
|
||||||
|
|
||||||
|
sub findOutputPaths {
|
||||||
|
my $narFiles = shift;
|
||||||
|
|
||||||
|
my %outPaths;
|
||||||
|
|
||||||
|
foreach my $p (keys %{$narFiles}) {
|
||||||
|
|
||||||
|
# Ignore derivations.
|
||||||
|
next if ($p =~ /\.drv$/);
|
||||||
|
|
||||||
|
# Ignore builders (too much ambiguity -- they're all called
|
||||||
|
# `builder.sh').
|
||||||
|
next if ($p =~ /\.sh$/);
|
||||||
|
next if ($p =~ /\.patch$/);
|
||||||
|
|
||||||
|
# Don't bother including tar files etc.
|
||||||
|
next if ($p =~ /\.tar$/ || $p =~ /\.tar\.(gz|bz2|Z|lzma|xz)$/ || $p =~ /\.zip$/ || $p =~ /\.bin$/ || $p =~ /\.tgz$/ || $p =~ /\.rpm$/ || $p =~ /cvs-export$/ || $p =~ /fetchhg$/);
|
||||||
|
|
||||||
|
$outPaths{$p} = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return %outPaths;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sub getNameVersion {
|
||||||
|
my $p = shift;
|
||||||
|
$p =~ /\/[0-9a-z]+((?:-[a-zA-Z][^\/-]*)+)([^\/]*)$/;
|
||||||
|
my $name = $1;
|
||||||
|
my $version = $2;
|
||||||
|
return undef unless defined $name && defined $version;
|
||||||
|
$name =~ s/^-//;
|
||||||
|
$version =~ s/^-//;
|
||||||
|
return ($name, $version);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# A quick hack to get a measure of the `distance' between two
|
||||||
|
# versions: it's just the position of the first character that differs
|
||||||
|
# (or 999 if they are the same).
|
||||||
|
sub versionDiff {
|
||||||
|
my $s = shift;
|
||||||
|
my $t = shift;
|
||||||
|
my $i;
|
||||||
|
return 999 if $s eq $t;
|
||||||
|
for ($i = 0; $i < length $s; $i++) {
|
||||||
|
return $i if $i >= length $t or
|
||||||
|
substr($s, $i, 1) ne substr($t, $i, 1);
|
||||||
|
}
|
||||||
|
return $i;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sub getNarBz2 {
|
||||||
|
my $narPath = shift;
|
||||||
|
my $narFiles = shift;
|
||||||
|
my $storePath = shift;
|
||||||
|
|
||||||
|
my $narFileList = $$narFiles{$storePath};
|
||||||
|
die "missing path $storePath" unless defined $narFileList;
|
||||||
|
|
||||||
|
my $narFile = @{$narFileList}[0];
|
||||||
|
die unless defined $narFile;
|
||||||
|
|
||||||
|
$narFile->{url} =~ /\/([^\/]+)$/;
|
||||||
|
die unless defined $1;
|
||||||
|
return "$narPath/$1";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sub containsPatch {
|
||||||
|
my $patches = shift;
|
||||||
|
my $storePath = shift;
|
||||||
|
my $basePath = shift;
|
||||||
|
my $patchList = $$patches{$storePath};
|
||||||
|
return 0 if !defined $patchList;
|
||||||
|
my $found = 0;
|
||||||
|
foreach my $patch (@{$patchList}) {
|
||||||
|
# !!! baseHash might differ
|
||||||
|
return 1 if $patch->{basePath} eq $basePath;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sub generatePatches {
|
||||||
|
my ($srcNarFiles, $dstNarFiles, $srcPatches, $dstPatches, $narPath, $patchesPath, $patchesURL, $tmpDir) = @_;
|
||||||
|
|
||||||
|
my %srcOutPaths = findOutputPaths $srcNarFiles;
|
||||||
|
my %dstOutPaths = findOutputPaths $dstNarFiles;
|
||||||
|
|
||||||
|
# For each output path in the destination, see if we need to / can
|
||||||
|
# create a patch.
|
||||||
|
|
||||||
|
print STDERR "creating patches...\n";
|
||||||
|
|
||||||
|
foreach my $p (keys %dstOutPaths) {
|
||||||
|
|
||||||
|
# If exactly the same path already exists in the source, skip it.
|
||||||
|
next if defined $srcOutPaths{$p};
|
||||||
|
|
||||||
|
print " $p\n";
|
||||||
|
|
||||||
|
# If not, then we should find the paths in the source that are
|
||||||
|
# `most' likely to be present on a system that wants to
|
||||||
|
# install this path.
|
||||||
|
|
||||||
|
(my $name, my $version) = getNameVersion $p;
|
||||||
|
next unless defined $name && defined $version;
|
||||||
|
|
||||||
|
my @closest = ();
|
||||||
|
my $closestVersion;
|
||||||
|
my $minDist = -1; # actually, larger means closer
|
||||||
|
|
||||||
|
# Find all source paths with the same name.
|
||||||
|
|
||||||
|
foreach my $q (keys %srcOutPaths) {
|
||||||
|
(my $name2, my $version2) = getNameVersion $q;
|
||||||
|
next unless defined $name2 && defined $version2;
|
||||||
|
|
||||||
|
if ($name eq $name2) {
|
||||||
|
|
||||||
|
my $srcSystem = @{$$dstNarFiles{$p}}[0]->{system};
|
||||||
|
my $dstSystem = @{$$srcNarFiles{$q}}[0]->{system};
|
||||||
|
if (defined $srcSystem && defined $dstSystem && $srcSystem ne $dstSystem) {
|
||||||
|
print " SKIPPING $q due to different systems ($srcSystem vs. $dstSystem)\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
|
# If the sizes differ too much, then skip. This
|
||||||
|
# disambiguates between, e.g., a real component and a
|
||||||
|
# wrapper component (cf. Firefox in Nixpkgs).
|
||||||
|
my $srcSize = @{$$srcNarFiles{$q}}[0]->{size};
|
||||||
|
my $dstSize = @{$$dstNarFiles{$p}}[0]->{size};
|
||||||
|
my $ratio = $srcSize / $dstSize;
|
||||||
|
$ratio = 1 / $ratio if $ratio < 1;
|
||||||
|
# print " SIZE $srcSize $dstSize $ratio $q\n";
|
||||||
|
|
||||||
|
if ($ratio >= 3) {
|
||||||
|
print " SKIPPING $q due to size ratio $ratio ($srcSize vs. $dstSize)\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
|
# If there are multiple matching names, include the
|
||||||
|
# ones with the closest version numbers.
|
||||||
|
my $dist = versionDiff $version, $version2;
|
||||||
|
if ($dist > $minDist) {
|
||||||
|
$minDist = $dist;
|
||||||
|
@closest = ($q);
|
||||||
|
$closestVersion = $version2;
|
||||||
|
} elsif ($dist == $minDist) {
|
||||||
|
push @closest, $q;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (scalar(@closest) == 0) {
|
||||||
|
print " NO BASE: $p\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach my $closest (@closest) {
|
||||||
|
|
||||||
|
# Generate a patch between $closest and $p.
|
||||||
|
print STDERR " $p <- $closest\n";
|
||||||
|
|
||||||
|
# If the patch already exists, skip it.
|
||||||
|
if (containsPatch($srcPatches, $p, $closest) ||
|
||||||
|
containsPatch($dstPatches, $p, $closest))
|
||||||
|
{
|
||||||
|
print " skipping, already exists\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
|
my $srcNarBz2 = getNarBz2 $narPath, $srcNarFiles, $closest;
|
||||||
|
my $dstNarBz2 = getNarBz2 $narPath, $dstNarFiles, $p;
|
||||||
|
|
||||||
|
if (! -f $srcNarBz2) {
|
||||||
|
warn "patch source archive $srcNarBz2 is missing\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
|
system("@bunzip2@ < $srcNarBz2 > $tmpDir/A") == 0
|
||||||
|
or die "cannot unpack $srcNarBz2";
|
||||||
|
|
||||||
|
if ((stat "$tmpDir/A")[7] >= $maxNarSize) {
|
||||||
|
print " skipping, source is too large\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
|
system("@bunzip2@ < $dstNarBz2 > $tmpDir/B") == 0
|
||||||
|
or die "cannot unpack $dstNarBz2";
|
||||||
|
|
||||||
|
if ((stat "$tmpDir/B")[7] >= $maxNarSize) {
|
||||||
|
print " skipping, destination is too large\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
|
my $time1 = time();
|
||||||
|
my $res = system("ulimit -t $timeLimit; @libexecdir@/bsdiff $tmpDir/A $tmpDir/B $tmpDir/DIFF");
|
||||||
|
my $time2 = time();
|
||||||
|
if ($res) {
|
||||||
|
warn "binary diff computation aborted after ", $time2 - $time1, " seconds\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
|
my $baseHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/A` or die;
|
||||||
|
chomp $baseHash;
|
||||||
|
|
||||||
|
my $narHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/B` or die;
|
||||||
|
chomp $narHash;
|
||||||
|
|
||||||
|
my $narDiffHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/DIFF` or die;
|
||||||
|
chomp $narDiffHash;
|
||||||
|
|
||||||
|
my $narDiffSize = (stat "$tmpDir/DIFF")[7];
|
||||||
|
my $dstNarBz2Size = (stat $dstNarBz2)[7];
|
||||||
|
|
||||||
|
print " size $narDiffSize; full size $dstNarBz2Size; ", $time2 - $time1, " seconds\n";
|
||||||
|
|
||||||
|
if ($narDiffSize >= $dstNarBz2Size) {
|
||||||
|
print " rejecting; patch bigger than full archive\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($narDiffSize / $dstNarBz2Size >= $maxPatchFraction) {
|
||||||
|
print " rejecting; patch too large relative to full archive\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
|
my $finalName = "$narDiffHash.nar-bsdiff";
|
||||||
|
|
||||||
|
if (-e "$patchesPath/$finalName") {
|
||||||
|
print " not copying, already exists\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
else {
|
||||||
|
system("cp '$tmpDir/DIFF' '$patchesPath/$finalName.tmp'") == 0
|
||||||
|
or die "cannot copy diff";
|
||||||
|
rename("$patchesPath/$finalName.tmp", "$patchesPath/$finalName")
|
||||||
|
or die "cannot rename $patchesPath/$finalName.tmp";
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add the patch to the manifest.
|
||||||
|
addPatch $dstPatches, $p,
|
||||||
|
{ url => "$patchesURL/$finalName", hash => "$hashAlgo:$narDiffHash"
|
||||||
|
, size => $narDiffSize, basePath => $closest, baseHash => "$hashAlgo:$baseHash"
|
||||||
|
, narHash => "$hashAlgo:$narHash", patchType => "nar-bsdiff"
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Propagate useful patches from $srcPatches to $dstPatches. A patch
|
||||||
|
# is useful if it produces either paths in the $dstNarFiles or paths
|
||||||
|
# that can be used as the base for other useful patches.
|
||||||
|
sub propagatePatches {
|
||||||
|
my ($srcPatches, $dstNarFiles, $dstPatches) = @_;
|
||||||
|
|
||||||
|
print STDERR "propagating patches...\n";
|
||||||
|
|
||||||
|
my $changed;
|
||||||
|
do {
|
||||||
|
# !!! we repeat this to reach the transitive closure; inefficient
|
||||||
|
$changed = 0;
|
||||||
|
|
||||||
|
print STDERR "loop\n";
|
||||||
|
|
||||||
|
my %dstBasePaths;
|
||||||
|
foreach my $q (keys %{$dstPatches}) {
|
||||||
|
foreach my $patch (@{$$dstPatches{$q}}) {
|
||||||
|
$dstBasePaths{$patch->{basePath}} = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach my $p (keys %{$srcPatches}) {
|
||||||
|
my $patchList = $$srcPatches{$p};
|
||||||
|
|
||||||
|
my $include = 0;
|
||||||
|
|
||||||
|
# Is path $p included in the destination? If so, include
|
||||||
|
# patches that produce it.
|
||||||
|
$include = 1 if defined $$dstNarFiles{$p};
|
||||||
|
|
||||||
|
# Is path $p a path that serves as a base for paths in the
|
||||||
|
# destination? If so, include patches that produce it.
|
||||||
|
# !!! check baseHash
|
||||||
|
$include = 1 if defined $dstBasePaths{$p};
|
||||||
|
|
||||||
|
if ($include) {
|
||||||
|
foreach my $patch (@{$patchList}) {
|
||||||
|
$changed = 1 if addPatch $dstPatches, $p, $patch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
} while $changed;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Add all new patches in $srcPatches to $dstPatches.
|
||||||
|
sub copyPatches {
|
||||||
|
my ($srcPatches, $dstPatches) = @_;
|
||||||
|
foreach my $p (keys %{$srcPatches}) {
|
||||||
|
addPatch $dstPatches, $p, $_ foreach @{$$srcPatches{$p}};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return 1;
|
|
@ -3,7 +3,7 @@ bin_SCRIPTS = nix-collect-garbage \
|
||||||
nix-install-package nix-channel nix-build \
|
nix-install-package nix-channel nix-build \
|
||||||
nix-copy-closure
|
nix-copy-closure
|
||||||
|
|
||||||
noinst_SCRIPTS = nix-profile.sh generate-patches.pl \
|
noinst_SCRIPTS = nix-profile.sh GeneratePatches.pm \
|
||||||
find-runtime-roots.pl build-remote.pl nix-reduce-build \
|
find-runtime-roots.pl build-remote.pl nix-reduce-build \
|
||||||
copy-from-other-stores.pl nix-http-export.cgi
|
copy-from-other-stores.pl nix-http-export.cgi
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ install-exec-local: readmanifest.pm download-using-manifests.pl copy-from-other-
|
||||||
$(INSTALL_DATA) readconfig.pm $(DESTDIR)$(libexecdir)/nix
|
$(INSTALL_DATA) readconfig.pm $(DESTDIR)$(libexecdir)/nix
|
||||||
$(INSTALL_DATA) ssh.pm $(DESTDIR)$(libexecdir)/nix
|
$(INSTALL_DATA) ssh.pm $(DESTDIR)$(libexecdir)/nix
|
||||||
$(INSTALL_PROGRAM) find-runtime-roots.pl $(DESTDIR)$(libexecdir)/nix
|
$(INSTALL_PROGRAM) find-runtime-roots.pl $(DESTDIR)$(libexecdir)/nix
|
||||||
$(INSTALL_PROGRAM) generate-patches.pl $(DESTDIR)$(libexecdir)/nix
|
$(INSTALL_PROGRAM) GeneratePatches.pm $(DESTDIR)$(libexecdir)/nix
|
||||||
$(INSTALL_PROGRAM) build-remote.pl $(DESTDIR)$(libexecdir)/nix
|
$(INSTALL_PROGRAM) build-remote.pl $(DESTDIR)$(libexecdir)/nix
|
||||||
$(INSTALL) -d $(DESTDIR)$(libexecdir)/nix/substituters
|
$(INSTALL) -d $(DESTDIR)$(libexecdir)/nix/substituters
|
||||||
$(INSTALL_PROGRAM) download-using-manifests.pl $(DESTDIR)$(libexecdir)/nix/substituters
|
$(INSTALL_PROGRAM) download-using-manifests.pl $(DESTDIR)$(libexecdir)/nix/substituters
|
||||||
|
@ -36,7 +36,7 @@ EXTRA_DIST = nix-collect-garbage.in \
|
||||||
nix-build.in \
|
nix-build.in \
|
||||||
download-using-manifests.pl.in \
|
download-using-manifests.pl.in \
|
||||||
copy-from-other-stores.pl.in \
|
copy-from-other-stores.pl.in \
|
||||||
generate-patches.pl.in \
|
GeneratePatches.pm.in \
|
||||||
nix-copy-closure.in \
|
nix-copy-closure.in \
|
||||||
find-runtime-roots.pl.in \
|
find-runtime-roots.pl.in \
|
||||||
build-remote.pl.in \
|
build-remote.pl.in \
|
||||||
|
|
|
@ -1,429 +0,0 @@
|
||||||
#! @perl@ -w -I@libexecdir@/nix
|
|
||||||
|
|
||||||
use strict;
|
|
||||||
use File::Temp qw(tempdir);
|
|
||||||
use readmanifest;
|
|
||||||
|
|
||||||
|
|
||||||
# Some patch generations options.
|
|
||||||
|
|
||||||
# Max size of NAR archives to generate patches for.
|
|
||||||
my $maxNarSize = $ENV{"NIX_MAX_NAR_SIZE"};
|
|
||||||
$maxNarSize = 100 * 1024 * 1024 if !defined $maxNarSize;
|
|
||||||
|
|
||||||
# If patch is bigger than this fraction of full archive, reject.
|
|
||||||
my $maxPatchFraction = $ENV{"NIX_PATCH_FRACTION"};
|
|
||||||
$maxPatchFraction = 0.60 if !defined $maxPatchFraction;
|
|
||||||
|
|
||||||
my $timeLimit = $ENV{"NIX_BSDIFF_TIME_LIMIT"};
|
|
||||||
$timeLimit = 180 if !defined $timeLimit;
|
|
||||||
|
|
||||||
|
|
||||||
die unless scalar @ARGV == 5;
|
|
||||||
|
|
||||||
my $hashAlgo = "sha256";
|
|
||||||
|
|
||||||
my $narDir = $ARGV[0];
|
|
||||||
my $patchesDir = $ARGV[1];
|
|
||||||
my $patchesURL = $ARGV[2];
|
|
||||||
my $srcManifest = $ARGV[3];
|
|
||||||
my $dstManifest = $ARGV[4];
|
|
||||||
|
|
||||||
my $tmpDir = tempdir("nix-generate-patches.XXXXXX", CLEANUP => 1, TMPDIR => 1)
|
|
||||||
or die "cannot create a temporary directory";
|
|
||||||
|
|
||||||
#END { rmdir $tmpDir; }
|
|
||||||
|
|
||||||
my %srcNarFiles;
|
|
||||||
my %srcLocalPaths;
|
|
||||||
my %srcPatches;
|
|
||||||
|
|
||||||
my %dstNarFiles;
|
|
||||||
my %dstLocalPaths;
|
|
||||||
my %dstPatches;
|
|
||||||
|
|
||||||
readManifest "$srcManifest",
|
|
||||||
\%srcNarFiles, \%srcLocalPaths, \%srcPatches;
|
|
||||||
|
|
||||||
readManifest "$dstManifest",
|
|
||||||
\%dstNarFiles, \%dstLocalPaths, \%dstPatches;
|
|
||||||
|
|
||||||
|
|
||||||
sub findOutputPaths {
|
|
||||||
my $narFiles = shift;
|
|
||||||
|
|
||||||
my %outPaths;
|
|
||||||
|
|
||||||
foreach my $p (keys %{$narFiles}) {
|
|
||||||
|
|
||||||
# Ignore derivations.
|
|
||||||
next if ($p =~ /\.drv$/);
|
|
||||||
|
|
||||||
# Ignore builders (too much ambiguity -- they're all called
|
|
||||||
# `builder.sh').
|
|
||||||
next if ($p =~ /\.sh$/);
|
|
||||||
next if ($p =~ /\.patch$/);
|
|
||||||
|
|
||||||
# Don't bother including tar files etc.
|
|
||||||
next if ($p =~ /\.tar$/ || $p =~ /\.tar\.(gz|bz2|Z|lzma|xz)$/ || $p =~ /\.zip$/ || $p =~ /\.bin$/ || $p =~ /\.tgz$/ || $p =~ /\.rpm$/ || $p =~ /cvs-export$/ || $p =~ /fetchhg$/);
|
|
||||||
|
|
||||||
$outPaths{$p} = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return %outPaths;
|
|
||||||
}
|
|
||||||
|
|
||||||
print "finding src output paths...\n";
|
|
||||||
my %srcOutPaths = findOutputPaths \%srcNarFiles;
|
|
||||||
|
|
||||||
print "finding dst output paths...\n";
|
|
||||||
my %dstOutPaths = findOutputPaths \%dstNarFiles;
|
|
||||||
|
|
||||||
|
|
||||||
sub getNameVersion {
|
|
||||||
my $p = shift;
|
|
||||||
$p =~ /\/[0-9a-z]+((?:-[a-zA-Z][^\/-]*)+)([^\/]*)$/;
|
|
||||||
my $name = $1;
|
|
||||||
my $version = $2;
|
|
||||||
return undef unless defined $name && defined $version;
|
|
||||||
$name =~ s/^-//;
|
|
||||||
$version =~ s/^-//;
|
|
||||||
return ($name, $version);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# A quick hack to get a measure of the `distance' between two
|
|
||||||
# versions: it's just the position of the first character that differs
|
|
||||||
# (or 999 if they are the same).
|
|
||||||
sub versionDiff {
|
|
||||||
my $s = shift;
|
|
||||||
my $t = shift;
|
|
||||||
my $i;
|
|
||||||
return 999 if $s eq $t;
|
|
||||||
for ($i = 0; $i < length $s; $i++) {
|
|
||||||
return $i if $i >= length $t or
|
|
||||||
substr($s, $i, 1) ne substr($t, $i, 1);
|
|
||||||
}
|
|
||||||
return $i;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
sub getNarBz2 {
|
|
||||||
my $narFiles = shift;
|
|
||||||
my $storePath = shift;
|
|
||||||
|
|
||||||
my $narFileList = $$narFiles{$storePath};
|
|
||||||
die "missing path $storePath" unless defined $narFileList;
|
|
||||||
|
|
||||||
my $narFile = @{$narFileList}[0];
|
|
||||||
die unless defined $narFile;
|
|
||||||
|
|
||||||
$narFile->{url} =~ /\/([^\/]+)$/;
|
|
||||||
die unless defined $1;
|
|
||||||
return "$narDir/$1";
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
sub containsPatch {
|
|
||||||
my $patches = shift;
|
|
||||||
my $storePath = shift;
|
|
||||||
my $basePath = shift;
|
|
||||||
my $patchList = $$patches{$storePath};
|
|
||||||
return 0 if !defined $patchList;
|
|
||||||
my $found = 0;
|
|
||||||
foreach my $patch (@{$patchList}) {
|
|
||||||
# !!! baseHash might differ
|
|
||||||
return 1 if $patch->{basePath} eq $basePath;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# Compute the "weighted" number of uses of a path in the build graph.
|
|
||||||
sub computeUses {
|
|
||||||
my $narFiles = shift;
|
|
||||||
my $path = shift;
|
|
||||||
|
|
||||||
# Find the deriver of $path.
|
|
||||||
return 1 unless defined $$narFiles{$path};
|
|
||||||
my $deriver = @{$$narFiles{$path}}[0]->{deriver};
|
|
||||||
return 1 unless defined $deriver && $deriver ne "";
|
|
||||||
|
|
||||||
# print " DERIVER $deriver\n";
|
|
||||||
|
|
||||||
# Optimisation: build the referrers graph from the references
|
|
||||||
# graph.
|
|
||||||
my %referrers;
|
|
||||||
foreach my $q (keys %{$narFiles}) {
|
|
||||||
my @refs = split " ", @{$$narFiles{$q}}[0]->{references};
|
|
||||||
foreach my $r (@refs) {
|
|
||||||
$referrers{$r} = [] unless defined $referrers{$r};
|
|
||||||
push @{$referrers{$r}}, $q;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Determine the shortest path from $deriver to all other reachable
|
|
||||||
# paths in the `referrers' graph.
|
|
||||||
|
|
||||||
my %dist;
|
|
||||||
$dist{$deriver} = 0;
|
|
||||||
|
|
||||||
my @queue = ($deriver);
|
|
||||||
my $pos = 0;
|
|
||||||
|
|
||||||
while ($pos < scalar @queue) {
|
|
||||||
my $p = $queue[$pos];
|
|
||||||
$pos++;
|
|
||||||
|
|
||||||
foreach my $q (@{$referrers{$p}}) {
|
|
||||||
if (!defined $dist{$q}) {
|
|
||||||
$dist{$q} = $dist{$p} + 1;
|
|
||||||
# print " $q $dist{$q}\n";
|
|
||||||
push @queue, $q;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
my $wuse = 1.0;
|
|
||||||
foreach my $user (keys %dist) {
|
|
||||||
next if $user eq $deriver;
|
|
||||||
# print " $user $dist{$user}\n";
|
|
||||||
$wuse += 1.0 / 2.0**$dist{$user};
|
|
||||||
}
|
|
||||||
|
|
||||||
# print " XXX $path $wuse\n";
|
|
||||||
|
|
||||||
return $wuse;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# For each output path in the destination, see if we need to / can
|
|
||||||
# create a patch.
|
|
||||||
|
|
||||||
print "creating patches...\n";
|
|
||||||
|
|
||||||
foreach my $p (keys %dstOutPaths) {
|
|
||||||
|
|
||||||
# If exactly the same path already exists in the source, skip it.
|
|
||||||
next if defined $srcOutPaths{$p};
|
|
||||||
|
|
||||||
print " $p\n";
|
|
||||||
|
|
||||||
# If not, then we should find the paths in the source that are
|
|
||||||
# `most' likely to be present on a system that wants to install
|
|
||||||
# this path.
|
|
||||||
|
|
||||||
(my $name, my $version) = getNameVersion $p;
|
|
||||||
next unless defined $name && defined $version;
|
|
||||||
|
|
||||||
my @closest = ();
|
|
||||||
my $closestVersion;
|
|
||||||
my $minDist = -1; # actually, larger means closer
|
|
||||||
|
|
||||||
# Find all source paths with the same name.
|
|
||||||
|
|
||||||
foreach my $q (keys %srcOutPaths) {
|
|
||||||
(my $name2, my $version2) = getNameVersion $q;
|
|
||||||
next unless defined $name2 && defined $version2;
|
|
||||||
|
|
||||||
if ($name eq $name2) {
|
|
||||||
|
|
||||||
my $srcSystem = @{$dstNarFiles{$p}}[0]->{system};
|
|
||||||
my $dstSystem = @{$srcNarFiles{$q}}[0]->{system};
|
|
||||||
if (defined $srcSystem && defined $dstSystem && $srcSystem ne $dstSystem) {
|
|
||||||
print " SKIPPING $q due to different systems ($srcSystem vs. $dstSystem)\n";
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
# If the sizes differ too much, then skip. This
|
|
||||||
# disambiguates between, e.g., a real component and a
|
|
||||||
# wrapper component (cf. Firefox in Nixpkgs).
|
|
||||||
my $srcSize = @{$srcNarFiles{$q}}[0]->{size};
|
|
||||||
my $dstSize = @{$dstNarFiles{$p}}[0]->{size};
|
|
||||||
my $ratio = $srcSize / $dstSize;
|
|
||||||
$ratio = 1 / $ratio if $ratio < 1;
|
|
||||||
# print " SIZE $srcSize $dstSize $ratio $q\n";
|
|
||||||
|
|
||||||
if ($ratio >= 3) {
|
|
||||||
print " SKIPPING $q due to size ratio $ratio ($srcSize vs. $dstSize)\n";
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
# If the numbers of weighted uses differ too much, then
|
|
||||||
# skip. This disambiguates between, e.g., the bootstrap
|
|
||||||
# GCC and the final GCC in Nixpkgs.
|
|
||||||
# my $srcUses = computeUses \%srcNarFiles, $q;
|
|
||||||
# my $dstUses = computeUses \%dstNarFiles, $p;
|
|
||||||
# $ratio = $srcUses / $dstUses;
|
|
||||||
# $ratio = 1 / $ratio if $ratio < 1;
|
|
||||||
# print " USE $srcUses $dstUses $ratio $q\n";
|
|
||||||
|
|
||||||
# if ($ratio >= 2) {
|
|
||||||
# print " SKIPPING $q due to use ratio $ratio ($srcUses $dstUses)\n";
|
|
||||||
# next;
|
|
||||||
# }
|
|
||||||
|
|
||||||
# If there are multiple matching names, include the ones
|
|
||||||
# with the closest version numbers.
|
|
||||||
my $dist = versionDiff $version, $version2;
|
|
||||||
if ($dist > $minDist) {
|
|
||||||
$minDist = $dist;
|
|
||||||
@closest = ($q);
|
|
||||||
$closestVersion = $version2;
|
|
||||||
} elsif ($dist == $minDist) {
|
|
||||||
push @closest, $q;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (scalar(@closest) == 0) {
|
|
||||||
print " NO BASE: $p\n";
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach my $closest (@closest) {
|
|
||||||
|
|
||||||
# Generate a patch between $closest and $p.
|
|
||||||
print " $p <- $closest\n";
|
|
||||||
|
|
||||||
# If the patch already exists, skip it.
|
|
||||||
if (containsPatch(\%srcPatches, $p, $closest) ||
|
|
||||||
containsPatch(\%dstPatches, $p, $closest))
|
|
||||||
{
|
|
||||||
print " skipping, already exists\n";
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
# next;
|
|
||||||
|
|
||||||
my $srcNarBz2 = getNarBz2 \%srcNarFiles, $closest;
|
|
||||||
my $dstNarBz2 = getNarBz2 \%dstNarFiles, $p;
|
|
||||||
|
|
||||||
if (! -f $srcNarBz2) {
|
|
||||||
warn "patch source archive $srcNarBz2 is missing\n";
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
system("@bunzip2@ < $srcNarBz2 > $tmpDir/A") == 0
|
|
||||||
or die "cannot unpack $srcNarBz2";
|
|
||||||
|
|
||||||
if ((stat "$tmpDir/A")[7] >= $maxNarSize) {
|
|
||||||
print " skipping, source is too large\n";
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
system("@bunzip2@ < $dstNarBz2 > $tmpDir/B") == 0
|
|
||||||
or die "cannot unpack $dstNarBz2";
|
|
||||||
|
|
||||||
if ((stat "$tmpDir/B")[7] >= $maxNarSize) {
|
|
||||||
print " skipping, destination is too large\n";
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
my $time1 = time();
|
|
||||||
my $res = system("ulimit -t $timeLimit; @libexecdir@/bsdiff $tmpDir/A $tmpDir/B $tmpDir/DIFF");
|
|
||||||
my $time2 = time();
|
|
||||||
if ($res) {
|
|
||||||
warn "binary diff computation aborted after ", $time2 - $time1, " seconds\n";
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
my $baseHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/A` or die;
|
|
||||||
chomp $baseHash;
|
|
||||||
|
|
||||||
my $narHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/B` or die;
|
|
||||||
chomp $narHash;
|
|
||||||
|
|
||||||
my $narDiffHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/DIFF` or die;
|
|
||||||
chomp $narDiffHash;
|
|
||||||
|
|
||||||
my $narDiffSize = (stat "$tmpDir/DIFF")[7];
|
|
||||||
my $dstNarBz2Size = (stat $dstNarBz2)[7];
|
|
||||||
|
|
||||||
print " size $narDiffSize; full size $dstNarBz2Size; ", $time2 - $time1, " seconds\n";
|
|
||||||
|
|
||||||
if ($narDiffSize >= $dstNarBz2Size) {
|
|
||||||
print " rejecting; patch bigger than full archive\n";
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($narDiffSize / $dstNarBz2Size >= $maxPatchFraction) {
|
|
||||||
print " rejecting; patch too large relative to full archive\n";
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
my $finalName =
|
|
||||||
"$narDiffHash.nar-bsdiff";
|
|
||||||
|
|
||||||
if (-e "$patchesDir/$finalName") {
|
|
||||||
print " not copying, already exists\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
else {
|
|
||||||
|
|
||||||
system("cp '$tmpDir/DIFF' '$patchesDir/$finalName.tmp'") == 0
|
|
||||||
or die "cannot copy diff";
|
|
||||||
|
|
||||||
rename("$patchesDir/$finalName.tmp", "$patchesDir/$finalName")
|
|
||||||
or die "cannot rename $patchesDir/$finalName.tmp";
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
# Add the patch to the manifest.
|
|
||||||
addPatch \%dstPatches, $p,
|
|
||||||
{ url => "$patchesURL/$finalName", hash => "$hashAlgo:$narDiffHash"
|
|
||||||
, size => $narDiffSize, basePath => $closest, baseHash => "$hashAlgo:$baseHash"
|
|
||||||
, narHash => "$hashAlgo:$narHash", patchType => "nar-bsdiff"
|
|
||||||
}, 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# Add in any potentially useful patches in the source (namely, those
|
|
||||||
# patches that produce either paths in the destination or paths that
|
|
||||||
# can be used as the base for other useful patches).
|
|
||||||
|
|
||||||
print "propagating patches...\n";
|
|
||||||
|
|
||||||
my $changed;
|
|
||||||
do {
|
|
||||||
# !!! we repeat this to reach the transitive closure; inefficient
|
|
||||||
$changed = 0;
|
|
||||||
|
|
||||||
print "loop\n";
|
|
||||||
|
|
||||||
my %dstBasePaths;
|
|
||||||
foreach my $q (keys %dstPatches) {
|
|
||||||
foreach my $patch (@{$dstPatches{$q}}) {
|
|
||||||
$dstBasePaths{$patch->{basePath}} = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach my $p (keys %srcPatches) {
|
|
||||||
my $patchList = $srcPatches{$p};
|
|
||||||
|
|
||||||
my $include = 0;
|
|
||||||
|
|
||||||
# Is path $p included in the destination? If so, include
|
|
||||||
# patches that produce it.
|
|
||||||
$include = 1 if defined $dstNarFiles{$p};
|
|
||||||
|
|
||||||
# Is path $p a path that serves as a base for paths in the
|
|
||||||
# destination? If so, include patches that produce it.
|
|
||||||
# !!! check baseHash
|
|
||||||
$include = 1 if defined $dstBasePaths{$p};
|
|
||||||
|
|
||||||
if ($include) {
|
|
||||||
foreach my $patch (@{$patchList}) {
|
|
||||||
$changed = 1 if addPatch \%dstPatches, $p, $patch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
} while $changed;
|
|
||||||
|
|
||||||
|
|
||||||
# Rewrite the manifest of the destination (with the new patches).
|
|
||||||
writeManifest "${dstManifest}",
|
|
||||||
\%dstNarFiles, \%dstPatches;
|
|
|
@ -146,7 +146,7 @@ sub readManifest {
|
||||||
|
|
||||||
|
|
||||||
sub writeManifest {
|
sub writeManifest {
|
||||||
my ($manifest, $narFiles, $patches) = @_;
|
my ($manifest, $narFiles, $patches, $noCompress) = @_;
|
||||||
|
|
||||||
open MANIFEST, ">$manifest.tmp"; # !!! check exclusive
|
open MANIFEST, ">$manifest.tmp"; # !!! check exclusive
|
||||||
|
|
||||||
|
@ -198,12 +198,14 @@ sub writeManifest {
|
||||||
|
|
||||||
|
|
||||||
# Create a bzipped manifest.
|
# Create a bzipped manifest.
|
||||||
|
unless (defined $noCompress) {
|
||||||
system("@bzip2@ < $manifest > $manifest.bz2.tmp") == 0
|
system("@bzip2@ < $manifest > $manifest.bz2.tmp") == 0
|
||||||
or die "cannot compress manifest";
|
or die "cannot compress manifest";
|
||||||
|
|
||||||
rename("$manifest.bz2.tmp", "$manifest.bz2")
|
rename("$manifest.bz2.tmp", "$manifest.bz2")
|
||||||
or die "cannot rename $manifest.bz2.tmp: $!";
|
or die "cannot rename $manifest.bz2.tmp: $!";
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
|
Loading…
Reference in a new issue