From 862f4c154e883611ba9dfefe921c87e6423075ea Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 13 Dec 2004 13:47:38 +0000 Subject: [PATCH] * Patch deployment. `download.pl' (intended to be used in the substitute mechanism) creates a store path by downloading full NAR archives and/or patches specified in the available manifests. Any combination of present paths, full downloads, and patches can be used to construct the target path. In particular, patches can be chained in sequence; and full NAR archives of the target path can be omitted (i.e., patch-only deployment is possible). A shortest path algorithm is used to find the smallest set of files to be downloaded (the edge weights are currently file sizes, but one can imagine taking the network speed to the various source into account). Patches are binary deltas between two store paths. To be precise, they are the output of the `bsdiff' program applied to the NAR archives obtained by dumping (`nix-store --dump') the two store paths. The advantage of diff'ing NAR archives (and not, say, doing file-by-file diffs) is that file renames/moves are handled automatically. The disadvantage is that we cannot optimise creation of unchanged files (by hard-linking). --- corepkgs/nix-pull/download.pl | 218 ++++++++++++++++++++++++++++++++++ scripts/nix-pull.in | 18 ++- scripts/readmanifest.pm.in | 121 ++++++++++++++----- 3 files changed, 323 insertions(+), 34 deletions(-) create mode 100755 corepkgs/nix-pull/download.pl diff --git a/corepkgs/nix-pull/download.pl b/corepkgs/nix-pull/download.pl new file mode 100755 index 000000000..5c7b33a99 --- /dev/null +++ b/corepkgs/nix-pull/download.pl @@ -0,0 +1,218 @@ +#! /usr/bin/perl -w -I/home/eelco/Dev/nix/scripts + +use strict; +use readmanifest; + +my $manifestDir = "/home/eelco/Dev/nix/patch/test"; + + +# Check the arguments. +die unless scalar @ARGV == 1; +my $targetPath = $ARGV[0]; + + +# Load all manifests. +my %narFiles; +my %patches; +my %successors; + +for my $manifest (glob "$manifestDir/*.nixmanifest") { + print STDERR "reading $manifest\n"; + readManifest $manifest, \%narFiles, \%patches, \%successors; +} + + +# Build a graph of all store paths that might contribute to the +# construction of $targetPath, and the special node "start". The +# edges are either patch operations, or downloads of full NAR files. +# The latter edges only occur between "start" and a store path. + +my %graph; + +$graph{"start"} = {d => 0, pred => undef, edges => []}; + +my @queue = (); +my $queueFront = 0; +my %done; + +sub addToQueue { + my $v = shift; + return if defined $done{$v}; + $done{$v} = 1; + push @queue, $v; +} + +sub addNode { + my $u = shift; + $graph{$u} = {d => 999999999999, pred => undef, edges => []} + unless defined $graph{$u}; +} + +sub addEdge { + my $u = shift; + my $v = shift; + my $w = shift; + my $type = shift; + my $info = shift; + addNode $u; + push @{$graph{$u}->{edges}}, + {weight => $w, start => $u, end => $v, type => $type, info => $info}; + my $n = scalar @{$graph{$u}->{edges}}; +} + +addToQueue $targetPath; + +while ($queueFront < scalar @queue) { + my $u = $queue[$queueFront++]; + print "$u\n"; + + addNode $u; + + # If the path already exists, it has distance 0 from the "start" + # node. + system "nix-store --isvalid '$u' 2> /dev/null"; + if ($? == 0) { + addEdge "start", $u, 0, "present", undef; + } + + else { + + # Add patch edges. + my $patchList = $patches{$u}; + foreach my $patch (@{$patchList}) { + # !!! this should be cached + my $hash = `nix-hash "$patch->{basePath}"`; + chomp $hash; + print " MY HASH is $hash\n"; + if ($hash ne $patch->{baseHash}) { + print " REJECTING PATCH from $patch->{basePath}\n"; + next; + } + print " PATCH from $patch->{basePath}\n"; + addToQueue $patch->{basePath}; + addEdge $patch->{basePath}, $u, $patch->{size}, "patch", $patch; + } + + # Add NAR file edges to the start node. + my $narFileList = $narFiles{$u}; + foreach my $narFile (@{$narFileList}) { + print " NAR from $narFile->{url}\n"; + addEdge "start", $u, $narFile->{size}, "narfile", $narFile; + } + + } +} + + +# Run Dijkstra's shortest path algorithm to determine the shortest +# sequence of download and/or patch actions that will produce +# $targetPath. + +sub byDistance { # sort by distance, reversed + return -($graph{$a}->{d} <=> $graph{$b}->{d}); +} + +my @todo = keys %graph; + +while (scalar @todo > 0) { + + # Remove the closest element from the todo list. + @todo = sort byDistance @todo; + my $u = pop @todo; + + my $u_ = $graph{$u}; + + print "IN $u $u_->{d}\n"; + + foreach my $edge (@{$u_->{edges}}) { + my $v_ = $graph{$edge->{end}}; + if ($v_->{d} > $u_->{d} + $edge->{weight}) { + $v_->{d} = $u_->{d} + $edge->{weight}; + # Store the edge; to edge->start is actually the + # predecessor. + $v_->{pred} = $edge; + print " RELAX $edge->{end} $v_->{d}\n"; + } + } +} + + +# Retrieve the shortest path from "start" to $targetPath. +my @path = (); +my $cur = $targetPath; +die "don't know how to produce $targetPath\n" + unless defined $graph{$targetPath}->{pred}; +while ($cur ne "start") { + push @path, $graph{$cur}->{pred}; + $cur = $graph{$cur}->{pred}->{start}; +} + + +# Traverse the shortest path, perform the actions described by the +# edges. +my $curStep = 1; +my $maxStep = scalar @path; + +sub downloadFile { + my $url = shift; + my $hash = shift; + $ENV{"PRINT_PATH"} = 1; + $ENV{"QUIET"} = 1; + my ($hash2, $path) = `nix-prefetch-url '$url' '$hash'`; + chomp $hash2; + chomp $path; + die "hash mismatch" if $hash ne $hash2; + return $path; +} + +while (scalar @path > 0) { + my $edge = pop @path; + my $u = $edge->{start}; + my $v = $edge->{end}; + + print "\n*** Step $curStep/$maxStep: "; + $curStep++; + + if ($edge->{type} eq "present") { + print "using already present path `$v'\n"; + } + + elsif ($edge->{type} eq "patch") { + my $patch = $edge->{info}; + print "applying patch `$patch->{url}' to `$u' to create `$v'\n"; + + # Download the patch. + print " downloading patch...\n"; + my $patchPath = downloadFile "$patch->{url}", "$patch->{hash}"; + + # Turn the base path into a NAR archive, to which we can + # actually apply the patch. + print " packing base path...\n"; + system "nix-store --dump $patch->{basePath} > /tmp/nar"; + die "cannot dump `$patch->{basePath}'" if ($? != 0); + + # Apply the patch. + print " applying patch...\n"; + system "bspatch /tmp/nar /tmp/nar2 $patchPath"; + die "cannot apply patch `$patchPath' to /tmp/nar" if ($? != 0); + + # Unpack the resulting NAR archive into the target path. + print " unpacking patched archive...\n"; + system "nix-store --restore $targetPath < /tmp/nar2"; + die "cannot unpack /tmp/nar2 into `$targetPath'" if ($? != 0); + } + + elsif ($edge->{type} eq "narfile") { + my $narFile = $edge->{info}; + print "downloading `$narFile->{url}' into `$v'\n"; + + # Download the archive. + print " downloading archive...\n"; + my $narFilePath = downloadFile "$narFile->{url}", "$narFile->{hash}"; + + # Unpack the archive into the target path. + print " unpacking archive...\n"; + system "bunzip2 < '$narFilePath' | nix-store --restore '$targetPath'"; + die "cannot unpack `$narFilePath' into `$targetPath'" if ($? != 0); + } +} diff --git a/scripts/nix-pull.in b/scripts/nix-pull.in index a802760a5..66d99ff7c 100644 --- a/scripts/nix-pull.in +++ b/scripts/nix-pull.in @@ -19,14 +19,24 @@ my $confFile = "@sysconfdir@/nix/prebuilts.conf"; my %storePaths2urls; my %urls2hashes; my %successors; -sub doURL { + +sub processURL { my $url = shift; - processURL $manifest, $url, \%storePaths2urls, \%urls2hashes, \%successors; + + $url =~ s/\/$//; + print "obtaining list of Nix archives at $url...\n"; + + system("@curl@ --fail --silent --show-error --location --max-redirs 20 " . + "'$url' > '$manifest'") == 0 + or die "curl failed: $?"; + + readManifest $manifest, \%storePaths2urls, \%urls2hashes, \%successors; } + if (scalar @ARGV > 0) { while (@ARGV) { my $url = shift @ARGV; - doURL $url; + processURL $url; } } else { open CONFFILE, "<$confFile"; @@ -34,7 +44,7 @@ if (scalar @ARGV > 0) { chomp; if (/^\s*(\S+)\s*(\#.*)?$/) { my $url = $1; - doURL $url; + processURL $url; } } close CONFFILE; diff --git a/scripts/readmanifest.pm.in b/scripts/readmanifest.pm.in index d5527bf3b..8d6694ff2 100644 --- a/scripts/readmanifest.pm.in +++ b/scripts/readmanifest.pm.in @@ -1,27 +1,24 @@ use strict; -sub processURL { +sub readManifest { my $manifest = shift; - my $url = shift; - my $storePaths2urls = shift; - my $urls2hashes = shift; + my $narFiles = shift; + my $patches = shift; my $successors = shift; - $url =~ s/\/$//; - print "obtaining list of Nix archives at $url...\n"; - - system("@curl@ --fail --silent --show-error --location --max-redirs 20 " . - "'$url' > '$manifest'") == 0 - or die "curl failed: $?"; - open MANIFEST, "<$manifest"; my $inside = 0; + my $type; my $storePath; - my $narurl; + my $url; my $hash; + my $size; my @preds; + my $basePath; + my $baseHash; + my $patchType; while () { chomp; @@ -29,38 +26,102 @@ sub processURL { next if (/^$/); if (!$inside) { - if (/^\{$/) { + if (/^\{$/) { + $type = "narfile"; $inside = 1; undef $storePath; - undef $narurl; + undef $url; undef $hash; + $size = 999999999; @preds = (); } + elsif (/^patch \{$/) { + $type = "patch"; + $inside = 1; + undef $url; + undef $hash; + undef $size; + undef $basePath; + undef $baseHash; + undef $patchType; + } else { die "bad line: $_"; } } else { + if (/^\}$/) { $inside = 0; - $$storePaths2urls{$storePath} = $narurl; - $$urls2hashes{$narurl} = $hash; + if ($type eq "narfile") { - foreach my $p (@preds) { - $$successors{$p} = $storePath; + $$narFiles{$storePath} = [] + unless defined $$narFiles{$storePath}; + + my $narFileList = $$narFiles{$storePath}; + + my $found = 0; + foreach my $narFile (@{$narFileList}) { + if ($narFile->{url} eq $url) { + if ($narFile->{hash} eq $hash) { + $found = 1; + } else { + die "conflicting hashes for URL $url, " . + "namely $narFile->{hash} and $hash"; + } + } + } + if (!$found) { + push @{$narFileList}, + {url => $url, hash => $hash, size => $size}; + } + + foreach my $p (@preds) { + $$successors{$p} = $storePath; + } + + } + + elsif ($type eq "patch") { + + $$patches{$storePath} = [] + unless defined $$patches{$storePath}; + + my $patchList = $$patches{$storePath}; + + my $found = 0; + foreach my $patch (@{$patchList}) { + if ($patch->{url} eq $url) { + if ($patch->{hash} eq $hash) { + $found = 1 if ($patch->{basePath} eq $basePath); + } else { + die "conflicting hashes for URL $url, " . + "namely $patch->{hash} and $hash"; + } + } + } + if (!$found) { + push @{$patchList}, + { url => $url, hash => $hash, size => $size + , basePath => $basePath, baseHash => $baseHash + }; + } + } } - elsif (/^\s*StorePath:\s*(\/\S+)\s*$/) { - $storePath = $1; - } - elsif (/^\s*NarURL:\s*(\S+)\s*$/) { - $narurl = $1; - } - elsif (/^\s*MD5:\s*(\S+)\s*$/) { - $hash = $1; - } - elsif (/^\s*SuccOf:\s*(\/\S+)\s*$/) { - push @preds, $1; - } + + elsif (/^\s*StorePath:\s*(\/\S+)\s*$/) { $storePath = $1; } + elsif (/^\s*Hash:\s*(\S+)\s*$/) { $hash = $1; } + elsif (/^\s*URL:\s*(\S+)\s*$/) { $url = $1; } + elsif (/^\s*Size:\s*(\d+)\s*$/) { $size = $1; } + elsif (/^\s*SuccOf:\s*(\/\S+)\s*$/) { push @preds, $1; } + elsif (/^\s*BasePath:\s*(\/\S+)\s*$/) { $basePath = $1; } + elsif (/^\s*BaseHash:\s*(\S+)\s*$/) { $baseHash = $1; } + elsif (/^\s*Type:\s*(\S+)\s*$/) { $patchType = $1; } + + # Compatibility; + elsif (/^\s*NarURL:\s*(\S+)\s*$/) { $url = $1; } + elsif (/^\s*MD5:\s*(\S+)\s*$/) { $hash = $1; } + else { die "bad line: $_"; } } }