lix/scripts/download-using-manifests.pl.in

#! @perl@ -w @perlFlags@

use utf8;
use strict;
use Nix::Config;
use Nix::Manifest;
use Nix::Store;
use Nix::Utils;
use POSIX qw(strftime);

STDOUT->autoflush(1);
binmode STDERR, ":encoding(utf8)";

my $logFile = "$Nix::Config::logDir/downloads";

# For queries, skip expensive calls to nix-hash etc.  We're just
# estimating the expected download size.
my $fast = 1;

# ‘--insecure’ is fine because Nix verifies the hash of the result.
my $curl = "$Nix::Config::curl --fail --location --insecure";


# Open the manifest cache and update it if necessary.
my $dbh = updateManifestDB();
exit 0 unless defined $dbh; # exit if there are no manifests
print "\n";


# $hashCache->{$algo}->{$path} yields the $algo-hash of $path.
my $hashCache;


sub parseHash {
    my $hash = shift;
    if ($hash =~ /^(.+):(.+)$/) {
        return ($1, $2);
    } else {
        return ("md5", $hash);
    }
}


# Compute the most efficient sequence of downloads to produce the
# given path.
sub computeSmallestDownload {
    my $targetPath = shift;

    # Build a graph of all store paths that might contribute to the
    # construction of $targetPath, and the special node "start".  The
    # edges are either patch operations, or downloads of full NAR
    # files.  The latter edges only occur between "start" and a store
    # path.
    my %graph;

    $graph{"start"} = {d => 0, pred => undef, edges => []};

    my @queue = ();
    my $queueFront = 0;
    my %done;

    sub addNode {
        my $graph = shift;
        my $u = shift;
        $$graph{$u} = {d => 999999999999, pred => undef, edges => []}
            unless defined $$graph{$u};
    }

    sub addEdge {
        my $graph = shift;
        my $u = shift;
        my $v = shift;
        my $w = shift;
        my $type = shift;
        my $info = shift;
        addNode $graph, $u;
        push @{$$graph{$u}->{edges}},
            {weight => $w, start => $u, end => $v, type => $type, info => $info};
        my $n = scalar @{$$graph{$u}->{edges}};
    }

    push @queue, $targetPath;

    while ($queueFront < scalar @queue) {
        my $u = $queue[$queueFront++];
        next if defined $done{$u};
        $done{$u} = 1;

        addNode \%graph, $u;

        # If the path already exists, it has distance 0 from the
        # "start" node.
        if (isValidPath($u)) {
            addEdge \%graph, "start", $u, 0, "present", undef;
        }

        else {

            # Add patch edges.
            my $patchList = $dbh->selectall_arrayref(
                "select * from Patches where storePath = ?",
                { Slice => {} }, $u);

            foreach my $patch (@{$patchList}) {
                if (isValidPath($patch->{basePath})) {
                    my ($baseHashAlgo, $baseHash) = parseHash $patch->{baseHash};

                    my $hash = $hashCache->{$baseHashAlgo}->{$patch->{basePath}};
                    if (!defined $hash) {
                        $hash = $fast && $baseHashAlgo eq "sha256"
                            ? queryPathHash($patch->{basePath})
                            : hashPath($baseHashAlgo, $baseHashAlgo ne "md5", $patch->{basePath});
                        $hash =~ s/.*://;
                        $hashCache->{$baseHashAlgo}->{$patch->{basePath}} = $hash;
                    }

                    next if $hash ne $baseHash;
                }
                push @queue, $patch->{basePath};
                addEdge \%graph, $patch->{basePath}, $u, $patch->{size}, "patch", $patch;
            }

            # Add NAR file edges to the start node.
            my $narFileList = $dbh->selectall_arrayref(
                "select * from NARs where storePath = ?",
                { Slice => {} }, $u);

            foreach my $narFile (@{$narFileList}) {
                # !!! how to handle files whose size is not known in advance?
                # For now, assume some arbitrary size (1 GB).
                # This has the side-effect of preferring non-Hydra downloads.
                addEdge \%graph, "start", $u, ($narFile->{size} || 1000000000), "narfile", $narFile;
            }
        }
    }


    # Run Dijkstra's shortest path algorithm to determine the shortest
    # sequence of download and/or patch actions that will produce
    # $targetPath.

    my @todo = keys %graph;

    while (scalar @todo > 0) {

        # Remove the closest element from the todo list.
        # !!! inefficient, use a priority queue
        @todo = sort { -($graph{$a}->{d} <=> $graph{$b}->{d}) } @todo;
        my $u = pop @todo;

        my $u_ = $graph{$u};

        foreach my $edge (@{$u_->{edges}}) {
            my $v_ = $graph{$edge->{end}};
            if ($v_->{d} > $u_->{d} + $edge->{weight}) {
                $v_->{d} = $u_->{d} + $edge->{weight};
                # Store the edge; to edge->start is actually the
                # predecessor.
                $v_->{pred} = $edge;
            }
        }
    }


    # Retrieve the shortest path from "start" to $targetPath.
    my @path = ();
    my $cur = $targetPath;
    return () unless defined $graph{$targetPath}->{pred};
    while ($cur ne "start") {
        push @path, $graph{$cur}->{pred};
        $cur = $graph{$cur}->{pred}->{start};
    }

    return @path;
}


# Parse the arguments.

if ($ARGV[0] eq "--query") {

    while (<STDIN>) {
        chomp;
        my ($cmd, @args) = split " ", $_;

        if ($cmd eq "have") {
            foreach my $storePath (@args) {
                print "$storePath\n" if scalar @{$dbh->selectcol_arrayref("select 1 from NARs where storePath = ?", {}, $storePath)} > 0;
            }
            print "\n";
        }

        elsif ($cmd eq "info") {
            foreach my $storePath (@args) {

                my $infos = $dbh->selectall_arrayref(
                    "select * from NARs where storePath = ?",
                    { Slice => {} }, $storePath);

                next unless scalar @{$infos} > 0;
                my $info = @{$infos}[0];

                print "$storePath\n";
                print "$info->{deriver}\n";
                my @references = split " ", $info->{refs};
                print scalar @references, "\n";
                print "$_\n" foreach @references;

                my @path = computeSmallestDownload $storePath;

                my $downloadSize = 0;
                while (scalar @path > 0) {
                    my $edge = pop @path;
                    my $u = $edge->{start};
                    my $v = $edge->{end};
                    if ($edge->{type} eq "patch") {
                        $downloadSize += $edge->{info}->{size} || 0;
                    }
                    elsif ($edge->{type} eq "narfile") {
                        $downloadSize += $edge->{info}->{size} || 0;
                    }
                }

                print "$downloadSize\n";

                my $narSize = $info->{narSize} || 0;
                print "$narSize\n";
            }

            print "\n";
        }

        else { die "unknown command ‘$cmd’"; }
    }

    exit 0;
}

elsif ($ARGV[0] ne "--substitute") {
    die;
}


die unless scalar @ARGV == 3;
my $targetPath = $ARGV[1];
my $destPath = $ARGV[2];
$fast = 0;


# Create a temporary directory.
my $tmpDir = mkTempDir("nix-download");

my $tmpNar = "$tmpDir/nar";
my $tmpNar2 = "$tmpDir/nar2";


open LOGFILE, ">>$logFile" or die "cannot open log file $logFile";

my $date = strftime ("%F %H:%M:%S UTC", gmtime (time));
print LOGFILE "$$ get $targetPath $date\n";

print STDERR "\n*** Trying to download/patch ‘$targetPath’\n";


# Compute the shortest path.
my @path = computeSmallestDownload $targetPath;
die "don't know how to produce $targetPath\n" if scalar @path == 0;


# We don't need the manifest anymore, so close it as an optimisation:
# if we still have SQLite locks blocking other processes (we
# shouldn't), this gets rid of them.
$dbh->disconnect;


# Traverse the shortest path, perform the actions described by the
# edges.
my $curStep = 1;
my $maxStep = scalar @path;

my $finalNarHash;

while (scalar @path > 0) {
    my $edge = pop @path;
    my $u = $edge->{start};
    my $v = $edge->{end};

    print STDERR "\n*** Step $curStep/$maxStep: ";

    if ($edge->{type} eq "present") {
        print STDERR "using already present path ‘$v’\n";
        print LOGFILE "$$ present $v\n";

        if ($curStep < $maxStep) {
            # Since this is not the last step, the path will be used
            # as a base to one or more patches.  So turn the base path
            # into a NAR archive, to which we can apply the patch.
            print STDERR "  packing base path...\n";
            system("$Nix::Config::binDir/nix-store --dump $v > $tmpNar") == 0
                or die "cannot dump ‘$v’";
        }
    }

    elsif ($edge->{type} eq "patch") {
        my $patch = $edge->{info};
        print STDERR "applying patch ‘$patch->{url}’ to ‘$u’ to create ‘$v’\n";

        print LOGFILE "$$ patch $patch->{url} $patch->{size} $patch->{baseHash} $u $v\n";

        # Download the patch.
        print STDERR "  downloading patch...\n";
        my $patchPath = "$tmpDir/patch";
        checkURL $patch->{url};
        system("$curl '$patch->{url}' -o $patchPath") == 0
            or die "cannot download patch ‘$patch->{url}’\n";

        # Apply the patch to the NAR archive produced in step 1 (for
        # the already present path) or a later step (for patch sequences).
        print STDERR "  applying patch...\n";
        system("$Nix::Config::libexecDir/nix/bspatch $tmpNar $tmpNar2 $patchPath") == 0
            or die "cannot apply patch ‘$patchPath’ to $tmpNar\n";

        if ($curStep < $maxStep) {
            # The archive will be used as the base of the next patch.
            rename "$tmpNar2", "$tmpNar" or die "cannot rename NAR archive: $!";
        } else {
            # This was the last patch.  Unpack the final NAR archive
            # into the target path.
            print STDERR "  unpacking patched archive...\n";
            system("$Nix::Config::binDir/nix-store --restore $destPath < $tmpNar2") == 0
                or die "cannot unpack $tmpNar2 to ‘$v’\n";
        }

        $finalNarHash = $patch->{narHash};
    }

    elsif ($edge->{type} eq "narfile") {
        my $narFile = $edge->{info};
        print STDERR "downloading ‘$narFile->{url}’ to ‘$v’\n";

        my $size = $narFile->{size} || -1;
        print LOGFILE "$$ narfile $narFile->{url} $size $v\n";

        checkURL $narFile->{url};

        my $decompressor =
            $narFile->{compressionType} eq "bzip2" ? "| $Nix::Config::bzip2 -d" :
            $narFile->{compressionType} eq "xz" ? "| $Nix::Config::xz -d" :
            $narFile->{compressionType} eq "none" ? "" :
            die "unknown compression type ‘$narFile->{compressionType}’";

        if ($curStep < $maxStep) {
            # The archive will be used a base to a patch.
            system("$curl '$narFile->{url}' $decompressor > $tmpNar") == 0
                or die "cannot download and unpack ‘$narFile->{url}’ to ‘$v’\n";
        } else {
            # Unpack the archive to the target path.
            system("$curl '$narFile->{url}' $decompressor | $Nix::Config::binDir/nix-store --restore '$destPath'") == 0
                or die "cannot download and unpack ‘$narFile->{url}’ to ‘$v’\n";
        }

        $finalNarHash = $narFile->{narHash};
    }

    $curStep++;
}


# Tell Nix about the expected hash so it can verify it.
die "cannot check integrity of the downloaded path since its hash is not known\n"
    unless defined $finalNarHash;
print "$finalNarHash\n";


print STDERR "\n";
print LOGFILE "$$ success\n";
close LOGFILE;