download-from-binary-cache: use WWW::Curl

Using WWW::Curl rather than running an external curl process for every
NAR info file halves the time it takes to get info thanks to libcurl's
support for persistent HTTP connections.  (We save a roundtrip per
file.)  But the real gain will come from using parallel and/or
pipelined requests.
This commit is contained in:
Eelco Dolstra 2012-07-06 00:30:40 -04:00
parent ae60643c15
commit cd94665f38

View file

@ -1,10 +1,13 @@
#! @perl@ -w @perlFlags@
use strict;
use DBI;
use File::Basename;
use IO::Select;
use Nix::Config;
use Nix::Store;
use DBI;
use WWW::Curl::Easy;
use WWW::Curl::Multi;
use strict;
my @binaryCacheUrls = map { s/\/+$//; $_ } split(/ /, ($ENV{"NIX_BINARY_CACHES"} || ""));
@ -12,6 +15,58 @@ my @binaryCacheUrls = map { s/\/+$//; $_ } split(/ /, ($ENV{"NIX_BINARY_CACHES"}
my ($dbh, $insertNAR, $queryNAR, $insertNegativeNAR, $queryNegativeNAR);
my %cacheIds;
my $curlm = WWW::Curl::Multi->new;
my $activeRequests = 0;
my $curlIdCount = 1;
my %curlHandles;
my $caBundle = $ENV{"CURL_CA_BUNDLE"} || $ENV{"OPENSSL_X509_CERT_FILE"};
sub addRequest {
my ($url) = @_;
my $curl = WWW::Curl::Easy->new;
my $curlId = $curlIdCount++;
$curlHandles{$curlId} = { handle => $curl, content => "" };
$curl->setopt(CURLOPT_PRIVATE, $curlId);
$curl->setopt(CURLOPT_URL, $url);
$curl->setopt(CURLOPT_WRITEDATA, \$curlHandles{$curlId}->{content});
$curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
$curl->setopt(CURLOPT_CAINFO, $caBundle) if defined $caBundle;
$curlm->add_handle($curl);
$activeRequests++;
return $curlHandles{$curlId};
}
sub processRequests {
while ($activeRequests) {
my ($rfds, $wfds, $efds) = $curlm->fdset();
#print STDERR "R = @{$rfds}, W = @{$wfds}, E = @{$efds}\n";
# Sleep until we can read or write some data.
if (scalar @{$rfds} + scalar @{$wfds} + scalar @{$efds} > 0) {
IO::Select->select(IO::Select->new(@{$rfds}), IO::Select->new(@{$wfds}), IO::Select->new(@{$efds}), 0.1);
}
if ($curlm->perform() != $activeRequests) {
while (my ($id, $result) = $curlm->info_read) {
if ($id) {
my $handle = $curlHandles{$id}->{handle};
$curlHandles{$id}->{result} = $result;
$curlHandles{$id}->{httpStatus} = $handle->getinfo(CURLINFO_HTTP_CODE);
#print STDERR "\nRequest completed ($id, $result, $curlHandles{$id}->{httpStatus})\n";
$activeRequests--;
delete $curlHandles{$id}->{handle};
}
}
}
}
}
sub initCache {
my $dbPath = "$Nix::Config::stateDir/binary-cache-v1.sqlite";
@ -86,11 +141,13 @@ sub getInfoFrom {
my $infoUrl = "$binaryCacheUrl/$pathHash.narinfo";
print STDERR "checking $infoUrl...\n";
my $s = `$Nix::Config::curl --fail --silent --location $infoUrl`;
if ($? != 0) {
my $status = $? >> 8;
if ($status != 22 && $status != 37) {
print STDERR "could not download $infoUrl (curl returned status ", $? >> 8, ")\n";
my $request = addRequest($infoUrl);
processRequests;
if ($request->{result} != 0 || $request->{httpStatus} != 200) {
if ($request->{httpStatus} != 404) {
print STDERR "could not download $infoUrl (" .
($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n";
} else {
$insertNegativeNAR->execute($cacheId, basename($storePath), time());
}
@ -100,7 +157,7 @@ sub getInfoFrom {
my ($storePath2, $url, $fileHash, $fileSize, $narHash, $narSize, $deriver, $system);
my $compression = "bzip2";
my @refs;
foreach my $line (split "\n", $s) {
foreach my $line (split "\n", $request->{content}) {
$line =~ /^(.*): (.*)$/ or return undef;
if ($1 eq "StorePath") { $storePath2 = $2; }
elsif ($1 eq "URL") { $url = $2; }
@ -248,9 +305,9 @@ if ($ARGV[0] eq "--query") {
if ($cmd eq "have") {
my $storePath = <STDIN>; chomp $storePath;
# FIXME: want to give correct info here, but it's too slow.
print "0\n";
#my $info = getInfo($storePath);
#if (defined $info) { print "1\n"; } else { print "0\n"; }
#print "0\n";
my $info = getInfo($storePath);
if (defined $info) { print "1\n"; } else { print "0\n"; }
}
elsif ($cmd eq "info") {