download-from-binary-cache: Use HEAD requests if possible

In "nix-env -qas", we don't need the substitute info, we just need to
know if it exists.  This can be done using a HTTP HEAD request, which
saves bandwidth.

Note however that curl currently has a bug that prevents it from
reusing HTTP connections if HEAD requests return a 404:

https://sourceforge.net/tracker/?func=detail&aid=3542731&group_id=976&atid=100976

Without the patch attached to the issue, using HEAD is actually quite
a bit slower than GET.
This commit is contained in:
Eelco Dolstra 2012-07-11 17:53:20 -04:00
parent 09a6321aeb
commit b74d92755d

View file

@ -18,7 +18,7 @@ my @binaryCacheUrls = map { s/\/+$//; $_ } split(/ /,
my $maxParallelRequests = int($Nix::Config::config{"binary-caches-parallel-connections"} // 150); my $maxParallelRequests = int($Nix::Config::config{"binary-caches-parallel-connections"} // 150);
$maxParallelRequests = 1 if $maxParallelRequests < 1; $maxParallelRequests = 1 if $maxParallelRequests < 1;
my ($dbh, $insertNAR, $queryNAR, $insertNegativeNAR, $queryNegativeNAR); my ($dbh, $insertNAR, $queryNAR, $insertNARExistence, $queryNARExistence);
my %cacheIds; my %cacheIds;
my $curlm = WWW::Curl::Multi->new; my $curlm = WWW::Curl::Multi->new;
@ -30,7 +30,7 @@ my $caBundle = $ENV{"CURL_CA_BUNDLE"} // $ENV{"OPENSSL_X509_CERT_FILE"};
sub addRequest { sub addRequest {
my ($storePath, $url) = @_; my ($storePath, $url, $head) = @_;
my $curl = WWW::Curl::Easy->new; my $curl = WWW::Curl::Easy->new;
my $curlId = $curlIdCount++; my $curlId = $curlIdCount++;
@ -41,6 +41,7 @@ sub addRequest {
$curl->setopt(CURLOPT_WRITEDATA, \$requests{$curlId}->{content}); $curl->setopt(CURLOPT_WRITEDATA, \$requests{$curlId}->{content});
$curl->setopt(CURLOPT_FOLLOWLOCATION, 1); $curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
$curl->setopt(CURLOPT_CAINFO, $caBundle) if defined $caBundle; $curl->setopt(CURLOPT_CAINFO, $caBundle) if defined $caBundle;
$curl->setopt(CURLOPT_NOBODY, 1) if $head;
if ($activeRequests >= $maxParallelRequests) { if ($activeRequests >= $maxParallelRequests) {
$scheduled{$curlId} = 1; $scheduled{$curlId} = 1;
@ -127,9 +128,10 @@ EOF
EOF EOF
$dbh->do(<<EOF); $dbh->do(<<EOF);
create table if not exists NegativeNARs ( create table if not exists NARExistence (
cache integer not null, cache integer not null,
storePath text not null, storePath text not null,
exist integer not null,
timestamp integer not null, timestamp integer not null,
primary key (cache, storePath), primary key (cache, storePath),
foreign key (cache) references BinaryCaches(id) on delete cascade foreign key (cache) references BinaryCaches(id) on delete cascade
@ -142,17 +144,28 @@ EOF
$queryNAR = $dbh->prepare("select * from NARs where cache = ? and storePath = ?") or die; $queryNAR = $dbh->prepare("select * from NARs where cache = ? and storePath = ?") or die;
$insertNegativeNAR = $dbh->prepare( $insertNARExistence = $dbh->prepare(
"insert or replace into NegativeNARs(cache, storePath, timestamp) values (?, ?, ?)") or die; "insert or replace into NARExistence(cache, storePath, exist, timestamp) values (?, ?, ?, ?)") or die;
$queryNegativeNAR = $dbh->prepare("select 1 from NegativeNARs where cache = ? and storePath = ?") or die; $queryNARExistence = $dbh->prepare("select exist from NARExistence where cache = ? and storePath = ?") or die;
} }
sub negativeHit { sub negativeHit {
my ($storePath, $binaryCacheUrl) = @_; my ($storePath, $binaryCacheUrl) = @_;
$queryNegativeNAR->execute(getCacheId($binaryCacheUrl), basename($storePath)); $queryNARExistence->execute(getCacheId($binaryCacheUrl), basename($storePath));
return @{$queryNegativeNAR->fetchall_arrayref()} != 0; my $res = $queryNARExistence->fetchrow_hashref();
return defined $res && $res->{exist} == 0;
}
sub positiveHit {
my ($storePath, $binaryCacheUrl) = @_;
return 1 if defined getCachedInfoFrom($storePath, $binaryCacheUrl);
$queryNARExistence->execute(getCacheId($binaryCacheUrl), basename($storePath));
my $res = $queryNARExistence->fetchrow_hashref();
return defined $res && $res->{exist} == 1;
} }
@ -166,7 +179,7 @@ sub processNARInfo {
print STDERR "could not download $request->{url} (" . print STDERR "could not download $request->{url} (" .
($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n"; ($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n";
} else { } else {
$insertNegativeNAR->execute($cacheId, basename($storePath), time()); $insertNARExistence->execute($cacheId, basename($storePath), 0, time());
} }
return undef; return undef;
} }
@ -319,6 +332,61 @@ sub printInfoParallel {
} }
sub printSubstitutablePaths {
my @paths = @_;
# First look for paths that have cached info.
my @left;
foreach my $storePath (@paths) {
my $found = 0;
foreach my $binaryCacheUrl (@binaryCacheUrls) {
if (positiveHit($storePath, $binaryCacheUrl)) {
print "$storePath\n";
$found = 1;
last;
}
}
push @left, $storePath if !$found;
}
return if scalar @left == 0;
# For remaining paths, do HEAD requests.
foreach my $binaryCacheUrl (@binaryCacheUrls) {
my $cacheId = getCacheId($binaryCacheUrl);
my @left2;
%requests = ();
foreach my $storePath (@left) {
if (negativeHit($storePath, $binaryCacheUrl)) {
push @left2, $storePath;
next;
}
addRequest($storePath, infoUrl($binaryCacheUrl, $storePath), 1);
}
processRequests;
foreach my $request (values %requests) {
if ($request->{result} != 0 || $request->{httpStatus} != 200) {
if ($request->{httpStatus} != 404) {
print STDERR "could not check $request->{url} (" .
($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n";
} else {
$insertNARExistence->execute($cacheId, basename($request->{storePath}), 0, time());
}
push @left2, $request->{storePath};
} else {
$insertNARExistence->execute($cacheId, basename($request->{storePath}), 1, time());
print "$request->{storePath}\n";
}
}
@left = @left2;
}
}
sub downloadBinary { sub downloadBinary {
my ($storePath) = @_; my ($storePath) = @_;
@ -371,9 +439,8 @@ if ($ARGV[0] eq "--query") {
my ($cmd, @args) = split " ", $_; my ($cmd, @args) = split " ", $_;
if ($cmd eq "have") { if ($cmd eq "have") {
my $storePath = <STDIN>; chomp $storePath; printSubstitutablePaths(@args);
# FIXME: want to give correct info here, but it's too slow. print "\n";
print "0\n";
} }
elsif ($cmd eq "info") { elsif ($cmd eq "info") {