channel-scripts/find-binary-cache-garbage.pl
2015-06-03 16:23:16 +02:00

177 lines
5.5 KiB
Perl
Executable file

#! /usr/bin/env nix-shell
#! nix-shell -i perl -p perl perlPackages.DBDSQLite perlPackages.NetAmazonS3
use strict;
use Nix::Manifest;
use Net::Amazon::S3;
use File::Basename;
use DateTime::Format::Strptime;
# S3 bucket whose contents are audited by this script.
my $bucketName = 'nix-cache';

# Age threshold in seconds (180 days).  Garbage whose modification time is
# at least this recent is only counted as "young" and is not listed.
my $maxAge = 60 * 60 * 24 * 180;

# Parser applied to the `last_modified' field of each bucket key.
my $dateParser = DateTime::Format::Strptime->new(
    pattern => '%Y-%m-%dT%H:%M:%S',
);
# Read the manifests of live releases.
#
# Every file named MANIFEST below the two release directories is located
# with find(1) and loaded into %narFiles / %patches through readManifest
# (imported from Nix::Manifest).  A manifest whose first line is not
# "version {" is skipped without being loaded; one for which readManifest
# returns a value below 3 is reported as too old.
my $res = `find /data/releases/nixos /data/releases/nixpkgs -name MANIFEST`;
die "cannot enumerate release manifests (find failed, wait status $?)\n"
    if $? != 0;
my @manifests = split /\n/, $res;

my %narFiles;
my %patches;

foreach my $manifest (@manifests) {
    print STDERR "loading $manifest\n";

    # Peek at the first line only.  Three-argument open with a lexical
    # handle keeps odd characters in the path from being read as an open
    # mode and avoids a global bareword filehandle.
    open(my $fh, '<', $manifest)
        or die "cannot open manifest `$manifest': $!\n";
    my $s = <$fh>;
    close $fh;
    die "manifest `$manifest' is empty\n" unless defined $s;
    chomp $s;

    if ($s ne "version {") {
        warn "skipping very old manifest (i.e., for Nix <= 0.7)\n";
        next;
    }

    if (readManifest($manifest, \%narFiles, \%patches) < 3) {
        warn "manifest `$manifest' is too old (i.e., for Nix <= 0.7)\n";
        next;
    }
}
print STDERR scalar(keys %narFiles), " live store paths found\n";

# Build the two lookup tables used when classifying bucket keys:
#   %hashParts   first 32 characters of a store path's base name -> store path
#   %fileHashes  52-character sha256 hash of each file listed for a live path
# When SHOW_LIVE is set in the environment the live set is printed to stdout
# and the script exits right after this phase.
my %hashParts;
my %fileHashes;

for my $path (keys %narFiles) {
    my $prefix = substr(basename($path), 0, 32);

    # Two different live paths must never share a hash part.
    if (defined $hashParts{$prefix}) {
        die "collision: $path vs $hashParts{$prefix}\n";
    }
    $hashParts{$prefix} = $path;

    print "$path\n" if defined $ENV{'SHOW_LIVE'};

    for my $nar (@{ $narFiles{$path} }) {
        die unless defined $nar->{hash};

        # List assignment yields the number of captures, so a failed match
        # (zero captures) triggers the die.
        my ($digest) = $nar->{hash} =~ /^sha256:(.*)$/ or die;
        die unless length($digest) == 52;

        $fileHashes{$digest} = $digest;

        print " $digest -> $nar->{url}\n" if defined $ENV{'SHOW_LIVE'};
    }
}

exit if defined $ENV{'SHOW_LIVE'};
# S3 setup.
#
# Both credentials are taken from the environment; the script dies as soon
# as one of them is unset or has a false value.
my $aws_access_key_id = $ENV{'AWS_ACCESS_KEY_ID'};
die unless $aws_access_key_id;

my $aws_secret_access_key = $ENV{'AWS_SECRET_ACCESS_KEY'};
die unless $aws_secret_access_key;

my $s3 = Net::Amazon::S3->new({
    retry                 => 1,
    aws_access_key_id     => $aws_access_key_id,
    aws_secret_access_key => $aws_secret_access_key,
});
# List the bucket and determine which files should be deleted.
#
# The bucket is fetched page by page.  Every key is classified as one of:
#   - "<32 chars>.narinfo"     garbage unless the hash part is in %hashParts
#   - "nar/<52 chars>.nar..."  garbage unless the hash is in %fileHashes
#   - "nix-cache-info"         never garbage
#   - anything else            reported as unknown and treated as garbage
# Garbage modified within the last $maxAge seconds is only counted ("young");
# older garbage is appended to @garbage.  Every non-garbage key is recorded
# in %alive.
my $marker;
my $nrFiles = 0;
my $totalSize = 0;
my $narinfos = 0;
my $narinfosSize = 0;
my $nars = 0;
my $narsSize = 0;
my @garbage;
my $garbageSize = 0;
my %alive;
my $youngGarbage = 0;
my $youngGarbageSize = 0;
my $n = 0;

# Computed once so that every key is judged against the same age cutoff,
# however long the listing takes.
my $cutoff = time() - $maxAge;

while (1) {
    print STDERR "fetching from ", ($marker // "start"), "...\n";

    my $page = $s3->list_bucket({ bucket => $bucketName, marker => $marker });
    die "could not get contents of S3 bucket $bucketName\n" unless $page;

    $marker = $page->{next_marker};

    foreach my $key (@{$page->{keys}}) {
        my $fn = $key->{key};

        # Advance the marker to the greatest key seen so far so that the
        # next request resumes after it.  The marker may be undefined here,
        # hence the explicit check instead of comparing against undef.
        $marker = $fn if !defined $marker || $fn gt $marker;

        $nrFiles++;
        $totalSize += $key->{size};
        #print "$fn\n";

        my $isGarbage = 0;

        if ($fn =~ /^(\w{32})\.narinfo$/) {
            my $hashPart = $1;
            $narinfos++;
            $narinfosSize += $key->{size};
            # Live only if some live store path has this hash part.
            $isGarbage = 1 unless defined $hashParts{$hashPart};
        }
        elsif ($fn =~ /nar\/(\w{52})\.nar.*$/) {
            my $hash = $1;
            $nars++;
            $narsSize += $key->{size};
            # Live only if a live manifest entry carries this file hash.
            $isGarbage = 1 unless defined $fileHashes{$hash};
        }
        elsif ($fn eq "nix-cache-info") {
            # Never garbage; recorded as alive below.
        }
        else {
            printf STDERR "unknown file %s (%d bytes, %s)\n", $fn, $key->{size}, $key->{last_modified};
            $isGarbage = 1;
        }

        if ($isGarbage) {
            my $dt = $dateParser->parse_datetime($key->{last_modified})
                or die "cannot parse modification time of $fn: "
                     . ($key->{last_modified} // "(undefined)") . "\n";

            if ($dt->epoch() >= $cutoff) {
                # Too recent to be listed; only counted.
                $youngGarbage++;
                $youngGarbageSize += $key->{size};
                printf STDERR "young %s (%d bytes, %s)\n", $fn, $key->{size}, $key->{last_modified};
            } else {
                push @garbage, $fn;
                $garbageSize += $key->{size};
                printf STDERR "garbage %s (%d bytes, %s)\n", $fn, $key->{size}, $key->{last_modified};
            }
        } else {
            $alive{$fn} = 1;
            printf STDERR "alive %s (%d bytes, %s)\n", $fn, $key->{size}, $key->{last_modified};
        }
    }

    # Page counter; only used by the debugging shortcut below.
    $n++;
    #last if $n >= 2;

    last unless $page->{is_truncated};
}
# Cross-check the live set against the bucket listing: report every live
# store path whose .narinfo key, or whose NAR key (with a .bz2 or .xz
# suffix), was not recorded in %alive.
for my $path (keys %narFiles) {
    my $narinfo = substr(basename($path), 0, 32) . ".narinfo";
    print STDERR "missing: $path -> $narinfo\n"
        unless defined $alive{$narinfo};

    for my $nar (@{ $narFiles{$path} }) {
        die unless defined $nar->{hash};

        # A failed match yields zero captures, which triggers the die.
        my ($digest) = $nar->{hash} =~ /^sha256:(.*)$/ or die;

        # Either compression suffix counts as present.
        next if defined $alive{"nar/$digest.nar.bz2"};
        next if defined $alive{"nar/$digest.nar.xz"};

        print STDERR "missing: $path -> nar/$digest.nar.*\n";
    }
}
# Totals go to stderr; the keys of old garbage go to stdout, one per line.
{
    # Number of bytes in one GiB, used to scale every size column.
    my $gib = 1024.0 * 1024.0 * 1024.0;

    my @columns = (
        $nrFiles,         $totalSize / $gib,
        $narinfos,        $narinfosSize / $gib,
        $nars,            $narsSize / $gib,
        scalar(@garbage), $garbageSize / $gib,
        $youngGarbage,    $youngGarbageSize / $gib,
    );

    printf STDERR "%s files in bucket (%.2f GiB), %s .narinfos (%.2f GiB), %s .nars (%.2f GiB), %s old garbage (%.2f GiB), %s young garbage (%.2f GiB)\n",
        @columns;
}

for my $key (@garbage) {
    print "$key\n";
}