Scripts for deleting garbage from the binary cache
This commit is contained in:
parent
bc8cce06a5
commit
d84441483e
47
delete-binary-cache-garbage.pl
Executable file
47
delete-binary-cache-garbage.pl
Executable file
|
@ -0,0 +1,47 @@
|
|||
#! /usr/bin/env nix-shell
|
||||
#! nix-shell -i perl -p perl perlPackages.NetAmazonS3 perlPackages.ForksSuper
|
||||
|
||||
use strict;
|
||||
use Net::Amazon::S3;
|
||||
use Forks::Super 'bg_eval';
|
||||
use List::MoreUtils qw(part);
|
||||
|
||||
my $bucketName = "nix-cache";
|
||||
my $nrProcesses = 8;
|
||||
|
||||
my @files;
|
||||
while (<>) {
|
||||
chomp;
|
||||
push @files, $_;
|
||||
}
|
||||
|
||||
# S3 setup.
|
||||
my $aws_access_key_id = $ENV{'AWS_ACCESS_KEY_ID'} or die;
|
||||
my $aws_secret_access_key = $ENV{'AWS_SECRET_ACCESS_KEY'} or die;
|
||||
|
||||
my $s3 = Net::Amazon::S3->new(
|
||||
{ aws_access_key_id => $aws_access_key_id,
|
||||
aws_secret_access_key => $aws_secret_access_key,
|
||||
retry => 1,
|
||||
});
|
||||
|
||||
my $bucket = $s3->bucket($bucketName) or die;
|
||||
|
||||
sub deleteFile {
|
||||
my ($fn) = @_;
|
||||
print STDERR "deleting $fn...\n";
|
||||
if (!$bucket->delete_key($fn)) {
|
||||
print STDERR "warning: failed to delete $fn\n";
|
||||
}
|
||||
}
|
||||
|
||||
# Fork processes to delete files in parallel.
|
||||
my $i = 0;
|
||||
my @filesPerProcess = part { $i++ % $nrProcesses } @files;
|
||||
my @res;
|
||||
for (my $n = 0; $n < $nrProcesses; $n++) {
|
||||
push @res, bg_eval { deleteFile($_) foreach @{$filesPerProcess[$n]}; return 0; };
|
||||
}
|
||||
|
||||
foreach my $res (@res) { if ($res) { } }
|
||||
print STDERR "DONE\n";
|
177
find-binary-cache-garbage.pl
Executable file
177
find-binary-cache-garbage.pl
Executable file
|
@ -0,0 +1,177 @@
|
|||
#! /usr/bin/env nix-shell
|
||||
#! nix-shell -i perl -p perl perlPackages.DBDSQLite perlPackages.NetAmazonS3
|
||||
|
||||
use strict;
|
||||
use Nix::Manifest;
|
||||
use Net::Amazon::S3;
|
||||
use File::Basename;
|
||||
use DateTime::Format::Strptime;
|
||||
|
||||
my $bucketName = "nix-cache";
|
||||
my $maxAge = 180 * 24 * 60 * 60;
|
||||
|
||||
my $dateParser = DateTime::Format::Strptime->new(pattern => "%Y-%m-%dT%H:%M:%S");
|
||||
|
||||
# Read the manifests of live releases.
|
||||
my $res = `find /data/releases/nixos /data/releases/nixpkgs -name MANIFEST`;
|
||||
die if $? != 0;
|
||||
my @manifests = split /\n/, $res;
|
||||
|
||||
my %narFiles;
|
||||
my %patches;
|
||||
|
||||
foreach my $manifest (@manifests) {
|
||||
print STDERR "loading $manifest\n";
|
||||
open TMP, "<$manifest" or die;
|
||||
my $s = <TMP> or die;
|
||||
chomp $s;
|
||||
close TMP;
|
||||
if ($s ne "version {") {
|
||||
warn "skipping very old manifest (i.e., for Nix <= 0.7)\n";
|
||||
next;
|
||||
}
|
||||
if (readManifest($manifest, \%narFiles, \%patches) < 3) {
|
||||
warn "manifest `$manifest' is too old (i.e., for Nix <= 0.7)\n";
|
||||
next;
|
||||
}
|
||||
}
|
||||
|
||||
print STDERR scalar(keys %narFiles), " live store paths found\n";
|
||||
|
||||
my %hashParts;
|
||||
my %fileHashes;
|
||||
|
||||
foreach my $storePath (keys %narFiles) {
|
||||
my $hashPart = substr(basename($storePath), 0, 32);
|
||||
die "collision: $storePath vs $hashParts{$hashPart}\n"
|
||||
if defined $hashParts{$hashPart};
|
||||
$hashParts{$hashPart} = $storePath;
|
||||
|
||||
print "$storePath\n" if defined $ENV{'SHOW_LIVE'};
|
||||
|
||||
foreach my $file (@{$narFiles{$storePath}}) {
|
||||
die unless defined $file->{hash};
|
||||
$file->{hash} =~ /^sha256:(.*)$/ or die;
|
||||
my $hash = $1;
|
||||
die unless length $hash == 52;
|
||||
$fileHashes{$1} = $hash;
|
||||
print " $hash -> $file->{url}\n" if defined $ENV{'SHOW_LIVE'};
|
||||
}
|
||||
}
|
||||
|
||||
exit if defined $ENV{'SHOW_LIVE'};
|
||||
|
||||
# S3 setup.
|
||||
my $aws_access_key_id = $ENV{'AWS_ACCESS_KEY_ID'} or die;
|
||||
my $aws_secret_access_key = $ENV{'AWS_SECRET_ACCESS_KEY'} or die;
|
||||
|
||||
my $s3 = Net::Amazon::S3->new(
|
||||
{ aws_access_key_id => $aws_access_key_id,
|
||||
aws_secret_access_key => $aws_secret_access_key,
|
||||
retry => 1,
|
||||
});
|
||||
|
||||
# List the bucket and determine which files should be deleted.
|
||||
my $marker;
|
||||
my $nrFiles = 0;
|
||||
my $totalSize = 0;
|
||||
my $narinfos = 0;
|
||||
my $narinfosSize = 0;
|
||||
my $nars = 0;
|
||||
my $narsSize = 0;
|
||||
my @garbage;
|
||||
my $garbageSize = 0;
|
||||
my %alive;
|
||||
my $youngGarbage = 0;
|
||||
my $youngGarbageSize = 0;
|
||||
|
||||
my $n = 0;
|
||||
while (1) {
|
||||
print STDERR "fetching from ", ($marker // "start"), "...\n";
|
||||
my $res = $s3->list_bucket({ bucket => $bucketName, marker => $marker });
|
||||
die "could not get contents of S3 bucket $bucketName\n" unless $res;
|
||||
$marker = $res->{next_marker};
|
||||
|
||||
foreach my $key (@{$res->{keys}}) {
|
||||
my $fn = $key->{key};
|
||||
$marker = $fn if $fn gt $marker;
|
||||
$nrFiles++;
|
||||
$totalSize += $key->{size};
|
||||
#print "$fn\n";
|
||||
|
||||
my $isGarbage = 0;
|
||||
|
||||
if ($fn =~ /^(\w{32})\.narinfo$/) {
|
||||
$narinfos++;
|
||||
$narinfosSize += $key->{size};
|
||||
my $hashPart = $1;
|
||||
my $storePath = $hashParts{$hashPart};
|
||||
if (defined $storePath) {
|
||||
#print STDERR "EXISTS $fn -> $storePath\n";
|
||||
} else {
|
||||
$isGarbage = 1;
|
||||
}
|
||||
}
|
||||
elsif ($fn =~ /nar\/(\w{52})\.nar.*$/) {
|
||||
$nars++;
|
||||
$narsSize += $key->{size};
|
||||
my $hash = $1;
|
||||
#print STDERR "$hash\n";
|
||||
if (defined $fileHashes{$hash}) {
|
||||
#print STDERR "EXISTS $fn\n";
|
||||
} else {
|
||||
$isGarbage = 1;
|
||||
}
|
||||
}
|
||||
elsif ($fn eq "nix-cache-info") {
|
||||
}
|
||||
else {
|
||||
printf STDERR "unknown file %s (%d bytes, %s)\n", $fn, $key->{size}, $key->{last_modified};
|
||||
$isGarbage = 1;
|
||||
}
|
||||
|
||||
if ($isGarbage) {
|
||||
my $dt = $dateParser->parse_datetime($key->{last_modified}) or die;
|
||||
if ($dt->epoch() >= time() - $maxAge) {
|
||||
$youngGarbage++;
|
||||
$youngGarbageSize += $key->{size};
|
||||
printf STDERR "young %s (%d bytes, %s)\n", $fn, $key->{size}, $key->{last_modified};
|
||||
} else {
|
||||
push @garbage, $fn;
|
||||
$garbageSize += $key->{size};
|
||||
printf STDERR "garbage %s (%d bytes, %s)\n", $fn, $key->{size}, $key->{last_modified};
|
||||
}
|
||||
} else {
|
||||
$alive{$fn} = 1;
|
||||
printf STDERR "alive %s (%d bytes, %s)\n", $fn, $key->{size}, $key->{last_modified};
|
||||
}
|
||||
}
|
||||
|
||||
$n++;
|
||||
#last if $n >= 2;
|
||||
last unless $res->{is_truncated};
|
||||
}
|
||||
|
||||
foreach my $storePath (keys %narFiles) {
|
||||
my $hashPart = substr(basename($storePath), 0, 32);
|
||||
if (!defined $alive{"$hashPart.narinfo"}) {
|
||||
print STDERR "missing: $storePath -> $hashPart.narinfo\n";
|
||||
}
|
||||
foreach my $file (@{$narFiles{$storePath}}) {
|
||||
die unless defined $file->{hash};
|
||||
$file->{hash} =~ /^sha256:(.*)$/ or die;
|
||||
my $hash = $1;
|
||||
if (!defined $alive{"nar/$hash.nar.bz2"} && !defined $alive{"nar/$hash.nar.xz"}) {
|
||||
print STDERR "missing: $storePath -> nar/$hash.nar.*\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printf STDERR "%s files in bucket (%.2f GiB), %s .narinfos (%.2f GiB), %s .nars (%.2f GiB), %s old garbage (%.2f GiB), %s young garbage (%.2f GiB)\n",
|
||||
$nrFiles, $totalSize / (1024.0 * 1024.0 * 1024.0),
|
||||
$narinfos, $narinfosSize / (1024.0 * 1024.0 * 1024.0),
|
||||
$nars, $narsSize / (1024.0 * 1024.0 * 1024.0),
|
||||
scalar(@garbage), $garbageSize / (1024.0 * 1024.0 * 1024.0),
|
||||
$youngGarbage, $youngGarbageSize / (1024.0 * 1024.0 * 1024.0);
|
||||
|
||||
print "$_\n" foreach @garbage;
|
Loading…
Reference in a new issue