hydra/src/script/hydra-s3-backup-collect-garbage

Add a plugin for backing up builds in S3

In your hydra config, you can add an arbitrary number of <s3backup> sections with the following options:

* name (required): Bucket name
* jobs (required): A regex matched against job names (in project:jobset:job format) to select the jobs that should be backed up to this bucket
* compression_type: bzip2 (default), xz, or none
* prefix: String to prepend to all hydra-created S3 keys (if this is meant to represent a directory, include the trailing slash, e.g. "cache/"). Default "".

After each build with an output (i.e. successful or failed-with-output builds), the output path and its closure are uploaded to the bucket as .nar files, with corresponding .narinfos to enable use as a binary cache.

This plugin requires that S3 credentials be available. It uses Net::Amazon::S3; as of this commit, the nixpkgs version can retrieve S3 credentials from the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables, or from EC2 instance metadata when using an IAM role.

This commit also adds a hydra-s3-backup-collect-garbage program, which uses hydra's GC roots directory to determine which paths are live, and then deletes all files except nix-cache-info and any .nar or .narinfo files corresponding to live paths. hydra-s3-backup-collect-garbage respects the prefix configuration option, so it won't delete anything outside the hierarchy you give it, and it has the same credential requirements as the plugin. A timer unit running the garbage collection periodically should probably be added to hydra-module.nix.

Note that two of the added tests fail due to a bug in the interaction between Net::Amazon::S3 and fake-s3; those behaviors work against real S3, so I'm committing this even with the broken tests.

Signed-off-by: Shea Levy <shea@shealevy.com>
2013-09-03 14:53:56 +00:00
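
For concreteness, here is a hypothetical <s3backup> section using all four options (the bucket name, job regex, and prefix are made-up values for illustration):

    <s3backup>
      name = my-hydra-cache
      jobs = myproject:master:.*
      compression_type = xz
      prefix = cache/
    </s3backup>

This would back up every job in the master jobset of myproject to the my-hydra-cache bucket, compressing NARs with xz and storing all keys under cache/.
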
#! /var/run/current-system/sw/bin/perl -w
use strict;
use File::Basename;
use Fcntl;
use IO::File;
use Net::Amazon::S3;
use Net::Amazon::S3::Client;
use Nix::Config;
use Nix::Store;
use Hydra::Model::DB;
use Hydra::Helper::Nix;
my $cfg = getHydraConfig()->{s3backup};
my @config = defined $cfg ? ref $cfg eq "ARRAY" ? @$cfg : ($cfg) : ();
exit 0 unless @config;
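
# Only one garbage collection may run at a time: take an exclusive lock
# on a file in the hydra data directory before touching the buckets.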
my $lockfile = Hydra::Model::DB::getHydraPath . "/.hydra-s3backup.lock";
my $lockhandle = IO::File->new;
open($lockhandle, ">", $lockfile) or die "Opening $lockfile: $!";
flock($lockhandle, Fcntl::LOCK_EX) or die "Write-locking $lockfile: $!";
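
# Net::Amazon::S3 picks up credentials from the AWS_ACCESS_KEY_ID and
# AWS_SECRET_ACCESS_KEY environment variables, or from EC2 instance
# metadata when an IAM role is in use.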
my $client = Net::Amazon::S3::Client->new( s3 => Net::Amazon::S3->new( retry => 1 ) );
my $db = Hydra::Model::DB->new();
my $gcRootsDir = getGCRootsDir;
opendir(my $dir, $gcRootsDir) or die "Opening $gcRootsDir: $!";
my @roots = readdir $dir;
closedir $dir;
my @actual_roots = ();
foreach my $link (@roots) {
    next if $link eq "." || $link eq "..";
    push @actual_roots, $Nix::Config::storeDir . "/" . $link;
}
# Don't delete a nix-cache-info file, if present
my %closure = ( "nix-cache-info" => undef );
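
# Every store path in the closure of the GC roots is live; its keys in
# the bucket are named after the 32-character hash that prefixes the
# store path's basename.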
foreach my $path (computeFSClosure(0, 0, @actual_roots)) {
    my $hash = substr basename($path), 0, 32;
    $closure{"$hash.narinfo"} = undef;
    $closure{"$hash.nar"} = undef;
}
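
# For each configured bucket, walk all keys under the configured prefix
# and delete any whose basename is not in the live set.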
foreach my $bucket_config (@config) {
    my $bucket = $client->bucket( name => $bucket_config->{name} );
    my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : "";
    my $cache_stream = $bucket->list({ prefix => $prefix });
    until ($cache_stream->is_done) {
        foreach my $object ($cache_stream->items) {
            $object->delete unless exists $closure{basename($object->key)};
        }
    }
}
1;