From 74388353b524148eb389c45e2798090be26a1f60 Mon Sep 17 00:00:00 2001
From: Shea Levy
Date: Tue, 3 Sep 2013 10:53:56 -0400
Subject: [PATCH] Add a plugin for backing up builds in s3

In your hydra config, you can add an arbitrary number of <s3backup>
sections, with the following options (an example stanza follows the
list):

* name (required): Bucket name
* jobs (required): A regex to match job names (in project:jobset:job
  format) that should be backed up to this bucket
* compression_type: bzip2 (default), xz, or none
* prefix: String to prepend to all hydra-created s3 keys (if this is
  meant to represent a directory, you should include the trailing
  slash, e.g. "cache/"). Default "".
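For illustration, a stanza that backs up the release jobs of one
jobset to a bucket under a "cache/" prefix might look like this (the
bucket name and job regex here are invented):

    <s3backup>
      name = my-hydra-cache
      jobs = myproject:trunk:release.*
      compression_type = xz
      prefix = cache/
    </s3backup>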
After each build with an output (i.e. successful or failed-with-output
builds), the output path and its closure are uploaded to the bucket as
.nar files, with corresponding .narinfos to enable use as a binary
cache; a sample .narinfo is shown below.
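The .narinfo the plugin writes for a store path looks roughly like
this (the store hashes and sizes are invented and the NarHash is
truncated for readability; References lists the basenames of the
path's references):

    StorePath: /nix/store/b6ic4qdvrmbgjfh58dmkc6s4j5rxskwb-foo-1.0
    URL: b6ic4qdvrmbgjfh58dmkc6s4j5rxskwb.nar
    Compression: bzip2
    FileHash: sha256:9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08
    FileSize: 24629
    NarHash: sha256:1l5h71rvyz...
    NarSize: 109856
    References: b6ic4qdvrmbgjfh58dmkc6s4j5rxskwb-foo-1.0
    Deriver: 1x7a0cfvp7pmgyvcxpvlvcbhjjqaqgff-foo-1.0.drv
    System: x86_64-linux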
"XXXXX"); + + my %seen = (); + # Upload nars and build narinfos + while (@needed_paths) { + my $path = shift @needed_paths; + next if exists $seen{$path}; + $seen{$path} = undef; + my $hash = substr basename($path), 0, 32; + my ($deriver, $narHash, $time, $narSize, $refs) = queryPathInfo($path, 0); + my $system; + if (defined $deriver and isValidPath($deriver)) { + $system = derivationFromPath($deriver)->{platform}; + } + foreach my $reference (@{$refs}) { + push @needed_paths, $reference; + } + while (my ($compression_type, $configs) = each %compression_types) { + my @incomplete_buckets = (); + # Don't do any work if all the buckets have this path + foreach my $bucket_config (@{$configs}) { + my $bucket = $client->bucket( name => $bucket_config->{name} ); + my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : ""; + push @incomplete_buckets, $bucket_config + unless $bucket->object( key => $prefix . "$hash.narinfo" )->exists; + } + next unless @incomplete_buckets; + my $compressor = $compressors{$compression_type}; + system("$Nix::Config::binDir/nix-store --export $path $compressor > $tempdir/nar") == 0 or die; + my $digest = Digest::SHA->new(256); + $digest->addfile("$tempdir/nar"); + my $file_hash = $digest->hexdigest; + my @stats = stat "$tempdir/nar" or die "Couldn't stat $tempdir/nar"; + my $file_size = $stats[7]; + my $narinfo = ""; + $narinfo .= "StorePath: $path\n"; + $narinfo .= "URL: $hash.nar\n"; + $narinfo .= "Compression: $compression_type\n"; + $narinfo .= "FileHash: sha256:$file_hash\n"; + $narinfo .= "FileSize: $file_size\n"; + $narinfo .= "NarHash: $narHash\n"; + $narinfo .= "NarSize: $narSize\n"; + $narinfo .= "References: " . join(" ", map { basename $_ } @{$refs}) . "\n"; + if (defined $deriver) { + $narinfo .= "Deriver: " . basename $deriver . "\n"; + if (defined $system) { + $narinfo .= "System: $system\n"; + } + } + push @{$narinfos{$compression_type}}, { hash => $hash, info => $narinfo }; + foreach my $bucket_config (@incomplete_buckets) { + my $bucket = $client->bucket( name => $bucket_config->{name} ); + my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : ""; + my $nar_object = $bucket->object( + key => $prefix . "$hash.nar", + content_type => "application/x-nix-archive" + ); + $nar_object->put_filename("$tempdir/nar"); + } + } + } + + # Upload narinfos + while (my ($compression_type, $infos) = each %narinfos) { + foreach my $bucket_config (@{$compression_types{$compression_type}}) { + foreach my $info (@{$infos}) { + my $bucket = $client->bucket( name => $bucket_config->{name} ); + my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : ""; + my $narinfo_object = $bucket->object( + key => $prefix . $info->{hash} . ".narinfo", + content_type => "text/x-nix-narinfo" + ); + $narinfo_object->put($info->{info}) unless $narinfo_object->exists; + } + } + } +} + +1; diff --git a/src/script/Makefile.am b/src/script/Makefile.am index 8cc48484..3994684c 100644 --- a/src/script/Makefile.am +++ b/src/script/Makefile.am @@ -10,6 +10,7 @@ distributable_scripts = \ hydra-queue-runner \ hydra-server \ hydra-update-gc-roots \ + hydra-s3-backup-collect-garbage \ nix-prefetch-git \ nix-prefetch-bzr \ nix-prefetch-hg diff --git a/src/script/hydra-s3-backup-collect-garbage b/src/script/hydra-s3-backup-collect-garbage new file mode 100755 index 00000000..9b7b01af --- /dev/null +++ b/src/script/hydra-s3-backup-collect-garbage @@ -0,0 +1,58 @@ +#! 
This commit also adds a hydra-s3-backup-collect-garbage program, which
uses hydra's gc roots directory to determine which paths are live, and
then deletes all files except nix-cache-info and any .nar or .narinfo
files corresponding to live paths. hydra-s3-backup-collect-garbage
respects the prefix configuration option, so it won't delete anything
outside of the hierarchy you give it, and it has the same credential
requirements as the plugin.

A timer unit that runs the garbage collection periodically should
probably be added to hydra-module.nix; a rough sketch of such a unit
follows.
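Untested sketch only: "baseDir" and "hydra" stand for whatever the
module actually calls its state directory and package, and startAt
makes NixOS create a matching systemd timer:

    systemd.services.hydra-s3-backup-collect-garbage =
      { description = "Remove dead paths from the S3 binary cache backups";
        environment.HYDRA_CONFIG = "${baseDir}/hydra.conf"; # hypothetical module variable
        serviceConfig.User = "hydra";
        serviceConfig.ExecStart = "${hydra}/bin/hydra-s3-backup-collect-garbage";
        startAt = "05:15"; # once a day, via an automatically created timer
      };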
Note that two of the added tests fail, due to a bug in the interaction
between Net::Amazon::S3 and fake-s3. Those behaviors work against real
s3, though, so I'm committing this even with the broken tests.

Signed-off-by: Shea Levy
---
 release.nix                                |  41 ++++++
 src/lib/Hydra/Plugin/S3Backup.pm           | 146 +++++++++++++++++++++
 src/script/Makefile.am                     |   1 +
 src/script/hydra-s3-backup-collect-garbage |  58 ++++++++
 tests/Setup.pm                             |   4 +-
 tests/api-test.nix                         |   3 +-
 tests/s3-backup-test.config                |   4 +
 tests/s3-backup-test.pl                    |  49 +++++++
 8 files changed, 303 insertions(+), 3 deletions(-)
 create mode 100644 src/lib/Hydra/Plugin/S3Backup.pm
 create mode 100755 src/script/hydra-s3-backup-collect-garbage
 create mode 100644 tests/s3-backup-test.config
 create mode 100755 tests/s3-backup-test.pl

diff --git a/release.nix b/release.nix
index 2f853079..d8ccc1cd 100644
--- a/release.nix
+++ b/release.nix
@@ -100,6 +100,7 @@ in rec {
           TextDiff
           TextTable
           XMLSimple
+          NetAmazonS3
           nix git ];
       };
 
@@ -143,6 +144,7 @@ in rec {
       ''; # */
 
       meta.description = "Build of Hydra on ${system}";
+      passthru.perlDeps = perlDeps;
     });
 
 
@@ -211,4 +213,43 @@ in rec {
       $machine->mustSucceed("perl ${./tests/api-test.pl} >&2");
     '';
   });
+
+  tests.s3backup = genAttrs' (system:
+    with import <nixpkgs> { inherit system; };
+    let hydra = builtins.getAttr system build; in # build."${system}"
+    simpleTest {
+      machine =
+        { config, pkgs, ... }:
+        { services.postgresql.enable = true;
+          services.postgresql.package = pkgs.postgresql92;
+          environment.systemPackages = [ hydra pkgs.rubyLibs.fakes3 ];
+          virtualisation.memorySize = 2047;
+          boot.kernelPackages = pkgs.linuxPackages_3_10;
+          virtualisation.writableStore = true;
+          networking.extraHosts = ''
+            127.0.0.1 hydra.s3.amazonaws.com
+          '';
+        };
+
+      testScript =
+        ''
+          $machine->waitForJob("postgresql");
+
+          # Initialise the database and the state.
+          $machine->mustSucceed
+              ( "createdb -O root hydra"
+              , "psql hydra -f ${hydra}/libexec/hydra/sql/hydra-postgresql.sql"
+              , "mkdir /var/lib/hydra"
+              , "mkdir /tmp/jobs"
+              , "cp ${./tests/s3-backup-test.pl} /tmp/s3-backup-test.pl"
+              , "cp ${./tests/api-test.nix} /tmp/jobs/default.nix"
+              );
+
+          # Start fakes3.
+          $machine->mustSucceed("fakes3 --root /tmp/s3 --port 80 &>/dev/null &");
+          $machine->waitForOpenPort("80");
+
+          $machine->mustSucceed("cd /tmp && LOGNAME=root AWS_ACCESS_KEY_ID=foo AWS_SECRET_ACCESS_KEY=bar HYDRA_DBI='dbi:Pg:dbname=hydra;user=root;' HYDRA_CONFIG=${./tests/s3-backup-test.config} perl -I ${hydra}/libexec/hydra/lib -I ${hydra.perlDeps}/lib/perl5/site_perl ./s3-backup-test.pl >&2");
+        '';
+    });
 }

diff --git a/src/lib/Hydra/Plugin/S3Backup.pm b/src/lib/Hydra/Plugin/S3Backup.pm
new file mode 100644
index 00000000..a78a8ca0
--- /dev/null
+++ b/src/lib/Hydra/Plugin/S3Backup.pm
@@ -0,0 +1,146 @@
+package Hydra::Plugin::S3Backup;
+
+use strict;
+use parent 'Hydra::Plugin';
+use File::Temp;
+use File::Basename;
+use Fcntl;
+use IO::File;
+use Net::Amazon::S3;
+use Net::Amazon::S3::Client;
+use Digest::SHA;
+use Nix::Config;
+use Nix::Store;
+use Hydra::Model::DB;
+use Hydra::Helper::CatalystUtils;
+
+my $client = Net::Amazon::S3::Client->new( s3 => Net::Amazon::S3->new( retry => 1 ) );
+my %compressors = (
+    xz => "| $Nix::Config::xz",
+    bzip2 => "| $Nix::Config::bzip2",
+    none => ""
+);
+my $lockfile = Hydra::Model::DB::getHydraPath . "/.hydra-s3backup.lock";
+
+sub buildFinished {
+    my ($self, $build, $dependents) = @_;
+
+    return unless $build->buildstatus == 0 or $build->buildstatus == 6;
+
+    my $jobName = showJobName $build;
+    my $job = $build->job;
+
+    my $cfg = $self->{config}->{s3backup};
+    my @config = defined $cfg ? ref $cfg eq "ARRAY" ? @$cfg : ($cfg) : ();
+
+    my @matching_configs = ();
+    foreach my $bucket_config (@config) {
+        push @matching_configs, $bucket_config if $jobName =~ /^$bucket_config->{jobs}$/;
+    }
+
+    return unless @matching_configs;
+
+    # !!! Maybe should do per-bucket locking?
+    my $lockhandle = IO::File->new;
+    open($lockhandle, "+>", $lockfile) or die "Opening $lockfile: $!";
+    flock($lockhandle, Fcntl::LOCK_SH) or die "Read-locking $lockfile: $!";
+
+    my @needed_paths = ();
+    foreach my $output ($build->buildoutputs) {
+        push @needed_paths, $output->path;
+    }
+
+    my %narinfos = ();
+    my %compression_types = ();
+    foreach my $bucket_config (@matching_configs) {
+        my $compression_type =
+            exists $bucket_config->{compression_type} ? $bucket_config->{compression_type} : "bzip2";
+        die "Unsupported compression type $compression_type" unless exists $compressors{$compression_type};
+        if (exists $compression_types{$compression_type}) {
+            push @{$compression_types{$compression_type}}, $bucket_config;
+        } else {
+            $compression_types{$compression_type} = [ $bucket_config ];
+            $narinfos{$compression_type} = [];
+        }
+    }
+
+    my $build_id = $build->id;
+    my $tempdir = File::Temp->newdir("s3-backup-nars-$build_id" . "XXXXX");
+
+    my %seen = ();
+    # Upload nars and build narinfos
+    while (@needed_paths) {
+        my $path = shift @needed_paths;
+        next if exists $seen{$path};
+        $seen{$path} = undef;
+        my $hash = substr basename($path), 0, 32;
+        my ($deriver, $narHash, $time, $narSize, $refs) = queryPathInfo($path, 0);
+        my $system;
+        if (defined $deriver and isValidPath($deriver)) {
+            $system = derivationFromPath($deriver)->{platform};
+        }
+        foreach my $reference (@{$refs}) {
+            push @needed_paths, $reference;
+        }
+        while (my ($compression_type, $configs) = each %compression_types) {
+            my @incomplete_buckets = ();
+            # Don't do any work if all the buckets have this path
+            foreach my $bucket_config (@{$configs}) {
+                my $bucket = $client->bucket( name => $bucket_config->{name} );
+                my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : "";
+                push @incomplete_buckets, $bucket_config
+                    unless $bucket->object( key => $prefix . "$hash.narinfo" )->exists;
+            }
+            next unless @incomplete_buckets;
+            my $compressor = $compressors{$compression_type};
+            system("$Nix::Config::binDir/nix-store --export $path $compressor > $tempdir/nar") == 0 or die;
+            my $digest = Digest::SHA->new(256);
+            $digest->addfile("$tempdir/nar");
+            my $file_hash = $digest->hexdigest;
+            my @stats = stat "$tempdir/nar" or die "Couldn't stat $tempdir/nar";
+            my $file_size = $stats[7];
+            my $narinfo = "";
+            $narinfo .= "StorePath: $path\n";
+            $narinfo .= "URL: $hash.nar\n";
+            $narinfo .= "Compression: $compression_type\n";
+            $narinfo .= "FileHash: sha256:$file_hash\n";
+            $narinfo .= "FileSize: $file_size\n";
+            $narinfo .= "NarHash: $narHash\n";
+            $narinfo .= "NarSize: $narSize\n";
+            $narinfo .= "References: " . join(" ", map { basename $_ } @{$refs}) . "\n";
+            if (defined $deriver) {
+                $narinfo .= "Deriver: " . basename $deriver . "\n";
+                if (defined $system) {
+                    $narinfo .= "System: $system\n";
+                }
+            }
+            push @{$narinfos{$compression_type}}, { hash => $hash, info => $narinfo };
+            foreach my $bucket_config (@incomplete_buckets) {
+                my $bucket = $client->bucket( name => $bucket_config->{name} );
+                my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : "";
+                my $nar_object = $bucket->object(
+                    key => $prefix . "$hash.nar",
+                    content_type => "application/x-nix-archive"
+                );
+                $nar_object->put_filename("$tempdir/nar");
+            }
+        }
+    }
+
+    # Upload narinfos
+    while (my ($compression_type, $infos) = each %narinfos) {
+        foreach my $bucket_config (@{$compression_types{$compression_type}}) {
+            foreach my $info (@{$infos}) {
+                my $bucket = $client->bucket( name => $bucket_config->{name} );
+                my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : "";
+                my $narinfo_object = $bucket->object(
+                    key => $prefix . $info->{hash} . ".narinfo",
+                    content_type => "text/x-nix-narinfo"
+                );
+                $narinfo_object->put($info->{info}) unless $narinfo_object->exists;
+            }
+        }
+    }
+}
+
+1;

diff --git a/src/script/Makefile.am b/src/script/Makefile.am
index 8cc48484..3994684c 100644
--- a/src/script/Makefile.am
+++ b/src/script/Makefile.am
@@ -10,6 +10,7 @@ distributable_scripts = \
 	hydra-queue-runner \
 	hydra-server \
 	hydra-update-gc-roots \
+	hydra-s3-backup-collect-garbage \
 	nix-prefetch-git \
 	nix-prefetch-bzr \
 	nix-prefetch-hg

diff --git a/src/script/hydra-s3-backup-collect-garbage b/src/script/hydra-s3-backup-collect-garbage
new file mode 100755
index 00000000..9b7b01af
--- /dev/null
+++ b/src/script/hydra-s3-backup-collect-garbage
@@ -0,0 +1,58 @@
+#! /var/run/current-system/sw/bin/perl -w
+
+use strict;
+use File::Basename;
+use Fcntl;
+use IO::File;
+use Net::Amazon::S3;
+use Net::Amazon::S3::Client;
+use Nix::Config;
+use Nix::Store;
+use Hydra::Model::DB;
+use Hydra::Helper::Nix;
+
+my $cfg = getHydraConfig()->{s3backup};
+my @config = defined $cfg ? ref $cfg eq "ARRAY" ? @$cfg : ($cfg) : ();
+
+exit 0 unless @config;
+
+my $lockfile = Hydra::Model::DB::getHydraPath . "/.hydra-s3backup.lock";
+my $lockhandle = IO::File->new;
+open($lockhandle, ">", $lockfile) or die "Opening $lockfile: $!";
+flock($lockhandle, Fcntl::LOCK_EX) or die "Write-locking $lockfile: $!";
+
+my $client = Net::Amazon::S3::Client->new( s3 => Net::Amazon::S3->new( retry => 1 ) );
+my $db = Hydra::Model::DB->new();
+
+my $gcRootsDir = getGCRootsDir;
+opendir DIR, $gcRootsDir or die;
+my @roots = readdir DIR;
+closedir DIR;
+
+my @actual_roots = ();
+foreach my $link (@roots) {
+    next if $link eq "." || $link eq "..";
+    push @actual_roots, $Nix::Config::storeDir . "/$link";
+}
+
+# Don't delete a nix-cache-info file, if present
+my %closure = ( "nix-cache-info" => undef );
+foreach my $path (computeFSClosure(0, 0, @actual_roots)) {
+    my $hash = substr basename($path), 0, 32;
+    $closure{"$hash.narinfo"} = undef;
+    $closure{"$hash.nar"} = undef;
+}
+
+foreach my $bucket_config (@config) {
+    my $bucket = $client->bucket( name => $bucket_config->{name} );
+    my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : "";
+
+    my $cache_stream = $bucket->list({ prefix => $prefix });
+    until ($cache_stream->is_done) {
+        foreach my $object ($cache_stream->items) {
+            $object->delete unless exists $closure{basename($object->key)};
+        }
+    }
+}
+
+1;

diff --git a/tests/Setup.pm b/tests/Setup.pm
index 1cbbe750..96aecde6 100644
--- a/tests/Setup.pm
+++ b/tests/Setup.pm
@@ -61,7 +61,7 @@ sub createJobsetWithOneInput {
 
 sub evalSucceeds {
     my ($jobset) = @_;
-    my ($res, $stdout, $stderr) = captureStdoutStderr(60, ("../src/script/hydra-evaluator", $jobset->project->name, $jobset->name));
+    my ($res, $stdout, $stderr) = captureStdoutStderr(60, ("hydra-evaluator", $jobset->project->name, $jobset->name));
     chomp $stdout; chomp $stderr;
     print STDERR "Evaluation errors for jobset ".$jobset->project->name.":".$jobset->name.": \n".$jobset->errormsg."\n" if $jobset->errormsg;
     print STDERR "STDOUT: $stdout\n" if $stdout ne "";
@@ -71,7 +71,7 @@ sub evalSucceeds {
 
 sub runBuild {
     my ($build) = @_;
-    my ($res, $stdout, $stderr) = captureStdoutStderr(60, ("../src/script/hydra-build", $build->id));
+    my ($res, $stdout, $stderr) = captureStdoutStderr(60, ("hydra-build", $build->id));
     print "STDERR: $stderr" if $stderr ne "";
     return !$res;
 }

diff --git a/tests/api-test.nix b/tests/api-test.nix
index a798294b..2f45a48f 100644
--- a/tests/api-test.nix
+++ b/tests/api-test.nix
@@ -1,6 +1,7 @@
 let
+  thisFile = builtins.toFile "default.nix" (builtins.readFile ./default.nix);
   builder = builtins.toFile "builder.sh" ''
-    echo -n ${builtins.readFile ./default.nix} > $out
+    echo ${thisFile} > $out
   '';
 in {
   job = derivation {

diff --git a/tests/s3-backup-test.config b/tests/s3-backup-test.config
new file mode 100644
index 00000000..49068ea9
--- /dev/null
+++ b/tests/s3-backup-test.config
@@ -0,0 +1,4 @@
+<s3backup>
+  jobs = tests:basic:job
+  name = hydra
+</s3backup>

diff --git a/tests/s3-backup-test.pl b/tests/s3-backup-test.pl
new file mode 100755
index 00000000..a81d2d22
--- /dev/null
+++ b/tests/s3-backup-test.pl
@@ -0,0 +1,49 @@
+use strict;
+use File::Basename;
+use Hydra::Model::DB;
+use Hydra::Helper::Nix;
+use Nix::Store;
+use Cwd;
+
+my $db = Hydra::Model::DB->new;
+
+use Test::Simple tests => 6;
+
+$db->resultset('Users')->create({ username => "root", emailaddress => 'root@invalid.org', password => '' });
+
+$db->resultset('Projects')->create({name => "tests", displayname => "", owner => "root"});
+my $project = $db->resultset('Projects')->update_or_create({name => "tests", displayname => "", owner => "root"});
+my $jobset = $project->jobsets->create({name => "basic", nixexprinput => "jobs", nixexprpath => "default.nix", emailoverride => ""});
+
+my $jobsetinput;
+
+$jobsetinput = $jobset->jobsetinputs->create({name => "jobs", type => "path"});
+$jobsetinput->jobsetinputalts->create({altnr => 0, value => getcwd . "/jobs"});
+system("hydra-evaluator " . $jobset->project->name . " " . $jobset->name);
+
+my $successful_hash;
+foreach my $build ($jobset->builds->search({finished => 0})) {
+    system("hydra-build " . $build->id);
+    my @outputs = $build->buildoutputs->all;
+    my $hash = substr basename($outputs[0]->path), 0, 32;
+    if ($build->job->name eq "job") {
+        ok(-e "/tmp/s3/hydra/$hash.nar", "The nar of a successful matched build is uploaded");
+        ok(-e "/tmp/s3/hydra/$hash.narinfo", "The narinfo of a successful matched build is uploaded");
+        $successful_hash = $hash;
+    }
+}
+
+system("hydra-s3-backup-collect-garbage");
+ok(-e "/tmp/s3/hydra/$successful_hash.nar", "The nar of a build that's a root is not removed by gc");
+ok(-e "/tmp/s3/hydra/$successful_hash.narinfo", "The narinfo of a build that's a root is not removed by gc");
+
+my $gcRootsDir = getGCRootsDir;
+opendir DIR, $gcRootsDir or die;
+while(readdir DIR) {
+    next if $_ eq "." or $_ eq "..";
+    unlink "$gcRootsDir/$_";
+}
+closedir DIR;
+system("hydra-s3-backup-collect-garbage");
+ok(!(-e "/tmp/s3/hydra/$successful_hash.nar"), "The nar of a build that's not a root is removed by gc");
+ok(!(-e "/tmp/s3/hydra/$successful_hash.narinfo"), "The narinfo of a build that's not a root is removed by gc");