From e00030563b092361eac2c4571a3e86453566b28f Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Wed, 5 Feb 2020 14:06:21 -0500 Subject: [PATCH 1/7] Jobsets: add a SERIAL, unique, non-null id column A postgresql column which is non-null and unique is treated with the same optimisations as a primary key, so we have no need to try and recreate the `id` as the primary key. No read paths are impacted by this change, and the database will automatically create an ID for each insert. Thus, no code needs to change. --- src/lib/Hydra/Schema/Jobsets.pm | 36 +++++++++++++++++++++++++++++++-- src/sql/hydra.sql | 4 +++- src/sql/upgrade-59.sql | 4 ++++ 3 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 src/sql/upgrade-59.sql diff --git a/src/lib/Hydra/Schema/Jobsets.pm b/src/lib/Hydra/Schema/Jobsets.pm index 9e9d4773..2615ef87 100644 --- a/src/lib/Hydra/Schema/Jobsets.pm +++ b/src/lib/Hydra/Schema/Jobsets.pm @@ -40,6 +40,13 @@ __PACKAGE__->table("jobsets"); data_type: 'text' is_nullable: 0 +=head2 id + + data_type: 'integer' + is_auto_increment: 1 + is_nullable: 0 + sequence: 'jobsets_id_seq' + =head2 project data_type: 'text' @@ -153,6 +160,13 @@ __PACKAGE__->table("jobsets"); __PACKAGE__->add_columns( "name", { data_type => "text", is_nullable => 0 }, + "id", + { + data_type => "integer", + is_auto_increment => 1, + is_nullable => 0, + sequence => "jobsets_id_seq", + }, "project", { data_type => "text", is_foreign_key => 1, is_nullable => 0 }, "description", @@ -209,6 +223,20 @@ __PACKAGE__->add_columns( __PACKAGE__->set_primary_key("project", "name"); +=head1 UNIQUE CONSTRAINTS + +=head2 C + +=over 4 + +=item * L + +=back + +=cut + +__PACKAGE__->add_unique_constraint("jobsets_id_unique", ["id"]); + =head1 RELATIONS =head2 buildmetrics @@ -350,8 +378,12 @@ __PACKAGE__->has_many( ); -# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-09 15:21:11 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:FVP1/AWjdKTlY6djrG592A +# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-09 15:25:17 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:1CMOaLf9fYRdJBlYiivmxA + +__PACKAGE__->add_column( + "+id" => { retrieve_on_insert => 1 } +); my %hint = ( columns => [ diff --git a/src/sql/hydra.sql b/src/sql/hydra.sql index 33ecab4d..6b22121c 100644 --- a/src/sql/hydra.sql +++ b/src/sql/hydra.sql @@ -52,6 +52,7 @@ create table ProjectMembers ( -- describing build jobs. create table Jobsets ( name text not null, + id serial not null, project text not null, description text, nixExprInput text, -- name of the jobsetInput containing the Nix or Guix expression @@ -76,7 +77,8 @@ create table Jobsets ( check ((type = 0) = (nixExprInput is not null and nixExprPath is not null)), check ((type = 1) = (flake is not null)), primary key (project, name), - foreign key (project) references Projects(name) on delete cascade on update cascade + foreign key (project) references Projects(name) on delete cascade on update cascade, + constraint Jobsets_id_unique UNIQUE(id) #ifdef SQLITE , foreign key (project, name, nixExprInput) references JobsetInputs(project, jobset, name) diff --git a/src/sql/upgrade-59.sql b/src/sql/upgrade-59.sql new file mode 100644 index 00000000..d0a28971 --- /dev/null +++ b/src/sql/upgrade-59.sql @@ -0,0 +1,4 @@ +-- will automatically add unique IDs to Jobsets. +ALTER TABLE Jobsets + ADD COLUMN id SERIAL NOT NULL, + ADD CONSTRAINT Jobsets_id_unique UNIQUE (id); From efa1f1d4fbdc468c61f7403e8709d0295e052c7f Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Wed, 5 Feb 2020 17:10:50 -0500 Subject: [PATCH 2/7] Jobs: add a nullable jobset_id foreign key to Jobsets. Also, adds an explicitly named "jobs" accessor to the Jobsets Schema object, which uses the project/jobset name. --- src/lib/Hydra/Schema/Jobs.pm | 32 +++++++++++++++++++++++-- src/lib/Hydra/Schema/Jobsets.pm | 41 +++++++++++++++++++++++++++++---- src/sql/hydra.sql | 2 ++ src/sql/upgrade-60.sql | 10 ++++++++ 4 files changed, 79 insertions(+), 6 deletions(-) create mode 100644 src/sql/upgrade-60.sql diff --git a/src/lib/Hydra/Schema/Jobs.pm b/src/lib/Hydra/Schema/Jobs.pm index bdecef3a..d4126e32 100644 --- a/src/lib/Hydra/Schema/Jobs.pm +++ b/src/lib/Hydra/Schema/Jobs.pm @@ -47,6 +47,12 @@ __PACKAGE__->table("jobs"); is_foreign_key: 1 is_nullable: 0 +=head2 jobset_id + + data_type: 'integer' + is_foreign_key: 1 + is_nullable: 1 + =head2 name data_type: 'text' @@ -59,6 +65,8 @@ __PACKAGE__->add_columns( { data_type => "text", is_foreign_key => 1, is_nullable => 0 }, "jobset", { data_type => "text", is_foreign_key => 1, is_nullable => 0 }, + "jobset_id", + { data_type => "integer", is_foreign_key => 1, is_nullable => 1 }, "name", { data_type => "text", is_nullable => 0 }, ); @@ -130,6 +138,26 @@ Related object: L __PACKAGE__->belongs_to( "jobset", "Hydra::Schema::Jobsets", + { id => "jobset_id" }, + { + is_deferrable => 0, + join_type => "LEFT", + on_delete => "CASCADE", + on_update => "NO ACTION", + }, +); + +=head2 jobset_project_jobset + +Type: belongs_to + +Related object: L + +=cut + +__PACKAGE__->belongs_to( + "jobset_project_jobset", + "Hydra::Schema::Jobsets", { name => "jobset", project => "project" }, { is_deferrable => 0, on_delete => "CASCADE", on_update => "CASCADE" }, ); @@ -169,7 +197,7 @@ __PACKAGE__->has_many( ); -# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-06 12:22:36 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:sYa6dZNK+stMAnTH0Tmn8A +# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-06 12:30:58 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:dFusVjxb423gIEoadAw9sw 1; diff --git a/src/lib/Hydra/Schema/Jobsets.pm b/src/lib/Hydra/Schema/Jobsets.pm index 2615ef87..60efa962 100644 --- a/src/lib/Hydra/Schema/Jobsets.pm +++ b/src/lib/Hydra/Schema/Jobsets.pm @@ -275,7 +275,7 @@ __PACKAGE__->has_many( undef, ); -=head2 jobs +=head2 jobs_jobset_ids Type: has_many @@ -284,7 +284,22 @@ Related object: L =cut __PACKAGE__->has_many( - "jobs", + "jobs_jobset_ids", + "Hydra::Schema::Jobs", + { "foreign.jobset_id" => "self.id" }, + undef, +); + +=head2 jobs_project_jobsets + +Type: has_many + +Related object: L + +=cut + +__PACKAGE__->has_many( + "jobs_project_jobsets", "Hydra::Schema::Jobs", { "foreign.jobset" => "self.name", @@ -378,8 +393,26 @@ __PACKAGE__->has_many( ); -# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-09 15:25:17 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:1CMOaLf9fYRdJBlYiivmxA +# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-09 15:26:15 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:DH1jX0smo2rFvyr4V+qJcw + +=head2 jobs + +Type: has_many + +Related object: L + +=cut + +__PACKAGE__->has_many( + "jobs", + "Hydra::Schema::Jobs", + { + "foreign.jobset" => "self.name", + "foreign.project" => "self.project", + }, + undef, +); __PACKAGE__->add_column( "+id" => { retrieve_on_insert => 1 } diff --git a/src/sql/hydra.sql b/src/sql/hydra.sql index 6b22121c..77b5822a 100644 --- a/src/sql/hydra.sql +++ b/src/sql/hydra.sql @@ -146,9 +146,11 @@ create table JobsetInputAlts ( create table Jobs ( project text not null, jobset text not null, + jobset_id integer null, name text not null, primary key (project, jobset, name), + foreign key (jobset_id) references Jobsets(id) on delete cascade, foreign key (project) references Projects(name) on delete cascade on update cascade, foreign key (project, jobset) references Jobsets(project, name) on delete cascade on update cascade ); diff --git a/src/sql/upgrade-60.sql b/src/sql/upgrade-60.sql new file mode 100644 index 00000000..3cdb00c3 --- /dev/null +++ b/src/sql/upgrade-60.sql @@ -0,0 +1,10 @@ +-- Add the jobset_id columns to the Jobs table. This will go +-- quickly, since the field is nullable. Note this is just part one of +-- this migration. Future steps involve a piecemeal backfilling, and +-- then making the column non-null. + +ALTER TABLE Jobs + ADD COLUMN jobset_id integer NULL, + ADD FOREIGN KEY (jobset_id) + REFERENCES Jobsets(id) + ON DELETE CASCADE; From 624f1d8d2d3641694acef83367a5c557cfe9bfde Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Wed, 5 Feb 2020 17:30:11 -0500 Subject: [PATCH 3/7] Jobs: populate Jobs.jobset_id field when writing from hydra-eval-jobset --- src/script/hydra-eval-jobset | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/script/hydra-eval-jobset b/src/script/hydra-eval-jobset index fcc60a6c..5e68f124 100755 --- a/src/script/hydra-eval-jobset +++ b/src/script/hydra-eval-jobset @@ -417,7 +417,12 @@ sub checkBuild { my $build; txn_do($db, sub { - my $job = $jobset->jobs->update_or_create({ name => $jobName }); + my $job = $jobset->jobs->update_or_create({ + name => $jobName, + jobset_id => $jobset->id, + project => $jobset->project, + jobset => $jobset->name, + }); # Don't add a build that has already been scheduled for this # job, or has been built but is still a "current" build for From ddf00fa6278532c9cce6bfeceeb1ee3d9c3e45cd Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Wed, 5 Feb 2020 15:11:56 -0500 Subject: [PATCH 4/7] Builds: add a nullable jobset_id foreign key to Jobsets. Also, adds an explicitly named "builds" accessor to the Jobsets Schema object, which uses the project/jobset name. --- src/lib/Hydra/Schema/Builds.pm | 32 +++++++++++++++++++++++-- src/lib/Hydra/Schema/Jobsets.pm | 42 +++++++++++++++++++++++++++++---- src/sql/hydra.sql | 2 ++ src/sql/upgrade-61.sql | 10 ++++++++ 4 files changed, 80 insertions(+), 6 deletions(-) create mode 100644 src/sql/upgrade-61.sql diff --git a/src/lib/Hydra/Schema/Builds.pm b/src/lib/Hydra/Schema/Builds.pm index 013fd09d..53454867 100644 --- a/src/lib/Hydra/Schema/Builds.pm +++ b/src/lib/Hydra/Schema/Builds.pm @@ -64,6 +64,12 @@ __PACKAGE__->table("builds"); is_foreign_key: 1 is_nullable: 0 +=head2 jobset_id + + data_type: 'integer' + is_foreign_key: 1 + is_nullable: 1 + =head2 job data_type: 'text' @@ -215,6 +221,8 @@ __PACKAGE__->add_columns( { data_type => "text", is_foreign_key => 1, is_nullable => 0 }, "jobset", { data_type => "text", is_foreign_key => 1, is_nullable => 0 }, + "jobset_id", + { data_type => "integer", is_foreign_key => 1, is_nullable => 1 }, "job", { data_type => "text", is_foreign_key => 1, is_nullable => 0 }, "nixname", @@ -457,6 +465,26 @@ Related object: L __PACKAGE__->belongs_to( "jobset", "Hydra::Schema::Jobsets", + { id => "jobset_id" }, + { + is_deferrable => 0, + join_type => "LEFT", + on_delete => "CASCADE", + on_update => "NO ACTION", + }, +); + +=head2 jobset_project_jobset + +Type: belongs_to + +Related object: L + +=cut + +__PACKAGE__->belongs_to( + "jobset_project_jobset", + "Hydra::Schema::Jobsets", { name => "jobset", project => "project" }, { is_deferrable => 0, on_delete => "NO ACTION", on_update => "CASCADE" }, ); @@ -550,8 +578,8 @@ __PACKAGE__->many_to_many( ); -# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-06 12:22:36 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:H3hs+zEywsUmwTWKfSE8wQ +# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-06 12:32:28 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:RvrINOAowDcde8Nd9VD6rQ __PACKAGE__->has_many( "dependents", diff --git a/src/lib/Hydra/Schema/Jobsets.pm b/src/lib/Hydra/Schema/Jobsets.pm index 60efa962..fbbb253c 100644 --- a/src/lib/Hydra/Schema/Jobsets.pm +++ b/src/lib/Hydra/Schema/Jobsets.pm @@ -257,7 +257,7 @@ __PACKAGE__->has_many( undef, ); -=head2 builds +=head2 builds_jobset_ids Type: has_many @@ -266,7 +266,22 @@ Related object: L =cut __PACKAGE__->has_many( - "builds", + "builds_jobset_ids", + "Hydra::Schema::Builds", + { "foreign.jobset_id" => "self.id" }, + undef, +); + +=head2 builds_project_jobsets + +Type: has_many + +Related object: L + +=cut + +__PACKAGE__->has_many( + "builds_project_jobsets", "Hydra::Schema::Builds", { "foreign.jobset" => "self.name", @@ -393,8 +408,27 @@ __PACKAGE__->has_many( ); -# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-09 15:26:15 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:DH1jX0smo2rFvyr4V+qJcw +# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-09 15:32:17 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:P8+t7rgpOqkGwRdM2b+3Bw + + +=head2 builds + +Type: has_many + +Related object: L + +=cut + +__PACKAGE__->has_many( + "builds", + "Hydra::Schema::Builds", + { + "foreign.jobset" => "self.name", + "foreign.project" => "self.project", + }, + undef, +); =head2 jobs diff --git a/src/sql/hydra.sql b/src/sql/hydra.sql index 77b5822a..8349e3be 100644 --- a/src/sql/hydra.sql +++ b/src/sql/hydra.sql @@ -170,6 +170,7 @@ create table Builds ( -- Info about the inputs. project text not null, jobset text not null, + jobset_id integer null, job text not null, -- Info about the build result. @@ -236,6 +237,7 @@ create table Builds ( check (finished = 0 or (stoptime is not null and stoptime != 0)), check (finished = 0 or (starttime is not null and starttime != 0)), + foreign key (jobset_id) references Jobsets(id) on delete cascade, foreign key (project) references Projects(name) on update cascade, foreign key (project, jobset) references Jobsets(project, name) on update cascade, foreign key (project, jobset, job) references Jobs(project, jobset, name) on update cascade diff --git a/src/sql/upgrade-61.sql b/src/sql/upgrade-61.sql new file mode 100644 index 00000000..bfa6b8b3 --- /dev/null +++ b/src/sql/upgrade-61.sql @@ -0,0 +1,10 @@ +-- Add the jobset_id columns to the Builds table. This will go +-- quickly, since the field is nullable. Note this is just part one of +-- this migration. Future steps involve a piecemeal backfilling, and +-- then making the column non-null. + +ALTER TABLE Builds + ADD COLUMN jobset_id integer NULL, + ADD FOREIGN KEY (jobset_id) + REFERENCES Jobsets(id) + ON DELETE CASCADE; From f3a561aecd4469e456bd214f4f3f6648ce46d916 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Wed, 5 Feb 2020 16:24:43 -0500 Subject: [PATCH 5/7] Builds: populate Builds.jobset_id in hydra-eval-jobset --- src/script/hydra-eval-jobset | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/script/hydra-eval-jobset b/src/script/hydra-eval-jobset index 5e68f124..d0cc3e8a 100755 --- a/src/script/hydra-eval-jobset +++ b/src/script/hydra-eval-jobset @@ -469,6 +469,9 @@ sub checkBuild { # Add the build to the database. $build = $job->builds->create( { timestamp => $time + , project => $jobset->project + , jobset => $jobset->name + , jobset_id => $jobset->id , description => null($buildInfo->{description}) , license => null($buildInfo->{license}) , homepage => null($buildInfo->{homepage}) From f69260118bf7e78a4b7830ea58e2f0ba8c85d6b7 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Wed, 5 Feb 2020 21:12:49 -0500 Subject: [PATCH 6/7] hydra-backfill-ids: create to add jobset_id values to Builds and Jobs Vacuum every 10 iterations, update 10k at a time. --- src/script/Makefile.am | 1 + src/script/hydra-backfill-ids | 164 ++++++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100755 src/script/hydra-backfill-ids diff --git a/src/script/Makefile.am b/src/script/Makefile.am index 5852bc85..9deb6f29 100644 --- a/src/script/Makefile.am +++ b/src/script/Makefile.am @@ -3,6 +3,7 @@ EXTRA_DIST = \ hydra-eval-guile-jobs.in distributable_scripts = \ + hydra-backfill-ids \ hydra-init \ hydra-eval-jobset \ hydra-server \ diff --git a/src/script/hydra-backfill-ids b/src/script/hydra-backfill-ids new file mode 100755 index 00000000..d9fc362a --- /dev/null +++ b/src/script/hydra-backfill-ids @@ -0,0 +1,164 @@ +#! /usr/bin/env perl + +use strict; +use utf8; +use Hydra::Model::DB; + +STDOUT->autoflush(); +STDERR->autoflush(1); +binmode STDERR, ":encoding(utf8)"; + +my $db = Hydra::Model::DB->new(); +my $vacuum = $db->storage->dbh->prepare("VACUUM;"); + +my $dryRun = defined $ENV{'HYDRA_DRY_RUN'}; + +my $batchSize = 10000; +my $iterationsPerVacuum = 500; + +sub backfillJobsJobsetId { + my ($skipLocked) = @_; + my $logPrefix; + + if ($skipLocked) { + $logPrefix = "(pass 1/2)"; + } else { + $logPrefix = "(pass 2/2)"; + } + + print STDERR "$logPrefix Backfilling Jobs records where jobset_id is NULL...\n"; + + my $totalToGoSth = $db->storage->dbh->prepare(<execute(); + my ($totalToGo) = $totalToGoSth->fetchrow_array; + + my $skipLockedStmt = $skipLocked ? "FOR UPDATE SKIP LOCKED" : ""; + my $update10kJobs = $db->storage->dbh->prepare(<execute($batchSize); + print STDERR "$logPrefix (batch #$iteration; $totalToGo remaining) Jobs.jobset_id: affected $affected rows...\n"; + $totalToGo -= $affected; + + if ($iteration % $iterationsPerVacuum == 0) { + print STDERR "$logPrefix (batch #$iteration) Vacuuming...\n"; + $vacuum->execute(); + } + } while ($affected > 0); + + + if ($skipLocked) { + backfillJobsJobsetId(0); + } +} + + +sub backfillBuildsJobsetId { + my ($skipLocked) = @_; + my $logPrefix; + + if ($skipLocked) { + $logPrefix = "(pass 1/2)"; + print STDERR "$logPrefix Backfilling unlocked Builds records where jobset_id is NULL...\n"; + } else { + $logPrefix = "(pass 2/2)"; + print STDERR "$logPrefix Backfilling all Builds records where jobset_id is NULL...\n"; + } + + my $skipLockedStmt = $skipLocked ? "FOR UPDATE SKIP LOCKED" : ""; + my $update10kBuilds = $db->storage->dbh->prepare(<<"QUERY"); +WITH updateprogress AS ( + UPDATE builds + SET jobset_id = ( + SELECT jobsets.id + FROM jobsets + WHERE jobsets.name = builds.jobset + AND jobsets.project = builds.project + ) + WHERE builds.id in ( + SELECT buildprime.id + FROM builds buildprime + WHERE buildprime.jobset_id IS NULL + AND buildprime.id >= ? + ORDER BY buildprime.id + $skipLockedStmt + LIMIT ? + ) + RETURNING id +) +SELECT + count(*) AS affected, + max(updateprogress.id) AS highest_id +FROM updateprogress; + +QUERY + + my $lowestNullIdSth = $db->storage->dbh->prepare(<execute(); + my ($highestId) = $lowestNullIdSth->fetchrow_array; + + my $totalToGoSth = $db->storage->dbh->prepare(<= ? +QUERY + $totalToGoSth->execute($highestId); + my ($totalToGo) = $totalToGoSth->fetchrow_array; + + print STDERR "$logPrefix Total Builds records without a jobset_id: $totalToGo, starting at $highestId\n"; + + my $iteration = 0; + my $affected; + do { + my $previousHighId = $highestId; + $iteration++; + $update10kBuilds->execute($highestId, $batchSize); + ($affected, $highestId) = $update10kBuilds->fetchrow_array; + + print STDERR "$logPrefix (batch #$iteration; $totalToGo remaining) Builds.jobset_id: affected $affected rows; max ID: $previousHighId -> $highestId\n"; + $totalToGo -= $affected; + + if ($iteration % $iterationsPerVacuum == 0) { + print STDERR "$logPrefix (batch #$iteration) Vacuuming...\n"; + $vacuum->execute(); + } + } while ($affected > 0); + + if ($skipLocked) { + backfillBuildsJobsetId(0); + } +} + +die "syntax: $0\n" unless @ARGV == 0; + +print STDERR "Beginning with a VACUUM\n"; +$vacuum->execute(); + +backfillJobsJobsetId(1); +backfillBuildsJobsetId(1); + +print STDERR "Ending with a VACUUM\n"; +$vacuum->execute(); From c4cc72f94449564b56ad9f71d530ee5917af3a97 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Thu, 6 Feb 2020 14:26:38 -0500 Subject: [PATCH 7/7] hydra-init: Warn about the schema version migration --- src/script/hydra-init | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/script/hydra-init b/src/script/hydra-init index d813d9fe..1a5f2439 100755 --- a/src/script/hydra-init +++ b/src/script/hydra-init @@ -44,6 +44,17 @@ my @versions = $db->resultset('SchemaVersion')->all; die "couldn't get Hydra schema version!" if scalar @versions != 1; my $schemaVersion = $versions[0]->version; +if ($schemaVersion <= 60) { + print STDERR <