Merge pull request #711 from grahamc/jobset-id-pgsql-part-1

jobset_id, #710 Part 1
2020-02-10 11:43:38 -05:00 · 2020-02-10 11:43:38 -05:00 · add4f610ce
parent 4a05bb36ac c4cc72f944
commit add4f610ce
11 changed files with 381 additions and 12 deletions
--- a/src/lib/Hydra/Schema/Builds.pm
+++ b/src/lib/Hydra/Schema/Builds.pm
@ -64,6 +64,12 @@ __PACKAGE__->table("builds");
  is_foreign_key: 1
  is_nullable: 0
 =head2 jobset_id
  data_type: 'integer'
  is_foreign_key: 1
  is_nullable: 1
 =head2 job
  data_type: 'text'
@ -215,6 +221,8 @@ __PACKAGE__->add_columns(
  { data_type => "text", is_foreign_key => 1, is_nullable => 0 },
  "jobset",
  { data_type => "text", is_foreign_key => 1, is_nullable => 0 },
  "jobset_id",
  { data_type => "integer", is_foreign_key => 1, is_nullable => 1 },
  "job",
  { data_type => "text", is_foreign_key => 1, is_nullable => 0 },
  "nixname",
@ -457,6 +465,26 @@ Related object: L<Hydra::Schema::Jobsets>
 __PACKAGE__->belongs_to(
  "jobset",
  "Hydra::Schema::Jobsets",
  { id => "jobset_id" },
  {
    is_deferrable => 0,
    join_type     => "LEFT",
    on_delete     => "CASCADE",
    on_update     => "NO ACTION",
  },
 );
 =head2 jobset_project_jobset
 Type: belongs_to
 Related object: L<Hydra::Schema::Jobsets>
 =cut
 __PACKAGE__->belongs_to(
  "jobset_project_jobset",
  "Hydra::Schema::Jobsets",
  { name => "jobset", project => "project" },
  { is_deferrable => 0, on_delete => "NO ACTION", on_update => "CASCADE" },
 );
@ -550,8 +578,8 @@ __PACKAGE__->many_to_many(
 );
-# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-06 12:22:36
+# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-06 12:32:28
-# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:H3hs+zEywsUmwTWKfSE8wQ
+# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:RvrINOAowDcde8Nd9VD6rQ
 __PACKAGE__->has_many(
  "dependents",
--- a/src/lib/Hydra/Schema/Jobs.pm
+++ b/src/lib/Hydra/Schema/Jobs.pm
@ -47,6 +47,12 @@ __PACKAGE__->table("jobs");
  is_foreign_key: 1
  is_nullable: 0
 =head2 jobset_id
  data_type: 'integer'
  is_foreign_key: 1
  is_nullable: 1
 =head2 name
  data_type: 'text'
@ -59,6 +65,8 @@ __PACKAGE__->add_columns(
  { data_type => "text", is_foreign_key => 1, is_nullable => 0 },
  "jobset",
  { data_type => "text", is_foreign_key => 1, is_nullable => 0 },
  "jobset_id",
  { data_type => "integer", is_foreign_key => 1, is_nullable => 1 },
  "name",
  { data_type => "text", is_nullable => 0 },
 );
@ -130,6 +138,26 @@ Related object: L<Hydra::Schema::Jobsets>
 __PACKAGE__->belongs_to(
  "jobset",
  "Hydra::Schema::Jobsets",
  { id => "jobset_id" },
  {
    is_deferrable => 0,
    join_type     => "LEFT",
    on_delete     => "CASCADE",
    on_update     => "NO ACTION",
  },
 );
 =head2 jobset_project_jobset
 Type: belongs_to
 Related object: L<Hydra::Schema::Jobsets>
 =cut
 __PACKAGE__->belongs_to(
  "jobset_project_jobset",
  "Hydra::Schema::Jobsets",
  { name => "jobset", project => "project" },
  { is_deferrable => 0, on_delete => "CASCADE", on_update => "CASCADE" },
 );
@ -169,7 +197,7 @@ __PACKAGE__->has_many(
 );
-# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-06 12:22:36
+# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-06 12:30:58
-# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:sYa6dZNK+stMAnTH0Tmn8A
+# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:dFusVjxb423gIEoadAw9sw
 1;
--- a/src/lib/Hydra/Schema/Jobsets.pm
+++ b/src/lib/Hydra/Schema/Jobsets.pm
@ -40,6 +40,13 @@ __PACKAGE__->table("jobsets");
  data_type: 'text'
  is_nullable: 0
 =head2 id
  data_type: 'integer'
  is_auto_increment: 1
  is_nullable: 0
  sequence: 'jobsets_id_seq'
 =head2 project
  data_type: 'text'
@ -153,6 +160,13 @@ __PACKAGE__->table("jobsets");
 __PACKAGE__->add_columns(
  "name",
  { data_type => "text", is_nullable => 0 },
  "id",
  {
    data_type         => "integer",
    is_auto_increment => 1,
    is_nullable       => 0,
    sequence          => "jobsets_id_seq",
  },
  "project",
  { data_type => "text", is_foreign_key => 1, is_nullable => 0 },
  "description",
@ -209,6 +223,20 @@ __PACKAGE__->add_columns(
 __PACKAGE__->set_primary_key("project", "name");
 =head1 UNIQUE CONSTRAINTS
 =head2 C<jobsets_id_unique>
 =over 4
 =item * L</id>
 =back
 =cut
 __PACKAGE__->add_unique_constraint("jobsets_id_unique", ["id"]);
 =head1 RELATIONS
 =head2 buildmetrics
@ -229,7 +257,7 @@ __PACKAGE__->has_many(
  undef,
 );
-=head2 builds
+=head2 builds_jobset_ids
 Type: has_many
@ -238,7 +266,22 @@ Related object: L<Hydra::Schema::Builds>
 =cut
 __PACKAGE__->has_many(
-  "builds",
+  "builds_jobset_ids",
  "Hydra::Schema::Builds",
  { "foreign.jobset_id" => "self.id" },
  undef,
 );
 =head2 builds_project_jobsets
 Type: has_many
 Related object: L<Hydra::Schema::Builds>
 =cut
 __PACKAGE__->has_many(
  "builds_project_jobsets",
  "Hydra::Schema::Builds",
  {
    "foreign.jobset"  => "self.name",
@ -247,7 +290,7 @@ __PACKAGE__->has_many(
  undef,
 );
-=head2 jobs
+=head2 jobs_jobset_ids
 Type: has_many
@ -256,7 +299,22 @@ Related object: L<Hydra::Schema::Jobs>
 =cut
 __PACKAGE__->has_many(
-  "jobs",
+  "jobs_jobset_ids",
  "Hydra::Schema::Jobs",
  { "foreign.jobset_id" => "self.id" },
  undef,
 );
 =head2 jobs_project_jobsets
 Type: has_many
 Related object: L<Hydra::Schema::Jobs>
 =cut
 __PACKAGE__->has_many(
  "jobs_project_jobsets",
  "Hydra::Schema::Jobs",
  {
    "foreign.jobset"  => "self.name",
@ -350,8 +408,49 @@ __PACKAGE__->has_many(
 );
-# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-09 15:21:11
+# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-09 15:32:17
-# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:FVP1/AWjdKTlY6djrG592A
+# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:P8+t7rgpOqkGwRdM2b+3Bw
 =head2 builds
 Type: has_many
 Related object: L<Hydra::Schema::Builds>
 =cut
 # Hand-written relation (it sits below the Schema::Loader md5sum marker).
 # The generated relations to Builds are named builds_jobset_ids and
 # builds_project_jobsets; this one keeps a plain "builds" accessor that
 # joins on (project, jobset name).
 __PACKAGE__->has_many(
  "builds",
  "Hydra::Schema::Builds",
  {
    "foreign.jobset"  => "self.name",
    "foreign.project" => "self.project",
  },
  undef,
 );
 =head2 jobs
 Type: has_many
 Related object: L<Hydra::Schema::Jobs>
 =cut
 # Same arrangement for Jobs: the generated relations are jobs_jobset_ids and
 # jobs_project_jobsets, while "jobs" (used e.g. as $jobset->jobs in
 # hydra-eval-jobset) joins on (project, jobset name).
 __PACKAGE__->has_many(
  "jobs",
  "Hydra::Schema::Jobs",
  {
    "foreign.jobset"  => "self.name",
    "foreign.project" => "self.project",
  },
  undef,
 );
 # Extend the generated "id" column definition with retrieve_on_insert.
 # NOTE(review): presumably needed because id is filled from a sequence by
 # the database and is not part of the (project, name) primary key, so it
 # would otherwise not be read back after an insert -- confirm against the
 # DBIx::Class::ResultSource documentation.
 __PACKAGE__->add_column(
    "+id" => { retrieve_on_insert => 1 }
 );
 my %hint = (
    columns => [
--- a/src/script/Makefile.am
+++ b/src/script/Makefile.am
@ -3,6 +3,7 @@ EXTRA_DIST =					\
  hydra-eval-guile-jobs.in
 distributable_scripts =				\
  hydra-backfill-ids				\
  hydra-init					\
  hydra-eval-jobset				\
  hydra-server					\
--- a/src/script/hydra-backfill-ids
+++ b/src/script/hydra-backfill-ids
@ -0,0 +1,164 @@
 #! /usr/bin/env perl
 use strict;
 use utf8;
 use Hydra::Model::DB;
 # Enable autoflush on both output streams.
 # NOTE(review): the no-argument autoflush() call is assumed to enable
 # flushing just like autoflush(1) -- confirm against IO::Handle.
 STDOUT->autoflush();
 STDERR->autoflush(1);
 binmode STDERR, ":encoding(utf8)";
 # Database model object and a prepared VACUUM statement; both are used by
 # the backfill subs and the top-level statements below.
 my $db = Hydra::Model::DB->new();
 my $vacuum = $db->storage->dbh->prepare("VACUUM;");
 # NOTE(review): $dryRun is set when HYDRA_DRY_RUN is present in the
 # environment, but nothing in the visible script reads it -- the updates
 # run regardless. Confirm whether a dry-run mode was intended.
 my $dryRun = defined $ENV{'HYDRA_DRY_RUN'};
 # Value bound to the LIMIT placeholder of each batch UPDATE.
 my $batchSize = 10000;
 # A VACUUM is executed every time the batch counter is a multiple of this.
 my $iterationsPerVacuum = 500;
 # Backfill Jobs.jobset_id for every Jobs row where it is still NULL,
 # updating at most $batchSize rows per statement.
 #
 # Parameters:
 #   $skipLocked - true:  pass 1/2. The batch selection adds
 #                        FOR UPDATE SKIP LOCKED. When a batch affects no
 #                        rows, the sub calls itself with 0 to run pass 2/2.
 #                 false: pass 2/2. No locking clause is added.
 #
 # Progress goes to STDERR, and a VACUUM is executed every
 # $iterationsPerVacuum batches. Returns nothing meaningful.
 sub backfillJobsJobsetId {
    my ($skipLocked) = @_;
    my $logPrefix;
    if ($skipLocked) {
        $logPrefix = "(pass 1/2)";
    } else {
        $logPrefix = "(pass 2/2)";
    }
    print STDERR "$logPrefix Backfilling Jobs records where jobset_id is NULL...\n";
    # Count the outstanding rows up front; the figure is only used for the
    # "remaining" number in the progress log.
    my $totalToGoSth = $db->storage->dbh->prepare(<<QUERY);
 SELECT COUNT(*) FROM jobs WHERE jobset_id IS NULL
 QUERY
    $totalToGoSth->execute();
    my ($totalToGo) = $totalToGoSth->fetchrow_array;
    # Interpolated into the batch-selection subquery below.
    my $skipLockedStmt = $skipLocked ? "FOR UPDATE SKIP LOCKED" : "";
    # Jobs rows are identified by (project, jobset, name); each batch picks
    # up to LIMIT rows that still lack a jobset_id and sets it from the
    # Jobsets row with the same project and name.
    my $update10kJobs = $db->storage->dbh->prepare(<<QUERY);
 UPDATE jobs
 SET jobset_id = (
  SELECT jobsets.id
  FROM jobsets
  WHERE jobsets.name = jobs.jobset
    AND jobsets.project = jobs.project
 )
 WHERE (jobs.project, jobs.jobset, jobs.name) in (
  SELECT jobsprime.project, jobsprime.jobset, jobsprime.name
  FROM jobs jobsprime
  WHERE jobsprime.jobset_id IS NULL
  $skipLockedStmt
  LIMIT ?
 );
 QUERY
    print STDERR "$logPrefix Total Jobs records without a jobset_id: $totalToGo\n";
    my $iteration = 0;
    my $affected;
    do {
        $iteration++;
        # DBI's execute() reports "no rows affected" as the string "0E0"
        # (zero but true). Adding 0 turns it into a plain number so the
        # final progress line reads "affected 0 rows" instead of "0E0".
        $affected = 0 + $update10kJobs->execute($batchSize);
        print STDERR "$logPrefix (batch #$iteration; $totalToGo remaining) Jobs.jobset_id: affected $affected rows...\n";
        $totalToGo -= $affected;
        if ($iteration % $iterationsPerVacuum == 0) {
            print STDERR "$logPrefix (batch #$iteration) Vacuuming...\n";
            $vacuum->execute();
        }
    } while ($affected > 0);
    # Pass 1 chains straight into pass 2.
    if ($skipLocked) {
        backfillJobsJobsetId(0);
    }
 }
 # Populate Builds.jobset_id for rows where it is NULL, walking the table
 # in ascending id order with at most $batchSize rows per UPDATE.
 #
 # Parameters:
 #   $skipLocked - true:  pass 1/2. The batch selection uses
 #                        FOR UPDATE SKIP LOCKED, and once a batch affects
 #                        no rows the sub invokes itself with 0.
 #                 false: pass 2/2. No locking clause is added.
 #
 # Progress goes to STDERR; a VACUUM is executed every
 # $iterationsPerVacuum batches.
 sub backfillBuildsJobsetId {
    my ($skipLocked) = @_;
    my $pass = $skipLocked ? "(pass 1/2)" : "(pass 2/2)";
    my $banner = $skipLocked
        ? "$pass Backfilling unlocked Builds records where jobset_id is NULL...\n"
        : "$pass Backfilling all Builds records where jobset_id is NULL...\n";
    print STDERR $banner;
    my $skipLockedStmt = "";
    $skipLockedStmt = "FOR UPDATE SKIP LOCKED" if $skipLocked;
    # A single statement updates one batch and reports both the number of
    # rows it touched and the largest id among them.
    my $batchSth = $db->storage->dbh->prepare(<<"QUERY");
 WITH updateprogress AS (
  UPDATE builds
  SET jobset_id = (
    SELECT jobsets.id
    FROM jobsets
    WHERE jobsets.name = builds.jobset
      AND jobsets.project = builds.project
  )
  WHERE builds.id in (
    SELECT buildprime.id
    FROM builds buildprime
    WHERE buildprime.jobset_id IS NULL
      AND buildprime.id >= ?
    ORDER BY buildprime.id
    $skipLockedStmt
    LIMIT ?
  )
  RETURNING id
 )
 SELECT
  count(*) AS affected,
  max(updateprogress.id) AS highest_id
 FROM updateprogress;
 QUERY
    # Starting point: the smallest id that still has no jobset_id.
    my $firstNullSth = $db->storage->dbh->prepare(<<QUERY);
 SELECT id FROM builds WHERE jobset_id IS NULL ORDER BY id LIMIT 1
 QUERY
    $firstNullSth->execute();
    my ($cursorId) = $firstNullSth->fetchrow_array;
    # Outstanding row count from that id onwards; used only for logging.
    my $countSth = $db->storage->dbh->prepare(<<QUERY);
 SELECT COUNT(*) FROM builds WHERE jobset_id IS NULL AND id >= ?
 QUERY
    $countSth->execute($cursorId);
    my ($remaining) = $countSth->fetchrow_array;
    print STDERR "$pass Total Builds records without a jobset_id: $remaining, starting at $cursorId\n";
    my $batchNo = 0;
    my $touched;
    while (1) {
        $batchNo++;
        my $fromId = $cursorId;
        $batchSth->execute($fromId, $batchSize);
        # The next batch resumes from the highest id this one updated.
        ($touched, $cursorId) = $batchSth->fetchrow_array;
        print STDERR "$pass (batch #$batchNo; $remaining remaining) Builds.jobset_id: affected $touched rows; max ID: $fromId -> $cursorId\n";
        $remaining -= $touched;
        if ($batchNo % $iterationsPerVacuum == 0) {
            print STDERR "$pass (batch #$batchNo) Vacuuming...\n";
            $vacuum->execute();
        }
        last unless $touched > 0;
    }
    # Pass 1 hands over to pass 2.
    backfillBuildsJobsetId(0) if $skipLocked;
 }
 # The script accepts no command-line arguments.
 if (@ARGV != 0) {
    die "syntax: $0\n";
 }
 print STDERR "Beginning with a VACUUM\n";
 $vacuum->execute();
 # Jobs are backfilled before Builds; the argument 1 starts each at pass 1/2.
 for my $backfill (\&backfillJobsJobsetId, \&backfillBuildsJobsetId) {
    $backfill->(1);
 }
 print STDERR "Ending with a VACUUM\n";
 $vacuum->execute();
--- a/src/script/hydra-eval-jobset
+++ b/src/script/hydra-eval-jobset
@ -417,7 +417,12 @@ sub checkBuild {
    my $build;
    txn_do($db, sub {
-        my $job = $jobset->jobs->update_or_create({ name => $jobName });
+        my $job = $jobset->jobs->update_or_create({
            name => $jobName,
            jobset_id => $jobset->id,
            project => $jobset->project,
            jobset => $jobset->name,
        });
        # Don't add a build that has already been scheduled for this
        # job, or has been built but is still a "current" build for
@ -464,6 +469,9 @@ sub checkBuild {
        # Add the build to the database.
        $build = $job->builds->create(
            { timestamp => $time
            , project => $jobset->project
            , jobset => $jobset->name
            , jobset_id => $jobset->id
            , description => null($buildInfo->{description})
            , license => null($buildInfo->{license})
            , homepage => null($buildInfo->{homepage})
--- a/src/script/hydra-init
+++ b/src/script/hydra-init
@ -44,6 +44,17 @@ my @versions = $db->resultset('SchemaVersion')->all;
 die "couldn't get Hydra schema version!" if scalar @versions != 1;
 my $schemaVersion = $versions[0]->version;
 # When the database is at schema version 60 or older, warn that the
 # jobset_id backfill is a manual step. The command named in the message
 # must match the script shipped in src/script (hydra-backfill-ids).
 if ($schemaVersion <= 60) {
    print STDERR <<QUOTE;
 WARNING: Schema version 62 and 63 make nullable jobset_id fields on
         Builds and Jobs non-nullable. On big Hydra servers, this
         migration will take many hours. Because of that, the
         migration is not automatic, and must be performed manually.
         To backfill these IDs, run: hydra-backfill-ids
 QUOTE
 }
 for (my $n = $schemaVersion; $n < $maxSchemaVersion; $n++) {
    my $m = $n + 1;
    print STDERR "upgrading Hydra schema from version $n to $m\n";
--- a/src/sql/hydra.sql
+++ b/src/sql/hydra.sql
@ -52,6 +52,7 @@ create table ProjectMembers (
 -- describing build jobs.
 create table Jobsets (
    name          text not null,
    id            serial not null,
    project       text not null,
    description   text,
    nixExprInput  text, -- name of the jobsetInput containing the Nix or Guix expression
@ -76,7 +77,8 @@ create table Jobsets (
    check ((type = 0) = (nixExprInput is not null and nixExprPath is not null)),
    check ((type = 1) = (flake is not null)),
    primary key   (project, name),
-    foreign key   (project) references Projects(name) on delete cascade on update cascade
+    foreign key   (project) references Projects(name) on delete cascade on update cascade,
    constraint    Jobsets_id_unique UNIQUE(id)
 #ifdef SQLITE
    ,
    foreign key   (project, name, nixExprInput) references JobsetInputs(project, jobset, name)
@ -144,9 +146,11 @@ create table JobsetInputAlts (
 create table Jobs (
    project       text not null,
    jobset        text not null,
    jobset_id     integer null,
    name          text not null,
    primary key   (project, jobset, name),
    foreign key   (jobset_id) references Jobsets(id) on delete cascade,
    foreign key   (project) references Projects(name) on delete cascade on update cascade,
    foreign key   (project, jobset) references Jobsets(project, name) on delete cascade on update cascade
 );
@ -166,6 +170,7 @@ create table Builds (
    -- Info about the inputs.
    project       text not null,
    jobset        text not null,
    jobset_id     integer null,
    job           text not null,
    -- Info about the build result.
@ -232,6 +237,7 @@ create table Builds (
    check (finished = 0 or (stoptime is not null and stoptime != 0)),
    check (finished = 0 or (starttime is not null and starttime != 0)),
    foreign key (jobset_id) references Jobsets(id) on delete cascade,
    foreign key (project) references Projects(name) on update cascade,
    foreign key (project, jobset) references Jobsets(project, name) on update cascade,
    foreign key (project, jobset, job) references Jobs(project, jobset, name) on update cascade
--- a/src/sql/upgrade-59.sql
+++ b/src/sql/upgrade-59.sql
@ -0,0 +1,4 @@
 -- Add an integer id column to Jobsets. Declaring it SERIAL NOT NULL
 -- makes the database assign a unique ID to every existing row
 -- automatically. The UNIQUE constraint allows Jobsets(id) to be the
 -- target of foreign keys (see upgrade-60.sql and upgrade-61.sql).
 ALTER TABLE Jobsets
  ADD COLUMN id SERIAL NOT NULL,
  ADD CONSTRAINT Jobsets_id_unique UNIQUE (id);
--- a/src/sql/upgrade-60.sql
+++ b/src/sql/upgrade-60.sql
@ -0,0 +1,10 @@
 -- Add the jobset_id column to the Jobs table. This will go
 -- quickly, since the field is nullable. Note this is just part one of
 -- this migration. Future steps involve a piecemeal backfilling, and
 -- then making the column non-null.
 -- Deleting a Jobsets row cascades to the Jobs rows that reference it.
 ALTER TABLE Jobs
  ADD COLUMN jobset_id integer NULL,
  ADD FOREIGN KEY (jobset_id)
      REFERENCES Jobsets(id)
      ON DELETE CASCADE;
--- a/src/sql/upgrade-61.sql
+++ b/src/sql/upgrade-61.sql
@ -0,0 +1,10 @@
 -- Add the jobset_id column to the Builds table. This will go
 -- quickly, since the field is nullable. Note this is just part one of
 -- this migration. Future steps involve a piecemeal backfilling, and
 -- then making the column non-null.
 -- Deleting a Jobsets row cascades to the Builds rows that reference it.
 ALTER TABLE Builds
  ADD COLUMN jobset_id integer NULL,
  ADD FOREIGN KEY (jobset_id)
      REFERENCES Jobsets(id)
      ON DELETE CASCADE;