Merge pull request #711 from grahamc/jobset-id-pgsql-part-1

jobset_id, #710 Part 1
This commit is contained in:
Graham Christensen 2020-02-10 11:43:38 -05:00 committed by GitHub
commit add4f610ce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 381 additions and 12 deletions

View file

@ -64,6 +64,12 @@ __PACKAGE__->table("builds");
is_foreign_key: 1 is_foreign_key: 1
is_nullable: 0 is_nullable: 0
=head2 jobset_id
data_type: 'integer'
is_foreign_key: 1
is_nullable: 1
=head2 job =head2 job
data_type: 'text' data_type: 'text'
@ -215,6 +221,8 @@ __PACKAGE__->add_columns(
{ data_type => "text", is_foreign_key => 1, is_nullable => 0 }, { data_type => "text", is_foreign_key => 1, is_nullable => 0 },
"jobset", "jobset",
{ data_type => "text", is_foreign_key => 1, is_nullable => 0 }, { data_type => "text", is_foreign_key => 1, is_nullable => 0 },
"jobset_id",
{ data_type => "integer", is_foreign_key => 1, is_nullable => 1 },
"job", "job",
{ data_type => "text", is_foreign_key => 1, is_nullable => 0 }, { data_type => "text", is_foreign_key => 1, is_nullable => 0 },
"nixname", "nixname",
@ -457,6 +465,26 @@ Related object: L<Hydra::Schema::Jobsets>
__PACKAGE__->belongs_to( __PACKAGE__->belongs_to(
"jobset", "jobset",
"Hydra::Schema::Jobsets", "Hydra::Schema::Jobsets",
{ id => "jobset_id" },
{
is_deferrable => 0,
join_type => "LEFT",
on_delete => "CASCADE",
on_update => "NO ACTION",
},
);
=head2 jobset_project_jobset
Type: belongs_to
Related object: L<Hydra::Schema::Jobsets>
=cut
__PACKAGE__->belongs_to(
"jobset_project_jobset",
"Hydra::Schema::Jobsets",
{ name => "jobset", project => "project" }, { name => "jobset", project => "project" },
{ is_deferrable => 0, on_delete => "NO ACTION", on_update => "CASCADE" }, { is_deferrable => 0, on_delete => "NO ACTION", on_update => "CASCADE" },
); );
@ -550,8 +578,8 @@ __PACKAGE__->many_to_many(
); );
# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-06 12:22:36 # Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-06 12:32:28
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:H3hs+zEywsUmwTWKfSE8wQ # DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:RvrINOAowDcde8Nd9VD6rQ
__PACKAGE__->has_many( __PACKAGE__->has_many(
"dependents", "dependents",

View file

@ -47,6 +47,12 @@ __PACKAGE__->table("jobs");
is_foreign_key: 1 is_foreign_key: 1
is_nullable: 0 is_nullable: 0
=head2 jobset_id
data_type: 'integer'
is_foreign_key: 1
is_nullable: 1
=head2 name =head2 name
data_type: 'text' data_type: 'text'
@ -59,6 +65,8 @@ __PACKAGE__->add_columns(
{ data_type => "text", is_foreign_key => 1, is_nullable => 0 }, { data_type => "text", is_foreign_key => 1, is_nullable => 0 },
"jobset", "jobset",
{ data_type => "text", is_foreign_key => 1, is_nullable => 0 }, { data_type => "text", is_foreign_key => 1, is_nullable => 0 },
"jobset_id",
{ data_type => "integer", is_foreign_key => 1, is_nullable => 1 },
"name", "name",
{ data_type => "text", is_nullable => 0 }, { data_type => "text", is_nullable => 0 },
); );
@ -130,6 +138,26 @@ Related object: L<Hydra::Schema::Jobsets>
__PACKAGE__->belongs_to( __PACKAGE__->belongs_to(
"jobset", "jobset",
"Hydra::Schema::Jobsets", "Hydra::Schema::Jobsets",
{ id => "jobset_id" },
{
is_deferrable => 0,
join_type => "LEFT",
on_delete => "CASCADE",
on_update => "NO ACTION",
},
);
=head2 jobset_project_jobset
Type: belongs_to
Related object: L<Hydra::Schema::Jobsets>
=cut
__PACKAGE__->belongs_to(
"jobset_project_jobset",
"Hydra::Schema::Jobsets",
{ name => "jobset", project => "project" }, { name => "jobset", project => "project" },
{ is_deferrable => 0, on_delete => "CASCADE", on_update => "CASCADE" }, { is_deferrable => 0, on_delete => "CASCADE", on_update => "CASCADE" },
); );
@ -169,7 +197,7 @@ __PACKAGE__->has_many(
); );
# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-06 12:22:36 # Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-06 12:30:58
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:sYa6dZNK+stMAnTH0Tmn8A # DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:dFusVjxb423gIEoadAw9sw
1; 1;

View file

@ -40,6 +40,13 @@ __PACKAGE__->table("jobsets");
data_type: 'text' data_type: 'text'
is_nullable: 0 is_nullable: 0
=head2 id
data_type: 'integer'
is_auto_increment: 1
is_nullable: 0
sequence: 'jobsets_id_seq'
=head2 project =head2 project
data_type: 'text' data_type: 'text'
@ -153,6 +160,13 @@ __PACKAGE__->table("jobsets");
__PACKAGE__->add_columns( __PACKAGE__->add_columns(
"name", "name",
{ data_type => "text", is_nullable => 0 }, { data_type => "text", is_nullable => 0 },
"id",
{
data_type => "integer",
is_auto_increment => 1,
is_nullable => 0,
sequence => "jobsets_id_seq",
},
"project", "project",
{ data_type => "text", is_foreign_key => 1, is_nullable => 0 }, { data_type => "text", is_foreign_key => 1, is_nullable => 0 },
"description", "description",
@ -209,6 +223,20 @@ __PACKAGE__->add_columns(
__PACKAGE__->set_primary_key("project", "name"); __PACKAGE__->set_primary_key("project", "name");
=head1 UNIQUE CONSTRAINTS
=head2 C<jobsets_id_unique>
=over 4
=item * L</id>
=back
=cut
__PACKAGE__->add_unique_constraint("jobsets_id_unique", ["id"]);
=head1 RELATIONS =head1 RELATIONS
=head2 buildmetrics =head2 buildmetrics
@ -229,7 +257,7 @@ __PACKAGE__->has_many(
undef, undef,
); );
=head2 builds =head2 builds_jobset_ids
Type: has_many Type: has_many
@ -238,7 +266,22 @@ Related object: L<Hydra::Schema::Builds>
=cut =cut
__PACKAGE__->has_many( __PACKAGE__->has_many(
"builds", "builds_jobset_ids",
"Hydra::Schema::Builds",
{ "foreign.jobset_id" => "self.id" },
undef,
);
=head2 builds_project_jobsets
Type: has_many
Related object: L<Hydra::Schema::Builds>
=cut
__PACKAGE__->has_many(
"builds_project_jobsets",
"Hydra::Schema::Builds", "Hydra::Schema::Builds",
{ {
"foreign.jobset" => "self.name", "foreign.jobset" => "self.name",
@ -247,7 +290,7 @@ __PACKAGE__->has_many(
undef, undef,
); );
=head2 jobs =head2 jobs_jobset_ids
Type: has_many Type: has_many
@ -256,7 +299,22 @@ Related object: L<Hydra::Schema::Jobs>
=cut =cut
__PACKAGE__->has_many( __PACKAGE__->has_many(
"jobs", "jobs_jobset_ids",
"Hydra::Schema::Jobs",
{ "foreign.jobset_id" => "self.id" },
undef,
);
=head2 jobs_project_jobsets
Type: has_many
Related object: L<Hydra::Schema::Jobs>
=cut
__PACKAGE__->has_many(
"jobs_project_jobsets",
"Hydra::Schema::Jobs", "Hydra::Schema::Jobs",
{ {
"foreign.jobset" => "self.name", "foreign.jobset" => "self.name",
@ -350,8 +408,49 @@ __PACKAGE__->has_many(
); );
# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-09 15:21:11 # Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-09 15:32:17
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:FVP1/AWjdKTlY6djrG592A # DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:P8+t7rgpOqkGwRdM2b+3Bw
=head2 builds
Type: has_many
Related object: L<Hydra::Schema::Builds>
=cut
__PACKAGE__->has_many(
"builds",
"Hydra::Schema::Builds",
{
"foreign.jobset" => "self.name",
"foreign.project" => "self.project",
},
undef,
);
=head2 jobs
Type: has_many
Related object: L<Hydra::Schema::Jobs>
=cut
__PACKAGE__->has_many(
"jobs",
"Hydra::Schema::Jobs",
{
"foreign.jobset" => "self.name",
"foreign.project" => "self.project",
},
undef,
);
# "id" is not part of this table's primary key (project, name), so
# explicitly set DBIx::Class's retrieve_on_insert column option on it;
# that option asks DBIx::Class to read the database-generated value back
# into the row object after an insert.
__PACKAGE__->add_column(
"+id" => { retrieve_on_insert => 1 }
);
my %hint = ( my %hint = (
columns => [ columns => [

View file

@ -3,6 +3,7 @@ EXTRA_DIST = \
hydra-eval-guile-jobs.in hydra-eval-guile-jobs.in
distributable_scripts = \ distributable_scripts = \
hydra-backfill-ids \
hydra-init \ hydra-init \
hydra-eval-jobset \ hydra-eval-jobset \
hydra-server \ hydra-server \

164
src/script/hydra-backfill-ids Executable file
View file

@ -0,0 +1,164 @@
#! /usr/bin/env perl
use strict;
use utf8;
use Hydra::Model::DB;
# Turn on autoflush for both output handles so progress lines are
# written out as soon as they are printed.
STDOUT->autoflush();
STDERR->autoflush(1);
# Progress messages go to STDERR; encode them as UTF-8.
binmode STDERR, ":encoding(utf8)";
# Shared database model; every statement in this script is prepared
# through $db->storage->dbh.
my $db = Hydra::Model::DB->new();
# Prepared once and executed at startup, at shutdown, and after every
# $iterationsPerVacuum batches inside the backfill loops.
my $vacuum = $db->storage->dbh->prepare("VACUUM;");
# NOTE(review): $dryRun is set from HYDRA_DRY_RUN but is not read anywhere
# else in this script, so setting the variable currently has no visible
# effect -- confirm whether dry-run support was intended.
my $dryRun = defined $ENV{'HYDRA_DRY_RUN'};
# Maximum number of rows selected per UPDATE batch (bound to the LIMIT
# placeholder of the batch queries).
my $batchSize = 10000;
# Number of batches between intermediate VACUUM runs.
my $iterationsPerVacuum = 500;
# Backfill Jobs.jobset_id for every Jobs row where it is still NULL, by
# looking up the Jobsets row with the same (project, name) pair.
#
# Rows are updated in batches of at most $batchSize; a VACUUM is issued
# every $iterationsPerVacuum batches. Batches are run until one of them
# affects no rows.
#
# Arguments:
#   $skipLocked - true for pass 1: the row-selecting subquery is given a
#                 "FOR UPDATE SKIP LOCKED" clause. False for pass 2: no
#                 locking clause is added.
#
# When called with a true $skipLocked, the function finishes by calling
# itself once more with 0, so a single call with 1 runs both passes.
sub backfillJobsJobsetId {
    my ($skipLocked) = @_;

    my $logPrefix;
    if ($skipLocked) {
        $logPrefix = "(pass 1/2)";
    } else {
        $logPrefix = "(pass 2/2)";
    }

    print STDERR "$logPrefix Backfilling Jobs records where jobset_id is NULL...\n";

    # Used only for the "remaining" figure in the progress output.
    my $totalToGoSth = $db->storage->dbh->prepare(<<QUERY);
SELECT COUNT(*) FROM jobs WHERE jobset_id IS NULL
QUERY
    $totalToGoSth->execute();
    my ($totalToGo) = $totalToGoSth->fetchrow_array;

    my $skipLockedStmt = $skipLocked ? "FOR UPDATE SKIP LOCKED" : "";

    # One batch: pick up to LIMIT rows that still lack a jobset_id and set
    # it from the matching Jobsets row.
    my $update10kJobs = $db->storage->dbh->prepare(<<QUERY);
UPDATE jobs
SET jobset_id = (
SELECT jobsets.id
FROM jobsets
WHERE jobsets.name = jobs.jobset
AND jobsets.project = jobs.project
)
WHERE (jobs.project, jobs.jobset, jobs.name) in (
SELECT jobsprime.project, jobsprime.jobset, jobsprime.name
FROM jobs jobsprime
WHERE jobsprime.jobset_id IS NULL
$skipLockedStmt
LIMIT ?
);
QUERY

    print STDERR "$logPrefix Total Jobs records without a jobset_id: $totalToGo\n";

    my $iteration = 0;
    my $affected;
    do {
        $iteration++;

        # DBI's execute() reports "zero rows affected" as the string "0E0"
        # (true, but numerically zero). Adding 0 turns it into a plain
        # number so the progress line prints "0" instead of "0E0".
        $affected = 0 + $update10kJobs->execute($batchSize);

        print STDERR "$logPrefix (batch #$iteration; $totalToGo remaining) Jobs.jobset_id: affected $affected rows...\n";
        $totalToGo -= $affected;

        if ($iteration % $iterationsPerVacuum == 0) {
            print STDERR "$logPrefix (batch #$iteration) Vacuuming...\n";
            $vacuum->execute();
        }
    } while ($affected > 0);

    # Pass 1 is always followed by pass 2.
    if ($skipLocked) {
        backfillJobsJobsetId(0);
    }
}
# Backfill Builds.jobset_id for every Builds row where it is still NULL,
# by looking up the Jobsets row with the same (project, name) pair.
#
# Rows are processed in ascending id order, in batches of at most
# $batchSize. Each batch only considers rows with id >= the highest id
# returned by the previous batch. A VACUUM is issued every
# $iterationsPerVacuum batches. Batches are run until one of them affects
# no rows.
#
# Arguments:
#   $skipLocked - true for pass 1: the row-selecting subquery is given a
#                 "FOR UPDATE SKIP LOCKED" clause. False for pass 2: no
#                 locking clause is added.
#
# When called with a true $skipLocked, the function finishes by calling
# itself once more with 0, so a single call with 1 runs both passes.
sub backfillBuildsJobsetId {
    my ($skipLocked) = @_;

    my $logPrefix;
    if ($skipLocked) {
        $logPrefix = "(pass 1/2)";
        print STDERR "$logPrefix Backfilling unlocked Builds records where jobset_id is NULL...\n";
    } else {
        $logPrefix = "(pass 2/2)";
        print STDERR "$logPrefix Backfilling all Builds records where jobset_id is NULL...\n";
    }

    my $skipLockedStmt = $skipLocked ? "FOR UPDATE SKIP LOCKED" : "";

    # One batch: update up to LIMIT rows at or above the given id, and
    # report how many rows were touched plus the highest id among them.
    my $update10kBuilds = $db->storage->dbh->prepare(<<"QUERY");
WITH updateprogress AS (
UPDATE builds
SET jobset_id = (
SELECT jobsets.id
FROM jobsets
WHERE jobsets.name = builds.jobset
AND jobsets.project = builds.project
)
WHERE builds.id in (
SELECT buildprime.id
FROM builds buildprime
WHERE buildprime.jobset_id IS NULL
AND buildprime.id >= ?
ORDER BY buildprime.id
$skipLockedStmt
LIMIT ?
)
RETURNING id
)
SELECT
count(*) AS affected,
max(updateprogress.id) AS highest_id
FROM updateprogress;
QUERY

    # Starting point: the lowest id that still has no jobset_id. This is
    # undefined when no such row exists.
    my $lowestNullIdSth = $db->storage->dbh->prepare(<<QUERY);
SELECT id FROM builds WHERE jobset_id IS NULL ORDER BY id LIMIT 1
QUERY
    $lowestNullIdSth->execute();
    my ($highestId) = $lowestNullIdSth->fetchrow_array;

    # Used only for the "remaining" figure in the progress output.
    my $totalToGoSth = $db->storage->dbh->prepare(<<QUERY);
SELECT COUNT(*) FROM builds WHERE jobset_id IS NULL AND id >= ?
QUERY
    $totalToGoSth->execute($highestId);
    my ($totalToGo) = $totalToGoSth->fetchrow_array;

    # An id is undefined when there was nothing to backfill, and again
    # after the final (empty) batch because max() over no rows is NULL.
    # Show a placeholder instead of interpolating undef into the log.
    my $showId = sub { defined $_[0] ? $_[0] : "(none)" };

    print STDERR "$logPrefix Total Builds records without a jobset_id: $totalToGo, starting at " . $showId->($highestId) . "\n";

    my $iteration = 0;
    my $affected;
    do {
        my $previousHighId = $highestId;
        $iteration++;

        $update10kBuilds->execute($highestId, $batchSize);
        ($affected, $highestId) = $update10kBuilds->fetchrow_array;

        print STDERR "$logPrefix (batch #$iteration; $totalToGo remaining) Builds.jobset_id: affected $affected rows; max ID: "
            . $showId->($previousHighId) . " -> " . $showId->($highestId) . "\n";
        $totalToGo -= $affected;

        if ($iteration % $iterationsPerVacuum == 0) {
            print STDERR "$logPrefix (batch #$iteration) Vacuuming...\n";
            $vacuum->execute();
        }
    } while ($affected > 0);

    # Pass 1 is always followed by pass 2.
    if ($skipLocked) {
        backfillBuildsJobsetId(0);
    }
}
# The script accepts no command-line arguments.
if (@ARGV != 0) {
    die "syntax: $0\n";
}

# VACUUM, backfill Jobs then Builds (each call starts at pass 1 and
# continues into pass 2 on its own), then VACUUM once more.
print STDERR "Beginning with a VACUUM\n";
$vacuum->execute();

backfillJobsJobsetId(1);
backfillBuildsJobsetId(1);

print STDERR "Ending with a VACUUM\n";
$vacuum->execute();

View file

@ -417,7 +417,12 @@ sub checkBuild {
my $build; my $build;
txn_do($db, sub { txn_do($db, sub {
my $job = $jobset->jobs->update_or_create({ name => $jobName }); my $job = $jobset->jobs->update_or_create({
name => $jobName,
jobset_id => $jobset->id,
project => $jobset->project,
jobset => $jobset->name,
});
# Don't add a build that has already been scheduled for this # Don't add a build that has already been scheduled for this
# job, or has been built but is still a "current" build for # job, or has been built but is still a "current" build for
@ -464,6 +469,9 @@ sub checkBuild {
# Add the build to the database. # Add the build to the database.
$build = $job->builds->create( $build = $job->builds->create(
{ timestamp => $time { timestamp => $time
, project => $jobset->project
, jobset => $jobset->name
, jobset_id => $jobset->id
, description => null($buildInfo->{description}) , description => null($buildInfo->{description})
, license => null($buildInfo->{license}) , license => null($buildInfo->{license})
, homepage => null($buildInfo->{homepage}) , homepage => null($buildInfo->{homepage})

View file

@ -44,6 +44,17 @@ my @versions = $db->resultset('SchemaVersion')->all;
die "couldn't get Hydra schema version!" if scalar @versions != 1; die "couldn't get Hydra schema version!" if scalar @versions != 1;
my $schemaVersion = $versions[0]->version; my $schemaVersion = $versions[0]->version;
# Tell operators upgrading from schema version 60 or earlier that the
# jobset_id backfill is a manual step. The helper script shipped for this
# is named hydra-backfill-ids, so the message must use that exact name.
if ($schemaVersion <= 60) {
    print STDERR <<QUOTE;
WARNING: Schema version 62 and 63 make nullable jobset_id fields on
Builds and Jobs non-nullable. On big Hydra servers, this
migration will take many hours. Because of that, the
migration is not automatic, and must be performed manually.
To backfill these IDs, run: hydra-backfill-ids
QUOTE
}
for (my $n = $schemaVersion; $n < $maxSchemaVersion; $n++) { for (my $n = $schemaVersion; $n < $maxSchemaVersion; $n++) {
my $m = $n + 1; my $m = $n + 1;
print STDERR "upgrading Hydra schema from version $n to $m\n"; print STDERR "upgrading Hydra schema from version $n to $m\n";

View file

@ -52,6 +52,7 @@ create table ProjectMembers (
-- describing build jobs. -- describing build jobs.
create table Jobsets ( create table Jobsets (
name text not null, name text not null,
id serial not null,
project text not null, project text not null,
description text, description text,
nixExprInput text, -- name of the jobsetInput containing the Nix or Guix expression nixExprInput text, -- name of the jobsetInput containing the Nix or Guix expression
@ -76,7 +77,8 @@ create table Jobsets (
check ((type = 0) = (nixExprInput is not null and nixExprPath is not null)), check ((type = 0) = (nixExprInput is not null and nixExprPath is not null)),
check ((type = 1) = (flake is not null)), check ((type = 1) = (flake is not null)),
primary key (project, name), primary key (project, name),
foreign key (project) references Projects(name) on delete cascade on update cascade foreign key (project) references Projects(name) on delete cascade on update cascade,
constraint Jobsets_id_unique UNIQUE(id)
#ifdef SQLITE #ifdef SQLITE
, ,
foreign key (project, name, nixExprInput) references JobsetInputs(project, jobset, name) foreign key (project, name, nixExprInput) references JobsetInputs(project, jobset, name)
@ -144,9 +146,11 @@ create table JobsetInputAlts (
create table Jobs ( create table Jobs (
project text not null, project text not null,
jobset text not null, jobset text not null,
jobset_id integer null,
name text not null, name text not null,
primary key (project, jobset, name), primary key (project, jobset, name),
foreign key (jobset_id) references Jobsets(id) on delete cascade,
foreign key (project) references Projects(name) on delete cascade on update cascade, foreign key (project) references Projects(name) on delete cascade on update cascade,
foreign key (project, jobset) references Jobsets(project, name) on delete cascade on update cascade foreign key (project, jobset) references Jobsets(project, name) on delete cascade on update cascade
); );
@ -166,6 +170,7 @@ create table Builds (
-- Info about the inputs. -- Info about the inputs.
project text not null, project text not null,
jobset text not null, jobset text not null,
jobset_id integer null,
job text not null, job text not null,
-- Info about the build result. -- Info about the build result.
@ -232,6 +237,7 @@ create table Builds (
check (finished = 0 or (stoptime is not null and stoptime != 0)), check (finished = 0 or (stoptime is not null and stoptime != 0)),
check (finished = 0 or (starttime is not null and starttime != 0)), check (finished = 0 or (starttime is not null and starttime != 0)),
foreign key (jobset_id) references Jobsets(id) on delete cascade,
foreign key (project) references Projects(name) on update cascade, foreign key (project) references Projects(name) on update cascade,
foreign key (project, jobset) references Jobsets(project, name) on update cascade, foreign key (project, jobset) references Jobsets(project, name) on update cascade,
foreign key (project, jobset, job) references Jobs(project, jobset, name) on update cascade foreign key (project, jobset, job) references Jobs(project, jobset, name) on update cascade

4
src/sql/upgrade-59.sql Normal file
View file

@ -0,0 +1,4 @@
-- Give Jobsets a numeric id column backed by a sequence (SERIAL), and
-- require it to be unique. Unique IDs are added to Jobsets automatically.
ALTER TABLE Jobsets
    ADD COLUMN id SERIAL NOT NULL,
    ADD CONSTRAINT Jobsets_id_unique UNIQUE (id);

10
src/sql/upgrade-60.sql Normal file
View file

@ -0,0 +1,10 @@
-- Part one of the jobset_id migration for the Jobs table: add a nullable
-- jobset_id column that references Jobsets(id) and is removed together
-- with its jobset (ON DELETE CASCADE).
--
-- Because the column is nullable, this step is expected to go quickly.
-- Later steps backfill the column piecemeal and then make it non-null.
ALTER TABLE Jobs
    ADD COLUMN jobset_id integer NULL,
    ADD FOREIGN KEY (jobset_id) REFERENCES Jobsets(id) ON DELETE CASCADE;

10
src/sql/upgrade-61.sql Normal file
View file

@ -0,0 +1,10 @@
-- Part one of the jobset_id migration for the Builds table: add a nullable
-- jobset_id column that references Jobsets(id) and is removed together
-- with its jobset (ON DELETE CASCADE).
--
-- Because the column is nullable, this step is expected to go quickly.
-- Later steps backfill the column piecemeal and then make it non-null.
ALTER TABLE Builds
    ADD COLUMN jobset_id integer NULL,
    ADD FOREIGN KEY (jobset_id) REFERENCES Jobsets(id) ON DELETE CASCADE;