GitInput: Include deepClone option in the cache key

Without this commit, two jobsets using the same repository as input,
but different `deepClone` options, end up incorrectly sharing the same
"checkout" for a given (`uri`, `branch`, `revision`) tuple.  The
presence or absence of `.git` is determined by the jobset execution
order.

This patch adds the missing `isDeepClone` boolean to the cache key.

The database upgrade script empties the `CachedGitInputs` table, as we
don't know if existing checkouts are deep clones.  Unfortunately, this
generally forces rebuilds even for correct `deepClone` checkouts, as
the binary contents of `.git` are not deterministic.

Fixes #510
This commit is contained in:
Damien Diederen 2021-06-15 10:31:42 +02:00
parent b6921c2006
commit df7dab1291
4 changed files with 28 additions and 5 deletions

View file

@ -182,7 +182,7 @@ sub fetchInput {
# TODO: Fix case where the branch is reset to a previous commit.
my $cachedInput;
($cachedInput) = $self->{db}->resultset('CachedGitInputs')->search(
{uri => $uri, branch => $branch, revision => $revision},
{uri => $uri, branch => $branch, revision => $revision, isdeepclone => defined($deepClone) ? 1 : 0},
{rows => 1});
addTempRoot($cachedInput->storepath) if defined $cachedInput;
@ -223,6 +223,7 @@ sub fetchInput {
{ uri => $uri
, branch => $branch
, revision => $revision
, isdeepclone => defined($deepClone) ? 1 : 0
, sha256hash => $sha256
, storepath => $storePath
});

View file

@ -50,6 +50,11 @@ __PACKAGE__->table("cachedgitinputs");
data_type: 'text'
is_nullable: 0
=head2 isdeepclone
data_type: 'boolean'
is_nullable: 0
=head2 sha256hash
data_type: 'text'
@ -69,6 +74,8 @@ __PACKAGE__->add_columns(
{ data_type => "text", is_nullable => 0 },
"revision",
{ data_type => "text", is_nullable => 0 },
"isdeepclone",
{ data_type => "boolean", is_nullable => 0 },
"sha256hash",
{ data_type => "text", is_nullable => 0 },
"storepath",
@ -85,14 +92,16 @@ __PACKAGE__->add_columns(
=item * L</revision>
=item * L</isdeepclone>
=back
=cut
__PACKAGE__->set_primary_key("uri", "branch", "revision");
__PACKAGE__->set_primary_key("uri", "branch", "revision", "isdeepclone");
# Created by DBIx::Class::Schema::Loader v0.07049 @ 2020-02-06 12:22:36
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:0sdK9uQZpx869oqS5thRLw
# Created by DBIx::Class::Schema::Loader v0.07049 @ 2021-06-19 17:26:24
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:Wr0grsFTaqRlMeM9vpXjrw
1;

View file

@ -402,9 +402,10 @@ create table CachedGitInputs (
uri text not null,
branch text not null,
revision text not null,
isDeepClone boolean not null,
sha256hash text not null,
storePath text not null,
primary key (uri, branch, revision)
primary key (uri, branch, revision, isDeepClone)
);
create table CachedDarcsInputs (

12
src/sql/upgrade-76.sql Normal file
View file

@ -0,0 +1,12 @@
-- Schema upgrade: make isDeepClone part of the CachedGitInputs key.
--
-- We don't know if existing checkouts are deep clones. This will
-- force a new fetch (and most likely trigger a new build for deep
-- clones, as the binary contents of '.git' are not deterministic).
DELETE FROM CachedGitInputs;
-- The table was emptied just above, so there are no existing rows that
-- would need a value for the new NOT NULL column (no DEFAULT is given).
ALTER TABLE CachedGitInputs
ADD COLUMN isDeepClone BOOLEAN NOT NULL;
-- Replace the primary key: drop the existing constraint, then recreate it
-- under the same name with isDeepClone appended to the key columns.
ALTER TABLE CachedGitInputs DROP CONSTRAINT cachedgitinputs_pkey;
ALTER TABLE CachedGitInputs ADD CONSTRAINT cachedgitinputs_pkey
PRIMARY KEY (uri, branch, revision, isDeepClone);