Merge pull request #1062 from DeterminateSystems/runcommand-logs

RunCommand: store a log of execution and render it on the Build page
This commit is contained in:
Graham Christensen 2022-01-10 09:27:28 -05:00 committed by GitHub
commit bf9ac72fe1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 706 additions and 4 deletions

View file

@ -37,6 +37,7 @@ sub buildChain :Chained('/') :PathPart('build') :CaptureArgs(1) {
$c->stash->{project} = $c->stash->{build}->project;
$c->stash->{jobset} = $c->stash->{build}->jobset;
$c->stash->{job} = $c->stash->{build}->job;
$c->stash->{runcommandlogs} = [$c->stash->{build}->runcommandlogs->search({}, {order_by => ["id DESC"]})];
}

View file

@ -150,8 +150,20 @@ sub buildFinished {
foreach my $commandToRun (@{$commandsToRun}) {
my $command = $commandToRun->{command};
# todo: make all the to-run jobs "unstarted" in a batch, then start processing
my $runlog = $self->{db}->resultset("RunCommandLogs")->create({
job_matcher => $commandToRun->{matcher},
build_id => $build->get_column('id'),
command => $command
});
$runlog->started();
system("$command") == 0
or warn "notification command '$command' failed with exit status $?\n";
or warn "notification command '$command' failed with exit status $? ($!)\n";
$runlog->completed_with_child_error($?, $!);
}
}

View file

@ -499,6 +499,21 @@ __PACKAGE__->belongs_to(
{ is_deferrable => 0, on_delete => "NO ACTION", on_update => "CASCADE" },
);
=head2 runcommandlogs
Type: has_many
Related object: L<Hydra::Schema::Result::RunCommandLogs>
=cut
__PACKAGE__->has_many(
"runcommandlogs",
"Hydra::Schema::Result::RunCommandLogs",
{ "foreign.build_id" => "self.id" },
undef,
);
=head2 aggregates
Type: many_to_many
@ -528,8 +543,8 @@ __PACKAGE__->many_to_many(
);
# Created by DBIx::Class::Schema::Loader v0.07049 @ 2021-08-26 12:02:36
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:WHdSVHhQykmUz0tR/TExVg
# Created by DBIx::Class::Schema::Loader v0.07049 @ 2021-11-17 12:42:34
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:ylttv/NTMDcSZumBXRCOCw
__PACKAGE__->has_many(
"dependents",

View file

@ -0,0 +1,323 @@
use utf8;
package Hydra::Schema::Result::RunCommandLogs;
# Created by DBIx::Class::Schema::Loader
# DO NOT MODIFY THE FIRST PART OF THIS FILE
=head1 NAME
Hydra::Schema::Result::RunCommandLogs
=cut
use strict;
use warnings;
use base 'DBIx::Class::Core';
=head1 COMPONENTS LOADED
=over 4
=item * L<Hydra::Component::ToJSON>
=back
=cut
__PACKAGE__->load_components("+Hydra::Component::ToJSON");
=head1 TABLE: C<runcommandlogs>
=cut
__PACKAGE__->table("runcommandlogs");
=head1 ACCESSORS
=head2 id
data_type: 'integer'
is_auto_increment: 1
is_nullable: 0
sequence: 'runcommandlogs_id_seq'
=head2 job_matcher
data_type: 'text'
is_nullable: 0
=head2 build_id
data_type: 'integer'
is_foreign_key: 1
is_nullable: 0
=head2 command
data_type: 'text'
is_nullable: 0
=head2 start_time
data_type: 'integer'
is_nullable: 1
=head2 end_time
data_type: 'integer'
is_nullable: 1
=head2 error_number
data_type: 'integer'
is_nullable: 1
=head2 exit_code
data_type: 'integer'
is_nullable: 1
=head2 signal
data_type: 'integer'
is_nullable: 1
=head2 core_dumped
data_type: 'boolean'
is_nullable: 1
=cut
__PACKAGE__->add_columns(
"id",
{
data_type => "integer",
is_auto_increment => 1,
is_nullable => 0,
sequence => "runcommandlogs_id_seq",
},
"job_matcher",
{ data_type => "text", is_nullable => 0 },
"build_id",
{ data_type => "integer", is_foreign_key => 1, is_nullable => 0 },
"command",
{ data_type => "text", is_nullable => 0 },
"start_time",
{ data_type => "integer", is_nullable => 1 },
"end_time",
{ data_type => "integer", is_nullable => 1 },
"error_number",
{ data_type => "integer", is_nullable => 1 },
"exit_code",
{ data_type => "integer", is_nullable => 1 },
"signal",
{ data_type => "integer", is_nullable => 1 },
"core_dumped",
{ data_type => "boolean", is_nullable => 1 },
);
=head1 PRIMARY KEY
=over 4
=item * L</id>
=back
=cut
__PACKAGE__->set_primary_key("id");
=head1 RELATIONS
=head2 build
Type: belongs_to
Related object: L<Hydra::Schema::Result::Builds>
=cut
__PACKAGE__->belongs_to(
"build",
"Hydra::Schema::Result::Builds",
{ id => "build_id" },
{ is_deferrable => 0, on_delete => "CASCADE", on_update => "NO ACTION" },
);
# Created by DBIx::Class::Schema::Loader v0.07049 @ 2021-11-19 15:15:36
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:9AIzlQl1RjRXrs9gQCZKVw
use POSIX qw(WEXITSTATUS WIFEXITED WIFSIGNALED WTERMSIG);
=head2 started
Update the row with the current timestamp as the start time.
=cut
sub started {
my ($self) = @_;
return $self->update({
start_time => time()
});
}
=head2 completed_with_child_error
Update the row with the current timestamp, exit code, core dump, errno,
and signal status.
Arguments:
=over 2
=item C<$child_error>
The value of $? or $CHILD_ERROR (with use English) after calling system().
=item C<$errno>
The value of $! or $ERRNO (with use English) after calling system().
=back
=cut
sub completed_with_child_error {
my ($self, $child_error, $reported_errno) = @_;
my $errno = undef;
my $exit_code = undef;
my $signal = undef;
my $core_dumped = undef;
if ($child_error == -1) {
# -1 indicates `exec` failed, and this is the only
# case where the reported errno is valid.
#
# The `+ 0` is because $! is a dual var and likes to be a string
# if it can. +0 forces it to not be. Sigh.
$errno = $reported_errno + 0;
}
if (WIFEXITED($child_error)) {
# The exit status bits are only meaningful if the process exited
$exit_code = WEXITSTATUS($child_error);
}
if (WIFSIGNALED($child_error)) {
# The core dump and signal bits are only meaningful if the
# process was terminated via a signal
$signal = WTERMSIG($child_error);
# This `& 128` comes from where Perl constructs the CHILD_ERROR
# value:
# https://github.com/Perl/perl5/blob/a9d7a07c2ebbfd8ee992f1d27ef4cfbed53085b6/perl.h#L3609-L3621
#
# The `+ 0` is handling another dualvar. It is a bool, but a
# bool false is an empty string in boolean context and 0 in a
# numeric concept. The ORM knows the column is a bool, but
# does not treat the empty string as a bool when talking to
# postgres.
$core_dumped = (($child_error & 128) == 128) + 0;
}
return $self->update({
end_time => time(),
error_number => $errno,
exit_code => $exit_code,
signal => $signal,
core_dumped => $core_dumped,
});
}
=head2 did_succeed
Return:
* true if the task ran and finished successfully,
* false if the task did not run successfully but is completed
* undef if the task has not yet run
=cut
sub did_succeed {
my ($self) = @_;
if (!defined($self->end_time)) {
return undef;
}
if (!defined($self->exit_code)) {
return 0;
}
return $self->exit_code == 0;
}
=head2 is_running
Looks in the database to see if the task has been marked as completed.
Does not actually examine to see if the process is running anywhere.
Return:
* true if the task does not have a marked end date
* false if the task does have a recorded end
=cut
sub is_running {
my ($self) = @_;
return !defined($self->end_time);
}
=head2 did_fail_with_signal
Looks in the database to see if the task failed with a signal.
Return:
* true if the task is not running and failed with a signal.
* false if the task is running or exited with an exit code.
=cut
sub did_fail_with_signal {
my ($self) = @_;
if ($self->is_running()) {
return 0;
}
if ($self->did_succeed()) {
return 0;
}
return defined($self->signal);
}
=head2 did_fail_with_exec_error
Looks in the database to see if the task failed with a signal.
Return:
* true if the task is not running and failed with a signal.
* false if the task is running or exited with an exit code.
=cut
sub did_fail_with_exec_error {
my ($self) = @_;
if ($self->is_running()) {
return 0;
}
if ($self->did_succeed()) {
return 0;
}
return defined($self->error_number);
}
1;

View file

@ -149,6 +149,7 @@ END;
[% IF build.dependents %]<li class="nav-item"><a class="nav-link" href="#tabs-usedby" data-toggle="tab">Used By</a></li>[% END%]
[% IF drvAvailable %]<li class="nav-item"><a class="nav-link" href="#tabs-build-deps" data-toggle="tab">Build Dependencies</a></li>[% END %]
[% IF localStore && available %]<li class="nav-item"><a class="nav-link" href="#tabs-runtime-deps" data-toggle="tab">Runtime Dependencies</a></li>[% END %]
[% IF runcommandlogs.size() > 0 %]<li class="nav-item"><a class="nav-link" href="#tabs-runcommandlogs" data-toggle="tab">RunCommand Logs</a></li>[% END %]
</ul>
<div id="generic-tabs" class="tab-content">
@ -477,7 +478,7 @@ END;
</tbody>
</table>
</div>
[% END %]
[% END %]
[% IF drvAvailable %]
[% INCLUDE makeLazyTab tabName="tabs-build-deps" uri=c.uri_for('/build' build.id 'build-deps') %]
@ -487,6 +488,56 @@ END;
[% INCLUDE makeLazyTab tabName="tabs-runtime-deps" uri=c.uri_for('/build' build.id 'runtime-deps') %]
[% END %]
<div id="tabs-runcommandlogs" class="tab-pane">
<div class="d-flex flex-column">
[% FOREACH runcommandlog IN runcommandlogs %]
<div class="p-2 border-bottom">
<div class="d-flex flex-row">
<div class="d-flex flex-column" style="padding: 10px; width: 50px;">
[% IF runcommandlog.did_succeed() %]
<img src="[% c.uri_for("/static/images/emojione-check-2714.svg") %]" height="30" width="30" title="Succeeded" alt="Succeeded" class="build-status" />
[% ELSIF runcommandlog.is_running() %]
[% ELSE %]
<img src="[% c.uri_for("/static/images/emojione-red-x-274c.svg") %]" height="30" width="30" title="Failed" alt="Failed" class="build-status" />
[% END %]
</div>
<div class="d-flex flex-column mr-auto align-self-center">
<div><tt>[% runcommandlog.command | html%]</tt></div>
<div>
[% IF not runcommandlog.is_running() %]
[% IF runcommandlog.did_fail_with_signal() %]
Exit signal: [% runcommandlog.signal %]
[% IF runcommandlog.core_dumped %]
(Core Dumped)
[% END %]
[% ELSIF runcommandlog.did_fail_with_exec_error() %]
Exec error: [% runcommandlog.error_number %]
[% ELSIF not runcommandlog.did_succeed() %]
Exit code: [% runcommandlog.exit_code %]
[% END %]
[% END %]
</div>
</div>
<div class="d-flex flex-column align-items-end">
[% IF runcommandlog.start_time != undef %]
<div>Started at [% INCLUDE renderDateTime timestamp = runcommandlog.start_time; %]</div>
[% IF runcommandlog.end_time != undef %]
<div>Ran for [% INCLUDE renderDuration duration = runcommandlog.end_time - runcommandlog.start_time %]</div>
[% ELSE %]
<div>Running for [% INCLUDE renderDuration duration = curTime - runcommandlog.start_time %]</div>
[% END %]
[% ELSE %]
<div>Pending</div>
[% END %]
</div>
</div>
</div>
[% END %]
</div>
</div>
</div>
<div id="reproduce" class="modal hide fade" tabindex="-1" role="dialog" aria-hidden="true">

View file

@ -562,6 +562,55 @@ create table TaskRetries (
);
create index IndexTaskRetriesOrdered on TaskRetries(retry_at asc);
-- Records of RunCommand executions
--
-- The intended flow is:
--
-- 1. Create a RunCommandLogs entry when the task is "queued" to run
-- 2. Update the start_time when it begins
-- 3. Update the end_time and exit_code when it completes
create table RunCommandLogs (
id serial primary key not null,
job_matcher text not null,
build_id integer not null,
-- TODO: evaluation_id integer not null,
-- can we do this in a principled way? a build can be part of many evaluations
-- but a "bug" of RunCommand, imho, is that it should probably run per evaluation?
command text not null,
start_time integer,
end_time integer,
error_number integer,
exit_code integer,
signal integer,
core_dumped boolean,
foreign key (build_id) references Builds(id) on delete cascade,
-- foreign key (evaluation_id) references Builds(id) on delete cascade,
constraint RunCommandLogs_not_started_no_exit_time_no_code check (
-- If start time is null, then end_time, exit_code, signal, and core_dumped should be null.
-- A logical implication operator would be nice :).
(start_time is not null) or (
end_time is null
and error_number is null
and exit_code is null
and signal is null
and core_dumped is null
)
),
constraint RunCommandLogs_end_time_has_start_time check (
-- If end time is not null, then end_time, exit_code, and core_dumped should not be null
(end_time is null) or (start_time is not null)
)
-- Note: if exit_code is not null then signal and core_dumped must be null.
-- Similarly, if signal is not null then exit_code must be null and
-- core_dumped must not be null. However, these semantics are tricky
-- to encode as constraints and probably provide limited actual value.
);
-- The output paths that have permanently failed.
create table FailedPaths (
path text primary key not null

View file

@ -38,6 +38,7 @@ make_schema_at("Hydra::Schema", {
"nrbuilds" => "NrBuilds",
"projectmembers" => "ProjectMembers",
"projects" => "Projects",
"runcommandlogs" => "RunCommandLogs",
"schemaversion" => "SchemaVersion",
"starredjobs" => "StarredJobs",
"systemstatus" => "SystemStatus",

49
src/sql/upgrade-79.sql Normal file
View file

@ -0,0 +1,49 @@
-- Records of RunCommand executions
--
-- The intended flow is:
--
-- 1. Create a RunCommandLogs entry when the task is "queued" to run
-- 2. Update the start_time when it begins
-- 3. Update the end_time and exit_code when it completes
create table RunCommandLogs (
id serial primary key not null,
job_matcher text not null,
build_id integer not null,
-- TODO: evaluation_id integer not null,
-- can we do this in a principled way? a build can be part of many evaluations
-- but a "bug" of RunCommand, imho, is that it should probably run per evaluation?
command text not null,
start_time integer,
end_time integer,
error_number integer,
exit_code integer,
signal integer,
core_dumped boolean,
foreign key (build_id) references Builds(id) on delete cascade,
-- foreign key (evaluation_id) references Builds(id) on delete cascade,
constraint RunCommandLogs_not_started_no_exit_time_no_code check (
-- If start time is null, then end_time, exit_code, signal, and core_dumped should be null.
-- A logical implication operator would be nice :).
(start_time is not null) or (
end_time is null
and error_number is null
and exit_code is null
and signal is null
and core_dumped is null
)
),
constraint RunCommandLogs_end_time_has_start_time check (
-- If end time is not null, then end_time, exit_code, and core_dumped should not be null
(end_time is null) or (start_time is not null)
)
-- Note: if exit_code is not null then signal and core_dumped must be null.
-- Similarly, if signal is not null then exit_code must be null and
-- core_dumped must not be null. However, these semantics are tricky
-- to encode as constraints and probably provide limited actual value.
);

View file

@ -0,0 +1,139 @@
use strict;
use warnings;
use Setup;
use Test2::V0;
my $ctx = test_context();
my $db = $ctx->db();
my $builds = $ctx->makeAndEvaluateJobset(
expression => "basic.nix",
);
my $build = $builds->{"empty_dir"};
sub new_run_log {
return $db->resultset('RunCommandLogs')->create({
job_matcher => "*:*:*",
build_id => $build->get_column('id'),
command => "bogus",
});
}
subtest "Not yet started" => sub {
my $runlog = new_run_log();
is($runlog->start_time, undef, "The start time is undefined.");
is($runlog->end_time, undef, "The start time is undefined.");
is($runlog->exit_code, undef, "The exit code is undefined.");
is($runlog->signal, undef, "The signal is undefined.");
is($runlog->core_dumped, undef, "The core dump status is undefined.");
};
subtest "Completing a process before it is started is invalid" => sub {
my $runlog = new_run_log();
like(
dies {
$runlog->completed_with_child_error(0, 0);
},
qr/runcommandlogs_end_time_has_start_time/,
"It is invalid to complete the process before it started"
);
};
subtest "Starting a process" => sub {
my $runlog = new_run_log();
$runlog->started();
is($runlog->did_succeed(), undef, "The process has not yet succeeded.");
ok($runlog->is_running(), "The process is running.");
ok(!$runlog->did_fail_with_signal(), "The process was not killed by a signal.");
ok(!$runlog->did_fail_with_exec_error(), "The process did not fail to start due to an exec error.");
is($runlog->start_time, within(time() - 1, 2), "The start time is recent.");
is($runlog->end_time, undef, "The end time is undefined.");
is($runlog->exit_code, undef, "The exit code is undefined.");
is($runlog->signal, undef, "The signal is undefined.");
is($runlog->core_dumped, undef, "The core dump status is undefined.");
};
subtest "The process completed (success)" => sub {
my $runlog = new_run_log();
$runlog->started();
$runlog->completed_with_child_error(0, 123);
ok($runlog->did_succeed(), "The process did succeed.");
ok(!$runlog->is_running(), "The process is not running.");
ok(!$runlog->did_fail_with_signal(), "The process was not killed by a signal.");
ok(!$runlog->did_fail_with_exec_error(), "The process did not fail to start due to an exec error.");
is($runlog->start_time, within(time() - 1, 2), "The start time is recent.");
is($runlog->end_time, within(time() - 1, 2), "The end time is recent.");
is($runlog->error_number, undef, "The error number is undefined");
is($runlog->exit_code, 0, "The exit code is 0.");
is($runlog->signal, undef, "The signal is undefined.");
is($runlog->core_dumped, undef, "The core dump is undefined.");
};
subtest "The process completed (errored)" => sub {
my $runlog = new_run_log();
$runlog->started();
$runlog->completed_with_child_error(21760, 123);
ok(!$runlog->did_succeed(), "The process did not succeed.");
ok(!$runlog->is_running(), "The process is not running.");
ok(!$runlog->did_fail_with_signal(), "The process was not killed by a signal.");
ok(!$runlog->did_fail_with_exec_error(), "The process did not fail to start due to an exec error.");
is($runlog->start_time, within(time() - 1, 2), "The start time is recent.");
is($runlog->end_time, within(time() - 1, 2), "The end time is recent.");
is($runlog->error_number, undef, "The error number is undefined");
is($runlog->exit_code, 85, "The exit code is 85.");
is($runlog->signal, undef, "The signal is undefined.");
is($runlog->core_dumped, undef, "The core dump is undefined.");
};
subtest "The process completed (status 15, child error 0)" => sub {
my $runlog = new_run_log();
$runlog->started();
$runlog->completed_with_child_error(15, 0);
ok(!$runlog->did_succeed(), "The process did not succeed.");
ok(!$runlog->is_running(), "The process is not running.");
ok($runlog->did_fail_with_signal(), "The process was killed by a signal.");
ok(!$runlog->did_fail_with_exec_error(), "The process did not fail to start due to an exec error.");
is($runlog->start_time, within(time() - 1, 2), "The start time is recent.");
is($runlog->end_time, within(time() - 1, 2), "The end time is recent.");
is($runlog->error_number, undef, "The error number is undefined");
is($runlog->exit_code, undef, "The exit code is undefined.");
is($runlog->signal, 15, "Signal 15 was sent.");
is($runlog->core_dumped, 0, "There was no core dump.");
};
subtest "The process completed (signaled)" => sub {
my $runlog = new_run_log();
$runlog->started();
$runlog->completed_with_child_error(393, 234);
ok(!$runlog->did_succeed(), "The process did not succeed.");
ok(!$runlog->is_running(), "The process is not running.");
ok($runlog->did_fail_with_signal(), "The process was killed by a signal.");
ok(!$runlog->did_fail_with_exec_error(), "The process did not fail to start due to an exec error.");
is($runlog->start_time, within(time() - 1, 2), "The start time is recent.");
is($runlog->end_time, within(time() - 1, 2), "The end time is recent.");
is($runlog->error_number, undef, "The error number is undefined");
is($runlog->exit_code, undef, "The exit code is undefined.");
is($runlog->signal, 9, "The signal is 9.");
is($runlog->core_dumped, 1, "The core dumped.");
};
subtest "The process failed to start" => sub {
my $runlog = new_run_log();
$runlog->started();
$runlog->completed_with_child_error(-1, 2);
ok(!$runlog->did_succeed(), "The process did not succeed.");
ok(!$runlog->is_running(), "The process is running.");
ok(!$runlog->did_fail_with_signal(), "The process was not killed by a signal.");
ok($runlog->did_fail_with_exec_error(), "The process failed to start due to an exec error.");
is($runlog->start_time, within(time() - 1, 2), "The start time is recent.");
is($runlog->end_time, within(time() - 1, 2), "The end time is recent.");
is($runlog->error_number, 2, "The error number is saved");
is($runlog->exit_code, undef, "The exit code is undefined.");
is($runlog->signal, undef, "The signal is undefined.");
is($runlog->core_dumped, undef, "The core dumped is not defined.");
};
done_testing;

View file

@ -50,4 +50,14 @@ subtest "Validate the file parsed and at least one field matches" => sub {
is($dat->{build}, $newbuild->id, "The build event matches our expected ID.");
};
subtest "Validate a run log was created" => sub {
my $runlog = $build->runcommandlogs->find({});
ok($runlog->did_succeed(), "The process did succeed.");
is($runlog->job_matcher, "*:*:*", "An unspecified job matcher is defaulted to *:*:*");
is($runlog->command, 'cp "$HYDRA_JSON" "$HYDRA_DATA/joboutput.json"', "The executed command is saved.");
is($runlog->start_time, within(time() - 1, 2), "The start time is recent.");
is($runlog->end_time, within(time() - 1, 2), "The end time is also recent.");
is($runlog->exit_code, 0, "This command should have succeeded.");
};
done_testing;

View file

@ -0,0 +1,52 @@
use feature 'unicode_strings';
use strict;
use warnings;
use JSON::MaybeXS;
use Setup;
my %ctx = test_init(
hydra_config => q|
<runcommand>
command = invalid-command-this-does-not-exist
</runcommand>
|);
require Hydra::Schema;
require Hydra::Model::DB;
use Test2::V0;
my $db = Hydra::Model::DB->new;
hydra_setup($db);
my $project = $db->resultset('Projects')->create({name => "tests", displayname => "", owner => "root"});
# Most basic test case, no parameters
my $jobset = createBaseJobset("basic", "runcommand.nix", $ctx{jobsdir});
ok(evalSucceeds($jobset), "Evaluating jobs/runcommand.nix should exit with return code 0");
is(nrQueuedBuildsForJobset($jobset), 1, "Evaluating jobs/runcommand.nix should result in 1 build1");
(my $build) = queuedBuildsForJobset($jobset);
is($build->job, "metrics", "The only job should be metrics");
ok(runBuild($build), "Build should exit with return code 0");
my $newbuild = $db->resultset('Builds')->find($build->id);
is($newbuild->finished, 1, "Build should be finished.");
is($newbuild->buildstatus, 0, "Build should have buildstatus 0.");
ok(sendNotifications(), "Notifications execute successfully.");
subtest "Validate a run log was created" => sub {
my $runlog = $build->runcommandlogs->find({});
ok(!$runlog->did_succeed(), "The process did not succeed.");
ok($runlog->did_fail_with_exec_error(), "The process failed to start due to an exec error.");
is($runlog->job_matcher, "*:*:*", "An unspecified job matcher is defaulted to *:*:*");
is($runlog->command, 'invalid-command-this-does-not-exist', "The executed command is saved.");
is($runlog->start_time, within(time() - 1, 2), "The start time is recent.");
is($runlog->end_time, within(time() - 1, 2), "The end time is also recent.");
is($runlog->exit_code, undef, "This command should not have executed.");
is($runlog->error_number, 2, "This command failed to exec.");
};
done_testing;