feat: non-flakes entrypoint #23

Open
raito wants to merge 17 commits from non-flakes-rerun into main
2 changed files with 180 additions and 59 deletions


@@ -24,6 +24,7 @@ from buildbot.reporters.generators.build import BuildStatusGenerator
from buildbot.reporters.message import MessageFormatterFunction
from buildbot.process.buildstep import EXCEPTION
from buildbot.process.buildstep import SUCCESS
from buildbot.process.buildstep import BuildStepFailed
from buildbot.process.results import worst_status
import requests
@@ -39,6 +40,14 @@ log = Logger()
FLAKE_TARGET_ATTRIBUTE_FOR_JOBS = "buildbotJobs"
@dataclass
class EvaluatorSettings:
supported_systems: list[str]
worker_count: int
max_memory_size: int
gc_roots_dir: str
lock: util.MasterLock
@dataclass
class NixBuilder:
protocol: str
@@ -130,7 +139,7 @@ class GerritConfig:
"""
Returns the prefix to build a repourl using that gerrit configuration.
"""
return 'ssh://{self.username}@{self.domain}:{self.port}/'
return f'ssh://{self.username}@{self.domain}:{self.port}/'
class BuildTrigger(steps.BuildStep):
def __init__(
@@ -148,7 +157,7 @@ class BuildTrigger(steps.BuildStep):
self.ended = False
self.waitForFinishDeferred = None
self.brids = []
self.description = f"building {len(jobs)} hydra jobs"
self.description = f"building {len(jobs)} jobs"
super().__init__(**kwargs)
def interrupt(self, reason):
@@ -177,15 +186,14 @@ class BuildTrigger(steps.BuildStep):
return sch
def schedule_one(self, build_props: Properties, job):
project_name = build_props.getProperty('event.project')
source = f"{project_name}-eval-lix"
project_name = build_props.getProperty("event.refUpdate.project") or build_props.getProperty("event.change.project")
source = f"{project_name}-eval"
attr = job.get("attr", "eval-error")
name = attr
name = f"{FLAKE_TARGET_ATTRIBUTE_FOR_JOBS}.{name}"
name = f"buildbotJobs.{attr}"
error = job.get("error")
props = Properties()
props.setProperty("virtual_builder_name", name, source)
props.setProperty("status_name", f"nix-build .#{FLAKE_TARGET_ATTRIBUTE_FOR_JOBS}.{attr}", source)
props.setProperty("status_name", f"building buildbotJobs.{attr}", source)
props.setProperty("virtual_builder_tags", "", source)
if error is not None:
@@ -372,7 +380,8 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep):
# run nix-eval-jobs --flake .#$FLAKE_TARGET_ATTRIBUTE_FOR_JOBS to generate the dict of stages
cmd: remotecommand.RemoteCommand = yield self.makeRemoteShellCommand()
build_props = self.build.getProperties()
project_name = build_props.get('event.project')
project_name = build_props.getProperty("event.refUpdate.project") or build_props.getProperty("event.change.project")
assert project_name is not None, "`event.refUpdate.project` or `event.change.project` is not available on the build properties, unexpected build type!"
yield self.runCommand(cmd)
@@ -396,26 +405,11 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep):
if not system or system in self.supported_systems: # report eval errors
filtered_jobs.append(job)
# Filter out failed evaluations
succeeded_jobs = [job for job in filtered_jobs if job.get('error') is None]
drv_show_log: Log = yield self.getLog("stdio")
drv_show_log.addStdout(f"getting derivation infos\n")
cmd = yield self.makeRemoteShellCommand(
stdioLogName=None,
collectStdout=True,
command=(
["nix", "derivation", "show", "--recursive"]
+ [ drv for drv in (job.get("drvPath") for job in filtered_jobs) if drv ]
),
)
yield self.runCommand(cmd)
drv_show_log.addStdout(f"done\n")
try:
drv_info = json.loads(cmd.stdout)
except json.JSONDecodeError as e:
msg = f"Failed to parse `nix derivation show` output for {cmd.command}"
raise BuildbotNixError(msg) from e
all_deps = dict()
for drv, info in drv_info.items():
all_deps[drv] = set(info.get("inputDrvs").keys())
def closure_of(key, deps):
r, size = set([key]), 0
@@ -424,14 +418,34 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep):
r.update(*[ deps[k] for k in r ])
return r.difference([key])
job_set = set(( drv for drv in ( job.get("drvPath") for job in filtered_jobs ) if drv ))
all_deps = { k: list(closure_of(k, all_deps).intersection(job_set)) for k in job_set }
if succeeded_jobs:
drv_show_log.addStdout(f"getting derivation infos for valid derivations\n")
cmd = yield self.makeRemoteShellCommand(
stdioLogName=None,
collectStdout=True,
command=(
["nix", "derivation", "show", "--recursive"]
+ [ drv for drv in (job.get("drvPath") for job in succeeded_jobs) if drv ]
),
)
yield self.runCommand(cmd)
drv_show_log.addStdout(f"done\n")
try:
drv_info = json.loads(cmd.stdout)
except json.JSONDecodeError as e:
msg = f"Failed to parse `nix derivation show` output for {cmd.command}"
raise BuildbotNixError(msg) from e
for drv, info in drv_info.items():
all_deps[drv] = set(info.get("inputDrvs").keys())
job_set = set(( drv for drv in ( job.get("drvPath") for job in filtered_jobs ) if drv ))
all_deps = { k: list(closure_of(k, all_deps).intersection(job_set)) for k in job_set }
self.build.addStepsAfterCurrentStep(
[
BuildTrigger(
builds_scheduler_group=f"{project_name}-nix-build",
name="build flake",
name="build derivations",
jobs=filtered_jobs,
all_deps=all_deps,
),
@@ -440,6 +454,88 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep):
return result
def make_job_evaluator(name: str, settings: EvaluatorSettings, flake: bool) -> NixEvalCommand:
actual_command = []
if flake:
actual_command += ["--flake", f".#{FLAKE_TARGET_ATTRIBUTE_FOR_JOBS}"]
else:
actual_command += ["--expr", "import ./.ci/buildbot.nix"]
return NixEvalCommand(
env={},
name=name,
supported_systems=settings.supported_systems,
command=[
"nix-eval-jobs",
"--workers",
str(settings.worker_count),
"--max-memory-size",
str(settings.max_memory_size),
"--gc-roots-dir",
settings.gc_roots_dir,
"--force-recurse",
"--check-cache-status",
] + actual_command,
haltOnFailure=True,
locks=[settings.lock.access("exclusive")]
)
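In the non-flake case, `nix-eval-jobs --expr "import ./.ci/buildbot.nix"` expects the imported expression to evaluate to an attribute set of derivations (nested sets are walked because of `--force-recurse`). A minimal sketch of such an entrypoint; the nixpkgs pin and job names below are illustrative only, not part of this change:

# .ci/buildbot.nix -- hypothetical example
let
  # assumption: the repository pins its own nixpkgs; any pinning mechanism works
  pkgs = import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/nixos-24.05.tar.gz") { };
in
{
  # every attribute that evaluates to a derivation becomes one Buildbot job
  hello = pkgs.hello;
  checks = {
    shellcheck-version = pkgs.runCommand "shellcheck-version" { } ''
      ${pkgs.shellcheck}/bin/shellcheck --version > $out
    '';
  };
}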
class NixConfigure(buildstep.CommandMixin, steps.BuildStep):
name = "determining jobs"
"""
Determine which `NixEvalCommand` step should be added next,
based on the existence of:
- flake.nix
- .ci/buildbot.nix
"""
def __init__(self, eval_settings: EvaluatorSettings, **kwargs: Any) -> None:
self.evaluator_settings = eval_settings
super().__init__(**kwargs)
self.observer = logobserver.BufferLogObserver()
self.addLogObserver("stdio", self.observer)
@defer.inlineCallbacks
def run(self) -> Generator[Any, object, Any]:
try:
configure_log: Log = yield self.getLog("stdio")
except Exception:
configure_log: Log = yield self.addLog("stdio")
# .ci/buildbot.nix takes precedence over flake.nix.
configure_log.addStdout("checking if there's a .ci/buildbot.nix...\n")
ci_buildbot_defn_exists = yield self.pathExists('build/.ci/buildbot.nix')
if ci_buildbot_defn_exists:
configure_log.addStdout(".ci/buildbot.nix found, configured for non-flake CI\n")
self.build.addStepsAfterCurrentStep(
[
make_job_evaluator(
"evaluate `.ci/buildbot.nix` jobs",
self.evaluator_settings,
False
)
]
)
return SUCCESS
flake_exists = yield self.pathExists('build/flake.nix')
if flake_exists:
configure_log.addStdout(f"flake.nix found")
self.build.addStepsAfterCurrentStep([
make_job_evaluator(
"evaluate `flake.nix` jobs",
self.evaluator_settings,
True
)
]
)
return SUCCESS
configure_log.addStdout("neither flake.nix found neither .ci/buildbot.nix, no CI to run!")
return SUCCESS
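When a flake.nix is found instead, the evaluator is pointed at `.#buildbotJobs` (the FLAKE_TARGET_ATTRIBUTE_FOR_JOBS constant). A sketch of the corresponding flake output, again only illustrative; the input pin and attribute layout are assumptions:

# flake.nix -- hypothetical example of the flake entrypoint
{
  inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.05";

  outputs = { self, nixpkgs }: {
    # evaluated via `nix-eval-jobs --flake .#buildbotJobs`; nested attribute
    # sets are recursed into thanks to --force-recurse
    buildbotJobs = {
      hello = nixpkgs.legacyPackages.x86_64-linux.hello;
    };
  };
}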
class NixBuildCommand(buildstep.ShellMixin, steps.BuildStep):
"""Builds a nix derivation."""
@@ -481,10 +577,19 @@ def nix_eval_config(
worker_count: int,
max_memory_size: int,
) -> util.BuilderConfig:
"""Uses nix-eval-jobs to evaluate $FLAKE_TARGET_ATTRIBUTE_FOR_JOBS (`.#hydraJobs` by default) from flake.nix in parallel.
"""
Uses nix-eval-jobs to evaluate the entrypoint of this project.
For each evaluated attribute a new build pipeline is started.
"""
factory = util.BuildFactory()
gerrit_private_key = None
with open(project.private_sshkey_path, 'r') as f:
gerrit_private_key = f.read()
if gerrit_private_key is None:
raise RuntimeError('No gerrit private key to fetch the repositories')
# check out the source
factory.addStep(
steps.Gerrit(
@@ -492,9 +597,10 @@
mode="full",
retry=[60, 60],
timeout=3600,
sshPrivateKey=project.private_sshkey_path
sshPrivateKey=gerrit_private_key
),
)
# use one gcroots directory per worker. this should be scoped to the largest unique resource
# in charge of builds (ie, buildnumber is too narrow) to not litter the system with permanent
# gcroots in case of worker restarts.
@@ -503,27 +609,22 @@
"/nix/var/nix/gcroots/per-user/buildbot-worker/%(prop:project)s/drvs/%(prop:workername)s/",
)
eval_settings = EvaluatorSettings(
supported_systems=supported_systems,
worker_count=worker_count,
max_memory_size=max_memory_size,
gc_roots_dir=drv_gcroots_dir,
lock=eval_lock
)
# NixConfigure will choose how to add a NixEvalCommand job
# based on whether there's a flake.nix or a .ci/buildbot.nix.
factory.addStep(
NixEvalCommand(
env={},
name="evaluate flake",
supported_systems=supported_systems,
command=[
"nix-eval-jobs",
"--workers",
str(worker_count),
"--max-memory-size",
str(max_memory_size),
"--gc-roots-dir",
drv_gcroots_dir,
"--force-recurse",
"--check-cache-status",
"--flake",
f".#{FLAKE_TARGET_ATTRIBUTE_FOR_JOBS}"
],
haltOnFailure=True,
locks=[eval_lock.access("exclusive")],
),
NixConfigure(
eval_settings
)
)
factory.addStep(
@@ -712,12 +813,6 @@ def config_for_project(
),
],
)
gerrit_private_key = None
with open(project.private_sshkey_path, 'r') as f:
gerrit_private_key = f.read()
if gerrit_private_key is None:
raise RuntimeError('No gerrit private key to fetch the repositories')
config["builders"].extend(
[
@@ -824,13 +919,15 @@ class GerritNixConfigurator(ConfiguratorBase):
prometheus_config: dict[str, int | str] | None = None,
binary_cache_config: dict[str, str] | None = None,
auth_method: AuthBase | None = None,
manhole: Any = None,
) -> None:
super().__init__()
self.manhole = manhole
self.allowed_origins = allowed_origins
self.gerrit_server = gerrit_server
self.gerrit_user = gerrit_user
self.gerrit_port = gerrit_port
self.gerrit_sshkey_path = gerrit_sshkey_path
self.gerrit_sshkey_path = str(gerrit_sshkey_path)
self.gerrit_config = GerritConfig(domain=self.gerrit_server,
username=self.gerrit_user,
port=self.gerrit_port)
@@ -860,6 +957,9 @@ class GerritNixConfigurator(ConfiguratorBase):
worker_config = json.loads(read_secret_file(self.nix_workers_secret_name))
worker_names = []
if self.manhole is not None:
config["manhole"] = self.manhole
config.setdefault("projects", [])
config.setdefault("secretsProviders", [])
config.setdefault("www", {


@@ -7,6 +7,9 @@
let
inherit (lib) filterAttrs;
cfg = config.services.buildbot-nix.coordinator;
debuggingManhole = if cfg.debugging.enable then
"manhole.TelnetManhole(${toString cfg.debugging.port}, 'admin', 'admin')"
else "None";
in
{
options = {
@@ -28,6 +31,14 @@ in
description = "List of local remote builders machines associated to that Buildbot instance";
};
debugging = {
enable = lib.mkEnableOption "Buildbot's manhole debugging interface on localhost, using `admin:admin` as credentials";
port = lib.mkOption {
type = lib.types.port;
default = 15000;
};
};
oauth2 = {
name = lib.mkOption {
type = lib.types.str;
@@ -163,6 +174,14 @@ in
'';
example = [ "lix" ];
};
projects = lib.mkOption {
type = lib.types.listOf lib.types.str;
description = ''
List of projects to check on Gerrit.
'';
example = [ "lix" ];
};
};
binaryCache = {
@@ -216,6 +235,7 @@ in
extraImports = ''
from datetime import timedelta
from buildbot_nix import GerritNixConfigurator, read_secret_file, make_oauth2_method, OAuth2Config, assemble_secret_file_path
from buildbot import manhole
# TODO(raito): make me configurable from the NixOS module.
# how?
@@ -257,7 +277,8 @@ in
auth_method=CustomOAuth2(${builtins.toJSON cfg.oauth2.clientId},
read_secret_file('buildbot-oauth2-secret'),
autologin=True
)
),
manhole=${debuggingManhole}
)
''
];
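With the module options above, a coordinator deployment could opt into the manhole like this; a minimal sketch, assuming the rest of `services.buildbot-nix.coordinator` is already configured:

# hypothetical NixOS configuration snippet
{
  services.buildbot-nix.coordinator = {
    # ...existing coordinator settings...
    debugging = {
      enable = true; # renders manhole.TelnetManhole(15000, 'admin', 'admin') into the generated master config
      port = 15000;  # the default; shown here for clarity
    };
  };
}

Connecting a telnet client to that port and logging in as admin/admin should then give an interactive Python shell inside the running Buildbot master.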