Compare commits

..

18 commits

Author SHA1 Message Date
4aa1d7e78c feat: add incoming ref data to non-flakes entrypoint
We can now implement a Nix library for Buildbot CI. :)

We dump it into a file, it's better to pass large stuff and easier to
escape things.

Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 22:49:00 +02:00
b3a0b5a69e fix(auth): remove userinfoUri as a positional argument
This broke the authentication, because we were expecting the client ID &
client secret.

Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:43:45 +02:00
77d0ed37d1 fix(args): pass the right string
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
79bcbba46d fix(buildbot-name): nix_configure → determining jobs
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
1484a875dc fix(ret): return success and write \n
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
c824b084e8 fix(steps): add *steps* not *step*
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
2a2a2793e4 fix(workdir): rebase in build/ for ShellMixin
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
97dee6cfec feat(nix-configure): name it
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
dfb9595f7d fix(log): add the stdio log if it doesn't exist
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
c5dea04717 feat: support non-flake entrypoint
The way to use Buildbot jobs without Flakes is via `.ci/buildbot.nix`.

Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
1711bfd840 fix(eval): event.change.project is also a buildprop for project name
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
2d51fda98e feat(eval): in case of total failure, do not derivation show
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
af13c5948e fix(eval): event.refUpdate.project instead of event.project
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
253e44646d fix(properties): use getProperty
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
bbac51e09e fix(gerrit): pass properly the ssh private key and not its path
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
83ac74f18f fix(gerrit): repourl was not formatted
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
2411875825 fix(sshkey): PosixPath does not play well with Buildbot APIs
It expected a `str`, not a `PosixPath`.

Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 21:21:26 +02:00
fa83000f07 feat(debug): add manhole debugging
Signed-off-by: Raito Bezarius <raito@lix.systems>
2024-10-05 20:52:52 +02:00
2 changed files with 182 additions and 61 deletions

View file

@ -8,6 +8,7 @@ from collections.abc import Generator
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any
import tempfile
import buildbot
from buildbot.configurators import ConfiguratorBase
@ -24,6 +25,7 @@ from buildbot.reporters.generators.build import BuildStatusGenerator
from buildbot.reporters.message import MessageFormatterFunction
from buildbot.process.buildstep import EXCEPTION
from buildbot.process.buildstep import SUCCESS
from buildbot.process.buildstep import BuildStepFailed
from buildbot.process.results import worst_status
import requests
@ -39,6 +41,14 @@ log = Logger()
FLAKE_TARGET_ATTRIBUTE_FOR_JOBS = "buildbotJobs"
@dataclass
class EvaluatorSettings:
supported_systems: list[str]
worker_count: int
max_memory_size: int
gc_roots_dir: str
lock: util.MasterLock
@dataclass
class NixBuilder:
protocol: str
@ -71,9 +81,8 @@ class OAuth2Config:
debug: bool = False
class KeycloakOAuth2Auth(OAuth2Auth):
def __init__(self, userinfoUri: str, *args, debug=False, **kwargs):
def __init__(self, *args, debug=False, **kwargs):
super().__init__(*args, **kwargs)
self.userinfoUri = userinfoUri
self.debug = debug
def createSessionFromToken(self, token):
@ -130,7 +139,7 @@ class GerritConfig:
"""
Returns the prefix to build a repourl using that gerrit configuration.
"""
return 'ssh://{self.username}@{self.domain}:{self.port}/'
return f'ssh://{self.username}@{self.domain}:{self.port}/'
class BuildTrigger(steps.BuildStep):
def __init__(
@ -148,7 +157,7 @@ class BuildTrigger(steps.BuildStep):
self.ended = False
self.waitForFinishDeferred = None
self.brids = []
self.description = f"building {len(jobs)} hydra jobs"
self.description = f"building {len(jobs)} jobs"
super().__init__(**kwargs)
def interrupt(self, reason):
@ -177,15 +186,14 @@ class BuildTrigger(steps.BuildStep):
return sch
def schedule_one(self, build_props: Properties, job):
project_name = build_props.getProperty('event.project')
source = f"{project_name}-eval-lix"
project_name = build_props.getProperty("event.refUpdate.project") or build_props.getProperty("event.change.project")
source = f"{project_name}-eval"
attr = job.get("attr", "eval-error")
name = attr
name = f"{FLAKE_TARGET_ATTRIBUTE_FOR_JOBS}.{name}"
name = f"buildbotJobs.{attr}"
error = job.get("error")
props = Properties()
props.setProperty("virtual_builder_name", name, source)
props.setProperty("status_name", f"nix-build .#{FLAKE_TARGET_ATTRIBUTE_FOR_JOBS}.{attr}", source)
props.setProperty("status_name", f"building buildbotJobs.{attr}", source)
props.setProperty("virtual_builder_tags", "", source)
if error is not None:
@ -372,7 +380,8 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep):
# run nix-eval-jobs --flake .#$FLAKE_TARGET_ATTRIBUTE_FOR_JOBS to generate the dict of stages
cmd: remotecommand.RemoteCommand = yield self.makeRemoteShellCommand()
build_props = self.build.getProperties()
project_name = build_props.get('event.project')
project_name = build_props.getProperty("event.refUpdate.project") or build_props.getProperty("event.change.project")
assert project_name is not None, "`event.refUpdate.project` or `event.change.project` is not available on the build properties, unexpected build type!"
yield self.runCommand(cmd)
@ -396,26 +405,11 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep):
if not system or system in self.supported_systems: # report eval errors
filtered_jobs.append(job)
# Filter out failed evaluations
succeeded_jobs = [job for job in filtered_jobs if job.get('error') is None]
drv_show_log: Log = yield self.getLog("stdio")
drv_show_log.addStdout(f"getting derivation infos\n")
cmd = yield self.makeRemoteShellCommand(
stdioLogName=None,
collectStdout=True,
command=(
["nix", "derivation", "show", "--recursive"]
+ [ drv for drv in (job.get("drvPath") for job in filtered_jobs) if drv ]
),
)
yield self.runCommand(cmd)
drv_show_log.addStdout(f"done\n")
try:
drv_info = json.loads(cmd.stdout)
except json.JSONDecodeError as e:
msg = f"Failed to parse `nix derivation show` output for {cmd.command}"
raise BuildbotNixError(msg) from e
all_deps = dict()
for drv, info in drv_info.items():
all_deps[drv] = set(info.get("inputDrvs").keys())
def closure_of(key, deps):
r, size = set([key]), 0
@ -424,14 +418,34 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep):
r.update(*[ deps[k] for k in r ])
return r.difference([key])
job_set = set(( drv for drv in ( job.get("drvPath") for job in filtered_jobs ) if drv ))
all_deps = { k: list(closure_of(k, all_deps).intersection(job_set)) for k in job_set }
if succeeded_jobs:
drv_show_log.addStdout(f"getting derivation infos for valid derivations\n")
cmd = yield self.makeRemoteShellCommand(
stdioLogName=None,
collectStdout=True,
command=(
["nix", "derivation", "show", "--recursive"]
+ [ drv for drv in (job.get("drvPath") for job in succeeded_jobs) if drv ]
),
)
yield self.runCommand(cmd)
drv_show_log.addStdout(f"done\n")
try:
drv_info = json.loads(cmd.stdout)
except json.JSONDecodeError as e:
msg = f"Failed to parse `nix derivation show` output for {cmd.command}"
raise BuildbotNixError(msg) from e
for drv, info in drv_info.items():
all_deps[drv] = set(info.get("inputDrvs").keys())
job_set = set(( drv for drv in ( job.get("drvPath") for job in filtered_jobs ) if drv ))
all_deps = { k: list(closure_of(k, all_deps).intersection(job_set)) for k in job_set }
self.build.addStepsAfterCurrentStep(
[
BuildTrigger(
builds_scheduler_group=f"{project_name}-nix-build",
name="build flake",
name="build derivations",
jobs=filtered_jobs,
all_deps=all_deps,
),
@ -440,6 +454,91 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep):
return result
def make_job_evaluator(name: str, settings: EvaluatorSettings, flake: bool, incoming_ref_filename: str) -> NixEvalCommand:
actual_command = []
if flake:
actual_command += ["--flake", f".#{FLAKE_TARGET_ATTRIBUTE_FOR_JOBS}"]
else:
actual_command += ["--expr",
f"import ./.ci/buildbot.nix {{ incoming_ref_data = builtins.fromJSON (builtins.readFile {incoming_ref_filename}); }}"]
return NixEvalCommand(
env={},
name=name,
supported_systems=settings.supported_systems,
command=[
"nix-eval-jobs",
"--workers",
str(settings.worker_count),
"--max-memory-size",
str(settings.max_memory_size),
"--gc-roots-dir",
settings.gc_roots_dir,
"--force-recurse",
"--check-cache-status",
] + actual_command,
haltOnFailure=True,
locks=[settings.lock.access("exclusive")]
)
class NixConfigure(buildstep.CommandMixin, steps.BuildStep):
name = "determining jobs"
"""
Determine what `NixEvalCommand` step should be added after
based on the existence of:
- flake.nix
- .ci/buildbot.nix
"""
def __init__(self, eval_settings: EvaluatorSettings, **kwargs: Any) -> None:
self.evaluator_settings = eval_settings
super().__init__(**kwargs)
self.observer = logobserver.BufferLogObserver()
self.addLogObserver("stdio", self.observer)
@defer.inlineCallbacks
def run(self) -> Generator[Any, object, Any]:
try:
configure_log: Log = yield self.getLog("stdio")
except Exception:
configure_log: Log = yield self.addLog("stdio")
# Takes precedence.
configure_log.addStdout("checking if there's a .ci/buildbot.nix...\n")
ci_buildbot_defn_exists = yield self.pathExists('build/.ci/buildbot.nix')
if ci_buildbot_defn_exists:
configure_log.addStdout(".ci/buildbot.nix found, configured for non-flake CI\n")
self.build.addStepsAfterCurrentStep(
[
make_job_evaluator(
"evaluate `.ci/buildbot.nix` jobs",
self.evaluator_settings,
False,
"./incoming-ref.json"
)
]
)
return SUCCESS
flake_exists = yield self.pathExists('build/flake.nix')
if flake_exists:
configure_log.addStdout(f"flake.nix found")
self.build.addStepsAfterCurrentStep([
make_job_evaluator(
"evaluate `flake.nix` jobs",
self.evaluator_settings,
True,
"./incoming-ref.json"
)
]
)
return SUCCESS
configure_log.addStdout("neither flake.nix found neither .ci/buildbot.nix, no CI to run!")
return SUCCESS
class NixBuildCommand(buildstep.ShellMixin, steps.BuildStep):
"""Builds a nix derivation."""
@ -481,10 +580,19 @@ def nix_eval_config(
worker_count: int,
max_memory_size: int,
) -> util.BuilderConfig:
"""Uses nix-eval-jobs to evaluate $FLAKE_TARGET_ATTRIBUTE_FOR_JOBS (`.#hydraJobs` by default) from flake.nix in parallel.
"""
Uses nix-eval-jobs to evaluate the entrypoint of this project.
For each evaluated attribute a new build pipeline is started.
"""
factory = util.BuildFactory()
gerrit_private_key = None
with open(project.private_sshkey_path, 'r') as f:
gerrit_private_key = f.read()
if gerrit_private_key is None:
raise RuntimeError('No gerrit private key to fetch the repositories')
# check out the source
factory.addStep(
steps.Gerrit(
@ -492,9 +600,10 @@ def nix_eval_config(
mode="full",
retry=[60, 60],
timeout=3600,
sshPrivateKey=project.private_sshkey_path
sshPrivateKey=gerrit_private_key
),
)
# use one gcroots directory per worker. this should be scoped to the largest unique resource
# in charge of builds (ie, buildnumber is too narrow) to not litter the system with permanent
# gcroots in case of worker restarts.
@ -503,27 +612,27 @@ def nix_eval_config(
"/nix/var/nix/gcroots/per-user/buildbot-worker/%(prop:project)s/drvs/%(prop:workername)s/",
)
eval_settings = EvaluatorSettings(
supported_systems=supported_systems,
worker_count=worker_count,
max_memory_size=max_memory_size,
gc_roots_dir=drv_gcroots_dir,
lock=eval_lock
)
# This information can be passed at job evaluation time
# to skip some jobs, e.g. expensive jobs, etc.
# Transfer incoming ref data to the target.
factory.addStep(steps.JSONPropertiesDownload(workerdest="incoming-ref.json"))
# NixConfigure will choose
# how to add a NixEvalCommand job
# based on whether there's a flake.nix or
# a .ci/buildbot.nix.
factory.addStep(
NixEvalCommand(
env={},
name="evaluate flake",
supported_systems=supported_systems,
command=[
"nix-eval-jobs",
"--workers",
str(worker_count),
"--max-memory-size",
str(max_memory_size),
"--gc-roots-dir",
drv_gcroots_dir,
"--force-recurse",
"--check-cache-status",
"--flake",
f".#{FLAKE_TARGET_ATTRIBUTE_FOR_JOBS}"
],
haltOnFailure=True,
locks=[eval_lock.access("exclusive")],
),
NixConfigure(
eval_settings
)
)
factory.addStep(
@ -712,12 +821,6 @@ def config_for_project(
),
],
)
gerrit_private_key = None
with open(project.private_sshkey_path, 'r') as f:
gerrit_private_key = f.read()
if gerrit_private_key is None:
raise RuntimeError('No gerrit private key to fetch the repositories')
config["builders"].extend(
[
@ -824,13 +927,15 @@ class GerritNixConfigurator(ConfiguratorBase):
prometheus_config: dict[str, int | str] | None = None,
binary_cache_config: dict[str, str] | None = None,
auth_method: AuthBase | None = None,
manhole: Any = None,
) -> None:
super().__init__()
self.manhole = manhole
self.allowed_origins = allowed_origins
self.gerrit_server = gerrit_server
self.gerrit_user = gerrit_user
self.gerrit_port = gerrit_port
self.gerrit_sshkey_path = gerrit_sshkey_path
self.gerrit_sshkey_path = str(gerrit_sshkey_path)
self.gerrit_config = GerritConfig(domain=self.gerrit_server,
username=self.gerrit_user,
port=self.gerrit_port)
@ -860,6 +965,9 @@ class GerritNixConfigurator(ConfiguratorBase):
worker_config = json.loads(read_secret_file(self.nix_workers_secret_name))
worker_names = []
if self.manhole is not None:
config["manhole"] = self.manhole
config.setdefault("projects", [])
config.setdefault("secretsProviders", [])
config.setdefault("www", {

View file

@ -7,6 +7,9 @@
let
inherit (lib) filterAttrs;
cfg = config.services.buildbot-nix.coordinator;
debuggingManhole = if cfg.debugging.enable then
"manhole.TelnetManhole(${toString cfg.debugging.port}, 'admin', 'admin')"
else "None";
in
{
options = {
@ -28,6 +31,14 @@ in
description = "List of local remote builders machines associated to that Buildbot instance";
};
debugging = {
enable = lib.mkEnableOption "manhole's buildbot debugging on localhost using `admin:admin`";
port = lib.mkOption {
type = lib.types.port;
default = 15000;
};
};
oauth2 = {
name = lib.mkOption {
type = lib.types.str;
@ -216,6 +227,7 @@ in
extraImports = ''
from datetime import timedelta
from buildbot_nix import GerritNixConfigurator, read_secret_file, make_oauth2_method, OAuth2Config, assemble_secret_file_path
from buildbot import manhole
# TODO(raito): make me configurable from the NixOS module.
# how?
@ -257,7 +269,8 @@ in
auth_method=CustomOAuth2(${builtins.toJSON cfg.oauth2.clientId},
read_secret_file('buildbot-oauth2-secret'),
autologin=True
)
),
manhole=${debuggingManhole}
)
''
];