buildbot-nix/buildbot_nix/__init__.py

1089 lines
40 KiB
Python
Raw Normal View History

2023-09-10 08:11:56 +00:00
import json
import multiprocessing
import os
2023-09-17 20:14:56 +00:00
import sys
2024-03-10 21:27:24 +00:00
import graphlib
import base64
import random
2023-09-17 20:14:56 +00:00
from collections.abc import Generator
from dataclasses import dataclass, field
2023-09-10 08:11:56 +00:00
from pathlib import Path
2023-12-26 20:56:36 +00:00
from typing import TYPE_CHECKING, Any
2023-12-26 18:49:57 +00:00
import buildbot
2023-09-17 20:14:56 +00:00
from buildbot.configurators import ConfiguratorBase
2023-10-15 06:36:45 +00:00
from buildbot.plugins import reporters, schedulers, secrets, steps, util, worker
2023-09-10 08:11:56 +00:00
from buildbot.process import buildstep, logobserver, remotecommand
2023-09-17 20:14:56 +00:00
from buildbot.process.project import Project
from buildbot.process.properties import Properties
2023-09-10 08:11:56 +00:00
from buildbot.process.results import ALL_RESULTS, statusToString
from buildbot.www.auth import AuthBase
from buildbot.www.oauth2 import OAuth2Auth
from buildbot.changes.gerritchangesource import GerritChangeSource
2024-03-10 21:27:24 +00:00
from buildbot.reporters.utils import getURLForBuildrequest
from buildbot.reporters.generators.build import BuildStatusGenerator
from buildbot.reporters.message import MessageFormatterFunction
2024-03-10 21:27:24 +00:00
from buildbot.process.buildstep import EXCEPTION
from buildbot.process.buildstep import SUCCESS
from buildbot.process.buildstep import BuildStepFailed
from buildbot.process.results import worst_status
import requests
2023-10-27 08:49:40 +00:00
2023-12-26 20:56:36 +00:00
if TYPE_CHECKING:
from buildbot.process.log import Log
from twisted.internet import defer
2023-12-26 20:56:36 +00:00
from twisted.logger import Logger
from .binary_cache import S3BinaryCacheConfig
# Module-wide Twisted logger; used by the OAuth2 helpers in this file.
log = Logger()
# Flake output attribute that `nix-eval-jobs --flake .#<attr>` is pointed at
# (see make_job_evaluator).
FLAKE_TARGET_ATTRIBUTE_FOR_JOBS = "buildbotJobs"
@dataclass
class EvaluatorSettings:
    """Settings consumed by the steps that run `nix-eval-jobs`."""

    # Systems whose jobs are kept after evaluation (see NixEvalCommand).
    supported_systems: list[str]
    # Value passed to `nix-eval-jobs --workers`.
    worker_count: int
    # Value passed to `nix-eval-jobs --max-memory-size`.
    max_memory_size: int
    # Value passed to `nix-eval-jobs --gc-roots-dir`.
    # NOTE(review): nix_eval_config passes a `util.Interpolate` here, not a plain str.
    gc_roots_dir: str
    # Lock accessed in "exclusive" mode by the evaluation step.
    lock: util.MasterLock
@dataclass
class NixBuilder:
protocol: str
hostName: str
maxJobs: int
speedFactor: int = 1
# without base64
publicHostKey: str | None = None
sshUser: str | None = None
sshKey: str | None = None
systems: list[str] = field(default_factory=lambda: [])
supportedFeatures: list[str] = field(default_factory=lambda: [])
mandatoryFeatures: list[str] = field(default_factory=lambda: [])
def to_nix_store(self):
fullConnection = f"{self.sshUser}@{self.hostName}" if self.sshUser is not None else self.hostName
fullConnection = f"{self.protocol}://{fullConnection}"
params = []
if self.sshKey is not None:
params.append(f"ssh-key={self.sshKey}")
if self.publicHostKey is not None:
encoded_public_key = base64.b64encode(self.publicHostKey.encode('ascii')).decode('ascii')
params.append(f"base64-ssh-public-host-key={encoded_public_key}")
if params != []:
fullConnection += "?"
fullConnection += "&".join(params)
return fullConnection
@dataclass
class OAuth2Config:
    """Plain-data description of an OAuth2 provider.

    make_oauth2_method copies every field of an instance into the attributes
    of a dynamically created KeycloakOAuth2Auth subclass.
    """

    # Used as the prefix of the generated class name.
    name: str
    faIcon: str
    resourceEndpoint: str
    authUri: str
    tokenUri: str
    # NOTE(review): presumably the endpoint queried for user information — confirm
    # how it reaches KeycloakOAuth2Auth.__init__'s `userinfoUri` argument.
    userinfoUri: str
    sslVerify: bool = True
    debug: bool = False
class KeycloakOAuth2Auth(OAuth2Auth):
    """OAuth2 authentication that reads the user's groups from a userinfo endpoint.

    `userinfoUri` is the URL queried in getUserInfoFromOAuthClient; `debug`
    additionally logs the obtained token when a session is created.
    """

    def __init__(self, userinfoUri: str, *args, debug=False, **kwargs):
        super().__init__(*args, **kwargs)
        self.userinfoUri = userinfoUri
        self.debug = debug

    def createSessionFromToken(self, token):
        """Return a `requests.Session` that sends `token` as a bearer token."""
        s = requests.Session()
        s.headers = {
            'Authorization': 'Bearer ' + token['access_token'],
            'User-Agent': f'buildbot/{buildbot.version}',
        }
        if self.debug:
            # Pass the value as a log field: the Twisted logger treats the first
            # argument as a format string, so a pre-formatted dict repr (which
            # contains braces) would be re-interpreted as format fields.
            log.info("Token obtained: {token}", token=token)
        s.verify = self.sslVerify
        return s

    def getUserInfoFromOAuthClient(self, c):
        """Query the userinfo endpoint with session `c` and return the user's groups.

        Raises whatever `raise_for_status()` raises when the endpoint does not
        answer with a success status.
        """
        userinfo_resp = c.get(self.userinfoUri)
        log.info(
            "Userinfo request to OAuth2: {status}",
            status=userinfo_resp.status_code,
        )
        if userinfo_resp.status_code != 200:
            # The challenge header is optional; a plain lookup would raise
            # KeyError and hide the HTTP error raised just below.
            log.error(
                "Userinfo failure: {challenge}",
                challenge=userinfo_resp.headers.get("www-authenticate"),
            )
        userinfo_resp.raise_for_status()
        userinfo_data = userinfo_resp.json()
        return {
            'groups': userinfo_data['buildbot_roles']
        }
def make_oauth2_method(oauth2_config: OAuth2Config):
    """
    Build, at runtime, a KeycloakOAuth2Auth subclass whose class attributes
    are the fields of the given configuration dataclass.
    """
    class_name = f'{oauth2_config.name}DynamicOAuth2'
    class_attributes = vars(oauth2_config)
    return type(class_name, (KeycloakOAuth2Auth,), class_attributes)
2023-12-26 20:56:36 +00:00
class BuildbotNixError(Exception):
    """Error raised by the steps in this module, e.g. when tool output cannot be parsed as JSON."""
    pass
@dataclass
class GerritProject:
    """A Gerrit project for which builders and schedulers are configured."""

    # `project` field.
    name: str
    # Private SSH key path to access Gerrit API
    # (its contents are read in nix_eval_config and handed to the checkout step).
    private_sshkey_path: str
2023-12-26 20:56:36 +00:00
@dataclass
class GerritConfig:
    """Connection parameters of the Gerrit server."""

    # Gerrit server domain
    domain: str
    port: int
    username: str

    @property
    def repourl_template(self) -> str:
        """
        Prefix from which repository URLs for this Gerrit server are built:
        `ssh://<username>@<domain>:<port>/` (note the trailing slash).
        """
        authority = f'{self.username}@{self.domain}:{self.port}'
        return f'ssh://{authority}/'
2024-03-10 21:27:24 +00:00
class BuildTrigger(steps.BuildStep):
    """Trigger one build per evaluated job, respecting inter-job dependencies.

    `jobs` is the list of job dictionaries produced by the evaluation step and
    `all_deps` maps a job's `drvPath` to the `drvPath`s of the jobs it depends
    on. A job is triggered on the scheduler `<builds_scheduler_group>-<system>`
    once its dependency list is empty; jobs carrying an `error` go to
    `<builds_scheduler_group>-other`. When a job fails, every job that depends
    on it (transitively) is skipped and recorded in the `failed_builds` build
    property.

    NOTE: `all_deps` is mutated while the step runs.
    """

    def __init__(
        self,
        builds_scheduler_group: str,
        jobs: list[dict[str, Any]],
        all_deps: dict[str, Any],
        **kwargs: Any,
    ) -> None:
        self.jobs = jobs
        self.all_deps = all_deps
        self.config = None
        self.builds_scheduler_group = builds_scheduler_group
        self._result_list = []
        self.ended = False
        # interrupt() reads this flag; define it here so that an interrupt
        # arriving before run() has started does not hit a missing attribute.
        self.running = False
        self.waitForFinishDeferred = None
        self.brids = []
        self.description = f"building {len(jobs)} jobs"
        super().__init__(**kwargs)

    def interrupt(self, reason):
        # We cancel the buildrequests, as the data api handles
        # both cases:
        # - build started: stop is sent,
        # - build not created yet: related buildrequests are set to CANCELLED.
        # Note that there is an identified race condition though (more details
        # are available at buildbot.data.buildrequests).
        for brid in self.brids:
            self.master.data.control(
                "cancel", {'reason': 'parent build was interrupted'}, ("buildrequests", brid)
            )
        if self.running and not self.ended:
            self.ended = True
            # if we are interrupted because of a connection lost, we interrupt synchronously
            if self.build.conn is None and self.waitForFinishDeferred is not None:
                self.waitForFinishDeferred.cancel()

    def getSchedulerByName(self, name):
        """Return the scheduler registered under `name`; raise ValueError if unknown."""
        known_schedulers = self.master.scheduler_manager.namedServices
        if name not in known_schedulers:
            raise ValueError(f"unknown triggered scheduler: {repr(name)}")
        # todo: check ITriggerableScheduler
        return known_schedulers[name]

    def schedule_one(self, build_props: Properties, job):
        """Compute the scheduler name and the properties for triggering `job`.

        Returns a `(scheduler_name, Properties)` pair. For a successfully
        evaluated job the `<attr>-out_path` and `<attr>-drv_path` properties
        are also recorded on `build_props` (the parent build).
        """
        project_name = build_props.getProperty("event.refUpdate.project") or build_props.getProperty("event.change.project")
        source = f"{project_name}-eval"
        attr = job.get("attr", "eval-error")
        name = f"buildbotJobs.{attr}"
        error = job.get("error")

        props = Properties()
        props.setProperty("virtual_builder_name", name, source)
        props.setProperty("status_name", f"building buildbotJobs.{attr}", source)
        props.setProperty("virtual_builder_tags", "", source)
        # Set for every job so that NixBuildCommand can name the attribute
        # when it reports an evaluation error.
        props.setProperty("attr", attr, source)

        if error is not None:
            props.setProperty("error", error, source)
            return (f"{self.builds_scheduler_group}-other", props)

        drv_path = job.get("drvPath")
        system = job.get("system")
        out_path = job.get("outputs", {}).get("out")

        build_props.setProperty(f"{attr}-out_path", out_path, source)
        build_props.setProperty(f"{attr}-drv_path", drv_path, source)

        props.setProperty("system", system, source)
        props.setProperty("drv_path", drv_path, source)
        props.setProperty("out_path", out_path, source)
        props.setProperty("isCached", job.get("isCached"), source)

        return (f"{self.builds_scheduler_group}-{system}", props)

    @defer.inlineCallbacks
    def _add_results(self, brid):
        """Wait for build request `brid` to complete, then record its builds' results."""
        @defer.inlineCallbacks
        def _is_buildrequest_complete(brid):
            buildrequest = yield self.master.db.buildrequests.getBuildRequest(brid)
            return buildrequest['complete']

        event = ('buildrequests', str(brid), 'complete')
        yield self.master.mq.waitUntilEvent(event, lambda: _is_buildrequest_complete(brid))
        builds = yield self.master.db.builds.getBuilds(buildrequestid=brid)
        for build in builds:
            self._result_list.append(build["results"])
        self.updateSummary()

    def prepareSourcestampListForTrigger(self):
        """Return the build's source stamps as dicts, one per codebase, sorted by codebase."""
        ss_for_trigger = {}
        for ss in self.build.getAllSourceStamps():
            ss_for_trigger[ss.codebase] = ss.asDict()
        return [ss_for_trigger[k] for k in sorted(ss_for_trigger.keys())]

    def _release_dependents(self, drv_path):
        """Remove `drv_path` from the dependency list of every job waiting on it."""
        for dep in self.all_deps:
            if drv_path in self.all_deps[dep]:
                self.all_deps[dep].remove(drv_path)

    @defer.inlineCallbacks
    def run(self):
        self.running = True
        build_props = self.build.getProperties()
        logs: Log = yield self.addLog("build info")

        # Order the jobs so that dependencies come before their dependents.
        jobs_by_drv: dict[Any, list[dict[str, Any]]] = {}
        for job in self.jobs:
            jobs_by_drv.setdefault(job.get("drvPath"), []).append(job)

        build_schedule_order = []
        sorter = graphlib.TopologicalSorter(self.all_deps)
        for drv_path in sorter.static_order():
            build_schedule_order.extend(jobs_by_drv.pop(drv_path, []))
        # Jobs that are not part of the dependency graph (notably evaluation
        # errors, which have no drvPath) used to be dropped here and were
        # therefore never reported. They have no dependencies, so they are
        # simply scheduled after the ordered jobs.
        for leftover_jobs in jobs_by_drv.values():
            build_schedule_order.extend(leftover_jobs)

        scheduled = []
        failed = []
        all_results = SUCCESS
        ss_for_trigger = self.prepareSourcestampListForTrigger()
        while not self.ended and (len(build_schedule_order) > 0 or len(scheduled) > 0):
            # Everything whose dependencies are all satisfied can start now.
            schedule_now = []
            for build in list(build_schedule_order):
                if self.all_deps.get(build.get("drvPath"), []) == []:
                    build_schedule_order.remove(build)
                    schedule_now.append(build)

            for job in schedule_now:
                if job.get('isCached'):
                    logs.addStdout(f"Cached {job.get('attr')} ({job.get('drvPath')}) - skipping\n")
                    self._release_dependents(job.get("drvPath"))
                    continue
                logs.addStdout(f"Scheduling {job.get('attr')} ({job.get('drvPath')})\n")

                (scheduler, props) = self.schedule_one(build_props, job)
                scheduler = self.getSchedulerByName(scheduler)

                idsDeferred, resultsDeferred = scheduler.trigger(
                    waited_for = True,
                    sourcestamps = ss_for_trigger,
                    set_props = props,
                    parent_buildid = self.build.buildid,
                    parent_relationship = "Triggered from",
                )

                brids = {}
                try:
                    _, brids = yield idsDeferred
                except Exception as e:
                    yield self.addLogWithException(e)
                    # The original assigned EXCEPTION to a variable that was
                    # never read; make the failure count towards the result.
                    all_results = worst_status(EXCEPTION, all_results)
                scheduled.append((job, brids, resultsDeferred))

                for brid in brids.values():
                    url = getURLForBuildrequest(self.master, brid)
                    yield self.addURL(f"{scheduler.name} #{brid}", url)
                    # Deliberately not waited for: results are collected in the background.
                    self._add_results(brid)
                    self.brids.append(brid)

            if len(scheduled) == 0:
                if len(build_schedule_order) == 0:
                    logs.addStderr('Ran out of builds\n')
                    break
                continue

            wait_for_next = defer.DeferredList([results for _, _, results in scheduled], fireOnOneCallback = True, fireOnOneErrback=True)
            self.waitForFinishDeferred = wait_for_next
            results, index = yield wait_for_next

            job, brids, _ = scheduled[index]
            del scheduled[index]
            result = results[0]
            logs.addStdout(f'Build {job.get("attr")} ({job.get("drvPath")}) finished, result {util.Results[result].upper()}\n')

            if result != SUCCESS:
                failed_paths = [job.get('drvPath')]
                removed = []
                failed.append((
                    job.get("attr"),
                    "failed",
                    [ getURLForBuildrequest(self.master, brid) for brid in brids.values() ]
                ))

                # Drop every not-yet-scheduled job that depends, directly or
                # transitively, on the failed one; repeat until nothing changes.
                while True:
                    old_paths = list(failed_paths)
                    for build in list(build_schedule_order):
                        deps = self.all_deps.get(build.get("drvPath"), [])
                        for path in old_paths:
                            if path in deps:
                                failed_paths.append(build.get("drvPath"))
                                build_schedule_order.remove(build)
                                removed.append(build.get("attr"))
                                failed.append((build.get("attr"), f"dependency {job.get('attr')} failed", []))
                                break
                    if old_paths == failed_paths:
                        break

                if len(removed) > 3:
                    yield logs.addStdout(' Skipping jobs: ' + ', '.join(removed[:3]) + f', ... ({len(removed) - 3} more)\n')
                else:
                    yield logs.addStdout(' Skipping jobs: ' + ', '.join(removed) + '\n')
                all_results = worst_status(result, all_results)

            self._release_dependents(job.get("drvPath"))

        yield logs.addHeader('Done!\n')
        yield logs.finish()
        build_props.setProperty("failed_builds", failed, "nix-eval")
        if self.ended:
            return util.CANCELLED
        return all_results

    def getCurrentSummary(self) -> dict[str, str]:  # noqa: N802
        """Summarise the results collected so far, e.g. `(2 success, 1 failure)`."""
        summary = []
        if self._result_list:
            for status in ALL_RESULTS:
                count = self._result_list.count(status)
                if count:
                    summary.append(
                        f"{count} {statusToString(status, count)}",
                    )
        return {"step": f"({', '.join(summary)})"}
class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep):
    """Parses the output of `nix-eval-jobs` and triggers a `nix-build` build for
    every attribute.

    Jobs are kept when they have no `system` (evaluation errors) or when their
    system is one of `supported_systems`. The dependency relation between the
    kept jobs is computed from `nix derivation show --recursive` and handed,
    together with the jobs, to a BuildTrigger step inserted after this one.
    """

    def __init__(self, supported_systems: list[str], **kwargs: Any) -> None:
        kwargs = self.setupShellMixin(kwargs)
        super().__init__(**kwargs)
        self.observer = logobserver.BufferLogObserver()
        self.addLogObserver("stdio", self.observer)
        self.supported_systems = supported_systems

    @staticmethod
    def _parse_jobs(stdout: str) -> list[dict[str, Any]]:
        """Decode one JSON document per non-empty line; raise BuildbotNixError on bad JSON."""
        jobs = []
        for line in stdout.split("\n"):
            if line == "":
                continue
            try:
                job = json.loads(line)
            except json.JSONDecodeError as e:
                msg = f"Failed to parse line: {line}"
                raise BuildbotNixError(msg) from e
            jobs.append(job)
        return jobs

    @staticmethod
    def _closure_of(key, deps):
        """Return everything reachable from `key` through `deps`, excluding `key` itself."""
        reached, size = {key}, 0
        while len(reached) != size:
            size = len(reached)
            # A derivation missing from `deps` contributes no further edges
            # instead of aborting the whole step with a KeyError.
            reached.update(*[deps.get(k, ()) for k in reached])
        return reached.difference([key])

    @defer.inlineCallbacks
    def run(self) -> Generator[Any, object, Any]:
        # run nix-eval-jobs --flake .#$FLAKE_TARGET_ATTRIBUTE_FOR_JOBS to generate the dict of stages
        cmd: remotecommand.RemoteCommand = yield self.makeRemoteShellCommand()
        build_props = self.build.getProperties()
        project_name = build_props.getProperty("event.refUpdate.project") or build_props.getProperty("event.change.project")
        if project_name is None:
            # Explicit raise rather than `assert`, which is stripped under `python -O`.
            msg = "`event.refUpdate.project` or `event.change.project` is not available on the build properties, unexpected build type!"
            raise BuildbotNixError(msg)

        yield self.runCommand(cmd)

        # only a successful evaluation yields jobs to build
        result = cmd.results()
        if result != util.SUCCESS:
            return result

        jobs = self._parse_jobs(self.observer.getStdout())

        filtered_jobs = []
        for job in jobs:
            system = job.get("system")
            if not system or system in self.supported_systems:  # report eval errors
                filtered_jobs.append(job)

        # Filter out failed evaluations
        succeeded_jobs = [job for job in filtered_jobs if job.get('error') is None]

        drv_show_log: Log = yield self.getLog("stdio")

        all_deps = {}
        if succeeded_jobs:
            drv_show_log.addStdout("getting derivation infos for valid derivations\n")
            cmd = yield self.makeRemoteShellCommand(
                stdioLogName=None,
                collectStdout=True,
                command=(
                    ["nix", "derivation", "show", "--recursive"]
                    + [ drv for drv in (job.get("drvPath") for job in succeeded_jobs) if drv ]
                ),
            )
            yield self.runCommand(cmd)
            drv_show_log.addStdout("done\n")
            try:
                drv_info = json.loads(cmd.stdout)
            except json.JSONDecodeError as e:
                msg = f"Failed to parse `nix derivation show` output for {cmd.command}"
                raise BuildbotNixError(msg) from e

            for drv, info in drv_info.items():
                all_deps[drv] = set(info.get("inputDrvs").keys())

        # Restrict the dependency relation to the jobs themselves: for each
        # job, which *other jobs* are in its (transitive) build closure.
        job_set = {drv for drv in (job.get("drvPath") for job in filtered_jobs) if drv}
        all_deps = {k: list(self._closure_of(k, all_deps).intersection(job_set)) for k in job_set}

        self.build.addStepsAfterCurrentStep(
            [
                BuildTrigger(
                    builds_scheduler_group=f"{project_name}-nix-build",
                    name="build derivations",
                    jobs=filtered_jobs,
                    all_deps=all_deps,
                ),
            ],
        )

        return result
def make_job_evaluator(name: str, settings: EvaluatorSettings, flake: bool) -> NixEvalCommand:
    """Create the NixEvalCommand step that runs `nix-eval-jobs` for one project.

    With `flake` set the jobs come from the flake attribute named by
    FLAKE_TARGET_ATTRIBUTE_FOR_JOBS, otherwise from `./.ci/buildbot.nix`.
    The step halts the build on failure and holds `settings.lock` exclusively.
    """
    if flake:
        job_source = ["--flake", f".#{FLAKE_TARGET_ATTRIBUTE_FOR_JOBS}"]
    else:
        job_source = ["--expr", "import ./.ci/buildbot.nix"]

    eval_command = [
        "nix-eval-jobs",
        "--workers",
        str(settings.worker_count),
        "--max-memory-size",
        str(settings.max_memory_size),
        "--gc-roots-dir",
        settings.gc_roots_dir,
        "--force-recurse",
        "--check-cache-status",
        *job_source,
    ]

    return NixEvalCommand(
        env={},
        name=name,
        supported_systems=settings.supported_systems,
        command=eval_command,
        haltOnFailure=True,
        locks=[settings.lock.access("exclusive")]
    )
class NixConfigure(buildstep.CommandMixin, steps.BuildStep):
    """
    Determine what `NixEvalCommand` step should be added after
    based on the existence of:

    - flake.nix
    - .ci/buildbot.nix

    `.ci/buildbot.nix` takes precedence when both exist. When neither exists
    no step is added and the step still succeeds.
    """
    # The docstring must be the first statement of the class body to become
    # `__doc__`; it used to come after this assignment and was discarded.
    name = "determining jobs"

    def __init__(self, eval_settings: EvaluatorSettings, **kwargs: Any) -> None:
        self.evaluator_settings = eval_settings
        super().__init__(**kwargs)
        self.observer = logobserver.BufferLogObserver()
        self.addLogObserver("stdio", self.observer)

    def _add_evaluator(self, step_name: str, flake: bool) -> None:
        """Insert the evaluation step for the detected kind of project after this step."""
        self.build.addStepsAfterCurrentStep(
            [
                make_job_evaluator(
                    step_name,
                    self.evaluator_settings,
                    flake
                )
            ]
        )

    @defer.inlineCallbacks
    def run(self) -> Generator[Any, object, Any]:
        # Reuse the stdio log when it already exists, create it otherwise.
        try:
            configure_log: Log = yield self.getLog("stdio")
        except Exception:
            configure_log: Log = yield self.addLog("stdio")

        # Takes precedence.
        configure_log.addStdout("checking if there's a .ci/buildbot.nix...\n")
        ci_buildbot_defn_exists = yield self.pathExists('build/.ci/buildbot.nix')
        if ci_buildbot_defn_exists:
            configure_log.addStdout(".ci/buildbot.nix found, configured for non-flake CI\n")
            self._add_evaluator("evaluate `.ci/buildbot.nix` jobs", False)
            return SUCCESS

        flake_exists = yield self.pathExists('build/flake.nix')
        if flake_exists:
            configure_log.addStdout("flake.nix found")
            self._add_evaluator("evaluate `flake.nix` jobs", True)
            return SUCCESS

        configure_log.addStdout("neither flake.nix found neither .ci/buildbot.nix, no CI to run!")
        return SUCCESS
2023-09-10 08:11:56 +00:00
class NixBuildCommand(buildstep.ShellMixin, steps.BuildStep):
    """Builds a nix derivation."""

    def __init__(self, **kwargs: Any) -> None:
        kwargs = self.setupShellMixin(kwargs)
        super().__init__(**kwargs)

    @defer.inlineCallbacks
    def run(self) -> Generator[Any, object, Any]:
        """Report an evaluation error, skip a cached job, or run the build command.

        Returns FAILURE when the `error` property is set, SKIPPED when
        `isCached` is set, and the shell command's result otherwise.
        """
        eval_error = self.getProperty("error")
        if eval_error:
            attr = self.getProperty("attr")
            # show eval error
            error_log: Log = yield self.addLog("nix_error")
            error_log.addStderr(f"{attr} failed to evaluate:\n{eval_error}")
            return util.FAILURE

        if self.getProperty("isCached"):
            cached_out_path = self.getProperty("out_path")
            yield self.addCompleteLog(
                "cached outpath from previous builds",
                # buildbot apparently hides the first line in the ui?
                f'\n{cached_out_path}\n')
            return util.SKIPPED

        # run `nix build`
        build_cmd: remotecommand.RemoteCommand = yield self.makeRemoteShellCommand()
        yield self.runCommand(build_cmd)
        return build_cmd.results()
2023-09-10 08:11:56 +00:00
def nix_eval_config(
    gerrit_config: GerritConfig,
    project: GerritProject,
    worker_names: list[str],
    supported_systems: list[str],
    eval_lock: util.MasterLock,
    worker_count: int,
    max_memory_size: int,
) -> util.BuilderConfig:
    """
    Uses nix-eval-jobs to evaluate the entrypoint of this project.
    For each evaluated attribute a new build pipeline is started.

    The returned builder is named `<project>/nix-eval`. It checks the project
    out from Gerrit, lets NixConfigure pick the evaluation step, and always
    removes the per-worker gcroots directory afterwards.

    Raises RuntimeError when the Gerrit private key file is empty, and OSError
    when it cannot be read.
    """
    factory = util.BuildFactory()

    with open(project.private_sshkey_path, 'r') as f:
        gerrit_private_key = f.read()

    # `read()` never returns None, so the former `is None` test could not
    # trigger; an empty key file is the case that check was meant to catch.
    if not gerrit_private_key:
        raise RuntimeError('No gerrit private key to fetch the repositories')

    # check out the source
    factory.addStep(
        steps.Gerrit(
            repourl=f'{gerrit_config.repourl_template}/{project.name}',
            mode="full",
            retry=[60, 60],
            timeout=3600,
            sshPrivateKey=gerrit_private_key
        ),
    )
    # use one gcroots directory per worker. this should be scoped to the largest unique resource
    # in charge of builds (ie, buildnumber is too narrow) to not litter the system with permanent
    # gcroots in case of worker restarts.
    # TODO perhaps we should clean the entire /drvs/ directory up too during startup.
    drv_gcroots_dir = util.Interpolate(
        "/nix/var/nix/gcroots/per-user/buildbot-worker/%(prop:project)s/drvs/%(prop:workername)s/",
    )

    eval_settings = EvaluatorSettings(
        supported_systems=supported_systems,
        worker_count=worker_count,
        max_memory_size=max_memory_size,
        gc_roots_dir=drv_gcroots_dir,
        lock=eval_lock
    )

    # NixConfigure will choose
    # how to add a NixEvalCommand job
    # based on whether there's a flake.nix or
    # a .ci/buildbot.nix.
    factory.addStep(
        NixConfigure(
            eval_settings
        )
    )

    factory.addStep(
        steps.ShellCommand(
            name="Cleanup drv paths",
            command=[
                "rm",
                "-rf",
                drv_gcroots_dir,
            ],
            alwaysRun=True,
        ),
    )

    return util.BuilderConfig(
        name=f"{project.name}/nix-eval",
        workernames=worker_names,
        project=project.name,
        factory=factory,
        properties=dict(status_name="nix-eval"),
    )
def nix_build_config(
    project: GerritProject,
    worker_arch: str,
    worker_names: list[str],
    build_stores: list[str],
    signing_keyfile: str | None = None,
    binary_cache_config: S3BinaryCacheConfig | None = None
) -> util.BuilderConfig:
    """Builds one nix flake attribute.

    The returned builder is named `<project>/nix-build/<worker_arch>`. It
    builds `%(prop:drv_path)s` against the first entry of `build_stores`,
    optionally signs the result (`signing_keyfile`) and uploads it to an S3
    binary cache (`binary_cache_config`), then registers a gcroot and removes
    the temporary out-link.

    Raises ValueError when `build_stores` is empty.
    """
    if not build_stores:
        # Previously surfaced as a bare IndexError on `build_stores[0]`.
        msg = f"no nix build store available for {project.name}/nix-build/{worker_arch}"
        raise ValueError(msg)

    factory = util.BuildFactory()

    # pick a store to run the build on
    # TODO proper scheduling instead of picking the first builder
    build_store = build_stores[0]

    factory.addStep(
        NixBuildCommand(
            env={},
            name="Build flake attr",
            command=[
                "nix",
                "build",
                "-L",
                "--option",
                "keep-going",
                "true",
                # do not build directly on the coordinator
                "--max-jobs", "0",
                # stop stuck builds after 20 minutes
                "--max-silent-time",
                str(60 * 20),
                # kill builds after two hours regardless of activity
                "--timeout",
                "7200",
                # NOTE: a `--builders builders_spec` pair used to sit here, but
                # `builders_spec` is not defined anywhere (this function only
                # receives store URIs), so every call raised NameError. The
                # build machine is selected through `--store` below.
                "--store",
                build_store,
                "--eval-store",
                "ssh-ng://localhost",
                "--out-link",
                util.Interpolate("result-%(prop:attr)s"),
                util.Interpolate("%(prop:drv_path)s^*"),
            ],
            # 3 hours, defaults to 20 minutes
            # We increase this over the default since the build output might end up in a different `nix build`.
            timeout=60 * 60 * 3,
            haltOnFailure=True,
        ),
    )

    if signing_keyfile is not None:
        factory.addStep(
            steps.ShellCommand(
                name="Sign the store path",
                command=[
                    "nix",
                    "store",
                    "sign",
                    "--store",
                    build_store,
                    "--key-file",
                    signing_keyfile,
                    util.Interpolate(
                        "%(prop:drv_path)s^*"
                    )
                ]
            ),
        )

    if binary_cache_config is not None:
        factory.addStep(
            steps.ShellCommand(
                name="Upload the store path to the cache",
                command=[
                    "nix",
                    "copy",
                    "--store",
                    build_store,
                    "--to",
                    f"s3://{binary_cache_config.bucket}?profile={binary_cache_config.profile}&region={binary_cache_config.region}&endpoint={binary_cache_config.endpoint}",
                    util.Property(
                        "out_path"
                    )
                ]
            )
        )

    factory.addStep(
        steps.ShellCommand(
            name="Register gcroot",
            command=[
                "nix-store",
                "--add-root",
                # FIXME: cleanup old build attributes
                util.Interpolate(
                    "/nix/var/nix/gcroots/per-user/buildbot-worker/%(prop:project)s/%(prop:attr)s",
                ),
                "-r",
                util.Property("out_path"),
            ],
            # NOTE(review): this compares against a `github.*` property; confirm
            # it is ever set for builds coming from Gerrit.
            doStepIf=lambda s: s.getProperty("branch")
            == s.getProperty("github.repository.default_branch"),
        ),
    )
    factory.addStep(
        steps.ShellCommand(
            name="Delete temporary gcroots",
            command=["rm", "-f", util.Interpolate("result-%(prop:attr)s")],
        ),
    )

    return util.BuilderConfig(
        name=f"{project.name}/nix-build/{worker_arch}",
        project=project.name,
        workernames=worker_names,
        collapseRequests=False,
        env={},
        factory=factory,
    )
2023-09-17 20:14:56 +00:00
def assemble_secret_file_path(secret_name: str) -> Path:
    """Return the path of `secret_name` inside `$CREDENTIALS_DIRECTORY`.

    When the environment variable is unset, a message is printed to stderr and
    the process exits with status 1.
    """
    credentials_dir = os.environ.get("CREDENTIALS_DIRECTORY")
    if credentials_dir is not None:
        return Path(credentials_dir) / secret_name

    print("directory not set", file=sys.stderr)
    sys.exit(1)
2023-09-17 20:14:56 +00:00
def read_secret_file(secret_name: str) -> str:
    """Return the content of the named secret file with trailing whitespace removed."""
    secret_path = assemble_secret_file_path(secret_name)
    content = secret_path.read_text()
    return content.rstrip()
2023-09-17 20:14:56 +00:00
def config_for_project(
    config: dict[str, Any],
    gerrit_config: GerritConfig,
    project: GerritProject,
    worker_names: list[str],
    nix_supported_systems: list[str],
    nix_eval_worker_count: int,
    nix_eval_max_memory_size: int,
    eval_lock: util.MasterLock,
    nix_builders: list[NixBuilder],
    signing_keyfile: str | None = None,
    binary_cache_config: S3BinaryCacheConfig | None = None
) -> Project:
    """Register the project, its schedulers and its builders in `config`.

    Appends to `config["projects"]`, `config["schedulers"]` and
    `config["builders"]`:

    - one `<project>/nix-eval` builder, fed by a change scheduler and a force
      scheduler;
    - one `<project>/nix-build/<arch>` builder with its `Triggerable`
      scheduler for every supported system plus the pseudo-system `other`.

    Returns the buildbot `Project` that was registered. (The function was
    annotated as returning it but implicitly returned None.)
    """
    # "other" receives the jobs that have no system of their own.
    build_archs = nix_supported_systems + [ "other" ]

    buildbot_project = Project(project.name)
    config["projects"].append(buildbot_project)
    config["schedulers"].extend(
        [
            # build everything pertaining to a project
            # TODO(raito): will this catch also post-merge? we don't really care about that… do we?
            schedulers.SingleBranchScheduler(
                name=f"{project.name}-changes",
                change_filter=util.ChangeFilter(
                    project=project.name,
                ),
                builderNames=[f"{project.name}/nix-eval"],
            ),
            # this is triggered from `nix-eval`
            *(
                schedulers.Triggerable(
                    name=f"{project.name}-nix-build-{arch}",
                    builderNames=[f"{project.name}/nix-build/{arch}"],
                )
                for arch in build_archs
            ),
            # allow to manually trigger a nix-build
            schedulers.ForceScheduler(
                name=f"{project.name}-force",
                builderNames=[f"{project.name}/nix-eval"],
                properties=[
                    util.StringParameter(
                        name="project",
                        label="Name of the Gerrit repository.",
                        default=project.name,
                    ),
                ],
            ),
        ],
    )
    config["builders"].extend(
        [
            # Evaluation runs on the `-other` workers; concurrent evaluations
            # are limited by `eval_lock` to prevent excessive memory usage.
            nix_eval_config(
                gerrit_config,
                project,
                [ f"{w}-other" for w in worker_names ],
                supported_systems=nix_supported_systems,
                worker_count=nix_eval_worker_count,
                max_memory_size=nix_eval_max_memory_size,
                eval_lock=eval_lock,
            ),
            *(
                nix_build_config(
                    project,
                    arch,
                    [ f"{w}-{arch}" for w in worker_names ],
                    # every builder is eligible for "other"
                    [b.to_nix_store() for b in nix_builders if arch in b.systems or arch == "other"],
                    signing_keyfile=signing_keyfile,
                    binary_cache_config=binary_cache_config
                )
                for arch in build_archs
            ),
        ],
    )
    return buildbot_project
class PeriodicWithStartup(schedulers.Periodic):
    """`Periodic` scheduler that can clear its `last_build` state on activation.

    When `run_on_startup` is true, the stored `last_build` state is reset to
    None before the regular activation runs.
    NOTE(review): presumably this makes the scheduler fire right after
    startup — confirm against `Periodic.activate`.
    """
    def __init__(self, *args: Any, run_on_startup: bool = False, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        self.run_on_startup = run_on_startup
    @defer.inlineCallbacks
    def activate(self) -> Generator[Any, object, Any]:
        if self.run_on_startup:
            # Forget when the last build happened before handing over to Periodic.
            yield self.setState("last_build", None)
        yield super().activate()
def gerritReviewFmt(url, data):
    """Format the Gerrit review (message and `Verified` label) for a finished build.

    `data` must contain a `build` dictionary with `builder.name`, `results`,
    `properties` and, when `url` is truthy, `url`. An empty dict (no review) is
    returned for retried builds and for builders other than
    `<event.project>/nix-eval`.

    Raises ValueError when `build`, `build.builder` or `build.builder.name`
    is missing.
    """
    if 'build' not in data:
        raise ValueError('`build` is supposed to be present to format a build')

    build = data['build']
    # `or`, not `and`: with `and` the check could never raise ValueError and a
    # missing `builder` key surfaced as a KeyError instead.
    if 'builder' not in build or 'name' not in build['builder']:
        raise ValueError('either `builder` or `builder.name` is not present in the build dictionary, unexpected format request')

    builderName = build['builder']['name']

    result = build['results']
    if result == util.RETRY:
        return dict()

    properties = build['properties']
    # `failed_builds` below is read as a (value, source) pair; unwrap
    # `event.project` the same way, while still accepting a bare value.
    project = properties.get("event.project")
    if isinstance(project, (list, tuple)):
        project = project[0] if project else None
    if builderName != f'{project}/nix-eval':
        return dict()

    failed = properties.get('failed_builds', [[]])[0]

    labels = {
        'Verified': -1 if result != util.SUCCESS else 1,
    }

    message = "Buildbot finished compiling your patchset!\n"
    message += f"The result is: {util.Results[result].upper()}\n"
    if result != util.SUCCESS:
        message += "\nFailed checks:\n"
        for check, how, urls in failed:
            # entries without URLs (skipped dependents) are indented one level deeper
            if not urls:
                message += "  "
            message += f" - {check}: {how}"
            if urls:
                message += f" (see {', '.join(urls)})"
            message += "\n"

    if url:
        message += "\nFor more details visit:\n"
        message += build['url'] + "\n"

    return dict(message=message, labels=labels)
class GerritNixConfigurator(ConfiguratorBase):
    """Configurator that sets up Nix CI for a list of Gerrit projects.

    `configure` adds to the buildbot configuration: the workers read from the
    workers secret file, the per-project schedulers and builders (see
    config_for_project), the Gerrit change source, a Gerrit status reporter,
    an optional Prometheus reporter, a canceller for superseded builds, a
    secrets provider backed by `$CREDENTIALS_DIRECTORY`, and default `www`
    authorization/authentication settings.
    """

    def __init__(
        self,
        gerrit_server: str,
        gerrit_user: str,
        gerrit_port: int,
        gerrit_sshkey_path: str,
        projects: list[str],
        url: str,
        allowed_origins: list[str],
        nix_builders: list[dict[str, Any]],
        nix_supported_systems: list[str],
        nix_eval_worker_count: int | None,
        nix_eval_max_memory_size: int,
        # Name of the secret file describing the workers.
        # Shape of this file: [ { "name": "<worker-name>", "pass": "<worker-password>", "cores": "<cpu-cores>" } ]
        nix_workers_secret_name: str = "buildbot-nix-workers",  # noqa: S107
        signing_keyfile: str | None = None,
        prometheus_config: dict[str, int | str] | None = None,
        binary_cache_config: dict[str, str] | None = None,
        auth_method: AuthBase | None = None,
        manhole: Any = None,
    ) -> None:
        super().__init__()
        self.manhole = manhole
        self.allowed_origins = allowed_origins
        self.gerrit_server = gerrit_server
        self.gerrit_user = gerrit_user
        self.gerrit_port = gerrit_port
        self.gerrit_sshkey_path = str(gerrit_sshkey_path)
        self.gerrit_config = GerritConfig(domain=self.gerrit_server,
                                          username=self.gerrit_user,
                                          port=self.gerrit_port)
        self.projects = projects
        self.nix_workers_secret_name = nix_workers_secret_name
        self.nix_eval_max_memory_size = nix_eval_max_memory_size
        self.nix_eval_worker_count = nix_eval_worker_count
        self.nix_supported_systems = nix_supported_systems
        self.nix_builders: list[NixBuilder] = [NixBuilder(**builder_cfg) for builder_cfg in nix_builders]

        self.gerrit_change_source = GerritChangeSource(gerrit_server, gerrit_user, gerritport=gerrit_port, identity_file=gerrit_sshkey_path)

        self.url = url
        self.prometheus_config = prometheus_config

        if binary_cache_config is not None:
            self.binary_cache_config = S3BinaryCacheConfig(**binary_cache_config)
        else:
            self.binary_cache_config = None

        self.signing_keyfile = signing_keyfile

        self.auth_method = auth_method

    def configure(self, config: dict[str, Any]) -> None:
        """Populate the buildbot configuration dictionary `config` in place."""
        worker_config = json.loads(read_secret_file(self.nix_workers_secret_name))
        worker_names = []

        if self.manhole is not None:
            config["manhole"] = self.manhole

        config.setdefault("projects", [])
        config.setdefault("secretsProviders", [])
        config.setdefault("www", {
            'allowed_origins': self.allowed_origins
        })
        # These lists are appended to below; create them when the master
        # configuration has not done so instead of failing with KeyError.
        for list_key in ("workers", "schedulers", "builders", "services"):
            config.setdefault(list_key, [])

        build_archs = self.nix_supported_systems + ["other"]

        # One buildbot worker per (core, arch); `worker_names` holds the base
        # name (without the arch suffix) once per core.
        for item in worker_config:
            cores = item.get("cores", 0)
            for i in range(cores):
                worker_name = f"{item['name']}-{i:03}"
                for arch in build_archs:
                    config["workers"].append(worker.Worker(f"{worker_name}-{arch}", item["pass"]))
                worker_names.append(worker_name)

        eval_lock = util.MasterLock("nix-eval")

        for project in self.projects:
            config_for_project(
                config,
                self.gerrit_config,
                GerritProject(name=project, private_sshkey_path=self.gerrit_sshkey_path),
                worker_names,
                self.nix_supported_systems,
                self.nix_eval_worker_count or multiprocessing.cpu_count(),
                self.nix_eval_max_memory_size,
                eval_lock,
                self.nix_builders,
                signing_keyfile=self.signing_keyfile,
                binary_cache_config=self.binary_cache_config
            )

        config["change_source"] = self.gerrit_change_source
        config["services"].append(
            reporters.GerritStatusPush(self.gerrit_server, self.gerrit_user,
                                       port=self.gerrit_port,
                                       identity_file=self.gerrit_sshkey_path,
                                       generators=[
                                           # gerritReviewCB / self.url
                                           BuildStatusGenerator(
                                               message_formatter=MessageFormatterFunction(
                                                   lambda data: gerritReviewFmt(self.url, data),
                                                   "plain",
                                                   want_properties=True,
                                                   want_steps=True
                                               ),
                                           ),
                                       ])
            # startCB, summaryCB are too noisy, we won't use them.
        )

        if self.prometheus_config is not None:
            config['services'].append(reporters.Prometheus(port=self.prometheus_config.get('port', 9100), interface=self.prometheus_config.get('address', '')))

        # Upstream defaults pretend they already do something similar
        # but they didn't work, hence the custom function.
        def gerritBranchKey(b):
            # For `refs/changes/...` refs the last path component is dropped,
            # so refs differing only in that component share a key.
            ref = b['branch']
            if not ref.startswith('refs/changes/'):
                return ref
            return ref.rsplit('/', 1)[0]

        config["services"].append(
            util.OldBuildCanceller(
                "build_canceller",
                filters=[
                    (
                        [
                            f"{project}/nix-{kind}"
                            for kind in [ "eval" ] + [
                                f"build/{arch}"
                                for arch in build_archs
                            ]
                        ],
                        util.SourceStampFilter(project_eq=[project])
                    )
                    for project in self.projects
                ],
                branch_key=gerritBranchKey
            )
        )

        systemd_secrets = secrets.SecretInAFile(
            dirname=os.environ["CREDENTIALS_DIRECTORY"],
        )
        config["secretsProviders"].append(systemd_secrets)

        config["www"].setdefault("plugins", {})

        if "authz" not in config["www"]:
            config["www"]["authz"] = util.Authz(
                allowRules=[
                    util.AnyEndpointMatcher(role="admin", defaultDeny=False),
                    util.StopBuildEndpointMatcher(role="owner"),
                    util.AnyControlEndpointMatcher(role="admin"),
                ],
                roleMatchers=[
                    # A user must have buildbot-<something> to have the role <something>
                    # e.g. buildbot-admin to be admin.
                    util.RolesFromGroups(groupPrefix="buildbot-"),
                    util.RolesFromOwner(role="owner")
                ],
            )

        if "auth" not in config["www"] and self.auth_method is not None:
            config["www"]["auth"] = self.auth_method