diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 3f5fd19..da676a1 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -1,9 +1,8 @@ import json import multiprocessing import os -import signal import sys -import uuid +import graphlib from collections import defaultdict from collections.abc import Generator from dataclasses import dataclass @@ -19,6 +18,15 @@ from buildbot.process.results import ALL_RESULTS, statusToString from buildbot.steps.trigger import Trigger from buildbot.util import asyncSleep from buildbot.www.authz.endpointmatchers import EndpointMatcherBase, Match +from buildbot.www.oauth2 import OAuth2Auth +from buildbot.changes.gerritchangesource import GerritChangeSource +from buildbot.reporters.utils import getURLForBuild +from buildbot.reporters.utils import getURLForBuildrequest +from buildbot.process.buildstep import CANCELLED +from buildbot.process.buildstep import EXCEPTION +from buildbot.process.buildstep import SUCCESS +from buildbot.process.results import worst_status +from buildbot_nix.binary_cache import LocalSigner if TYPE_CHECKING: from buildbot.process.log import Log @@ -27,107 +35,242 @@ from twisted.internet import defer, threads from twisted.logger import Logger from twisted.python.failure import Failure +from .binary_cache import S3BinaryCacheConfig + from .github_projects import ( - GithubProject, - create_project_hook, - load_projects, - refresh_projects, slugify_project_name, ) -SKIPPED_BUILDER_NAME = "skipped-builds" - log = Logger() +class LixSystemsOAuth2(OAuth2Auth): + name = 'Lix' + faIcon = 'fa-login' + resourceEndpoint = "https://identity.lix.systems" + # is passing scope necessary? + authUri = 'https://identity.lix.systems/realms/lix-project/protocol/openid-connect/auth' + tokenUri = 'https://identity.lix.systems/realms/lix-project/protocol/openid-connect/token' class BuildbotNixError(Exception): pass +@dataclass +class GerritProject: + # `project` field. 
def __init__(
    self,
    builds_scheduler_group: str,
    jobs: list[dict[str, Any]],
    all_deps: dict[str, Any],
    **kwargs: Any,
) -> None:
    """Set up a step that triggers one build per evaluated job.

    Args:
        builds_scheduler_group: Prefix of the scheduler names; a suffix
            (``-<system>`` or ``-other``) is appended per job when scheduling.
        jobs: Job dicts produced by the evaluation step.
        all_deps: Maps a job's ``drvPath`` to the ``drvPath``s of the other
            jobs it depends on.
        **kwargs: Forwarded unchanged to the base step constructor.
    """
    self.jobs = jobs
    self.all_deps = all_deps
    self.config = None
    self.builds_scheduler_group = builds_scheduler_group
    self._result_list = []
    # `interrupt()` tests `self.running`; give it a value up front so an
    # interrupt that arrives before `run()` has started does not depend on
    # an attribute this class never assigned.
    self.running = False
    self.ended = False
    self.waitForFinishDeferred = None
    self.brids = []
    self.description = f"building {len(jobs)} hydra jobs"
    super().__init__(**kwargs)
def schedule_one(self, build_props, job):
    """Build the trigger properties for one job and pick its scheduler.

    Args:
        build_props: Properties of the parent build; the job's output and
            derivation paths are recorded on it under ``<attr>-out_path``
            and ``<attr>-drv_path`` (not done for jobs that failed to
            evaluate).
        job: One job dict from the evaluation step.

    Returns:
        A ``(scheduler_name, props)`` tuple. Jobs carrying an ``error`` go to
        the ``<group>-other`` scheduler, everything else to
        ``<group>-<system>``.
    """
    source = "nix-eval-lix"
    attr = job.get("attr", "eval-error")
    error = job.get("error")

    props = Properties()
    props.setProperty("virtual_builder_name", f"hydraJobs.{attr}", source)
    props.setProperty("status_name", f"nix-build .#hydraJobs.{attr}", source)
    props.setProperty("virtual_builder_tags", "", source)
    # Set `attr` before the error branch returns: the build step reads it to
    # name the attribute that failed to evaluate.
    props.setProperty("attr", attr, source)

    if error is not None:
        props.setProperty("error", error, source)
        return (f"{self.builds_scheduler_group}-other", props)

    drv_path = job.get("drvPath")
    system = job.get("system")
    out_path = job.get("outputs", {}).get("out")

    build_props.setProperty(f"{attr}-out_path", out_path, source)
    build_props.setProperty(f"{attr}-drv_path", drv_path, source)

    props.setProperty("system", system, source)
    props.setProperty("drv_path", drv_path, source)
    props.setProperty("out_path", out_path, source)
    props.setProperty("isCached", job.get("isCached"), source)

    return (f"{self.builds_scheduler_group}-{system}", props)
def _jobs_in_dependency_order(self):
    """Return ``self.jobs`` ordered so dependencies come before dependents.

    Jobs whose ``drvPath`` is not a node of ``self.all_deps`` (for example
    jobs that failed to evaluate and have no ``drvPath``) are appended at the
    end instead of being dropped, so they still get scheduled and reported.
    """
    jobs_by_drv = {}
    for job in self.jobs:
        jobs_by_drv.setdefault(job.get("drvPath"), []).append(job)

    ordered = []
    for drv_path in graphlib.TopologicalSorter(self.all_deps).static_order():
        ordered.extend(jobs_by_drv.pop(drv_path, []))
    for leftover in jobs_by_drv.values():
        ordered.extend(leftover)
    return ordered


def _release_dependents(self, drv_path):
    """Remove ``drv_path`` from every dependency list in ``self.all_deps``."""
    for deps in self.all_deps.values():
        if drv_path in deps:
            deps.remove(drv_path)


def _record_failure(self, job, how, urls, pending, failed):
    """Record ``job`` as failed and drop every pending job that depends on it.

    Appends ``(attr, how, urls)`` for the job itself and a
    ``dependency ... failed`` entry for each dropped dependent to ``failed``,
    removes the dependents from ``pending`` in place, and returns the
    attribute names of the dropped jobs.
    """
    failed.append((job.get("attr"), how, urls))
    failed_paths = [job.get("drvPath")]
    removed = []
    # Repeat until a pass adds nothing, so dependents of dependents go too.
    while True:
        old_paths = list(failed_paths)
        for build in list(pending):
            deps = self.all_deps.get(build.get("drvPath"), [])
            if any(path in deps for path in old_paths):
                failed_paths.append(build.get("drvPath"))
                pending.remove(build)
                removed.append(build.get("attr"))
                failed.append((build.get("attr"), f"dependency {job.get('attr')} failed", []))
        if old_paths == failed_paths:
            return removed


def _skipped_message(self, removed):
    """Format the log line listing skipped jobs, abbreviated after three."""
    if len(removed) > 3:
        return ' Skipping jobs: ' + ', '.join(removed[:3]) + f', ... ({len(removed) - 3} more)\n'
    return ' Skipping jobs: ' + ', '.join(removed) + '\n'


@defer.inlineCallbacks
def run(self):
    """Trigger every job once its dependencies are done and collect results.

    Jobs become eligible when their entry in ``self.all_deps`` is empty.
    Cached jobs are skipped without triggering a build. When a job fails,
    all pending jobs depending on it are dropped and listed as failed. The
    list of failures is stored in the ``failed_builds`` build property.

    Returns:
        ``util.CANCELLED`` if the step was interrupted, otherwise the worst
        result seen across the triggered builds.
    """
    self.running = True
    build_props = self.build.getProperties()
    logs: Log = yield self.addLog("build info")

    build_schedule_order = self._jobs_in_dependency_order()
    scheduled = []
    failed = []
    all_results = SUCCESS
    ss_for_trigger = self.prepareSourcestampListForTrigger()

    while not self.ended and (build_schedule_order or scheduled):
        # Split off the jobs that have no unfinished dependency left.
        schedule_now = []
        still_waiting = []
        for build in build_schedule_order:
            if self.all_deps.get(build.get("drvPath"), []):
                still_waiting.append(build)
            else:
                schedule_now.append(build)
        build_schedule_order = still_waiting

        for job in schedule_now:
            drv_path = job.get("drvPath")
            if job.get('isCached'):
                logs.addStdout(f"Cached {job.get('attr')} ({drv_path}) - skipping\n")
                self._release_dependents(drv_path)
                continue
            logs.addStdout(f"Scheduling {job.get('attr')} ({drv_path})\n")
            scheduler_name, props = self.schedule_one(build_props, job)
            scheduler = self.getSchedulerByName(scheduler_name)

            idsDeferred, resultsDeferred = scheduler.trigger(
                waited_for=True,
                sourcestamps=ss_for_trigger,
                set_props=props,
                parent_buildid=self.build.buildid,
                parent_relationship="Triggered from",
            )

            try:
                _, brids = yield idsDeferred
            except Exception as e:  # noqa: BLE001
                # No build request was created, so do not wait on
                # `resultsDeferred`; record the failure instead of losing it.
                yield self.addLogWithException(e)
                all_results = worst_status(EXCEPTION, all_results)
                removed = self._record_failure(
                    job, "failed to schedule", [], build_schedule_order, failed
                )
                yield logs.addStdout(self._skipped_message(removed))
                continue
            scheduled.append((job, brids, resultsDeferred))

            for brid in brids.values():
                url = getURLForBuildrequest(self.master, brid)
                yield self.addURL(f"{scheduler.name} #{brid}", url)
                # Not yielded: results are collected in the background.
                self._add_results(brid)
                self.brids.append(brid)

        if not scheduled:
            if not build_schedule_order:
                logs.addStderr('Ran out of builds\n')
                break
            continue

        wait_for_next = defer.DeferredList(
            [pending_result for _, _, pending_result in scheduled],
            fireOnOneCallback=True,
            fireOnOneErrback=True,
        )
        self.waitForFinishDeferred = wait_for_next
        results, index = yield wait_for_next
        job, brids, _ = scheduled.pop(index)
        result = results[0]
        logs.addStdout(f'Build {job.get("attr")} ({job.get("drvPath")}) finished, result {util.Results[result].upper()}\n')
        if result != SUCCESS:
            urls = [getURLForBuildrequest(self.master, brid) for brid in brids.values()]
            removed = self._record_failure(job, "failed", urls, build_schedule_order, failed)
            yield logs.addStdout(self._skipped_message(removed))
            all_results = worst_status(result, all_results)
        self._release_dependents(job.get("drvPath"))

    yield logs.addHeader('Done!\n')
    yield logs.finish()
    build_props.setProperty("failed_builds", failed, "nix-eval-lix")
    if self.ended:
        return util.CANCELLED
    return all_results
+ ) + yield self.runCommand(cmd) + drv_show_log.addStdout(f"done\n") + try: + drv_info = json.loads(cmd.stdout) + except json.JSONDecodeError as e: + msg = f"Failed to parse `nix derivation show` output for {cmd.command}" + raise BuildbotNixError(msg) from e + all_deps = dict() + for drv, info in drv_info.items(): + all_deps[drv] = set(info.get("inputDrvs").keys()) + def closure_of(key, deps): + r, size = set([key]), 0 + while len(r) != size: + size = len(r) + r.update(*[ deps[k] for k in r ]) + return r.difference([key]) + job_set = set(( drv for drv in ( job.get("drvPath") for job in filtered_jobs ) if drv )) + all_deps = { k: list(closure_of(k, all_deps).intersection(job_set)) for k in job_set } + self.build.addStepsAfterCurrentStep( [ BuildTrigger( - builds_scheduler=f"{project_id}-nix-build", - skipped_builds_scheduler=f"{project_id}-nix-skipped-build", + builds_scheduler_group=f"lix-nix-build", name="build flake", jobs=filtered_jobs, + all_deps=all_deps, ), ], ) @@ -197,37 +364,6 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep): return result -# FIXME this leaks memory... but probably not enough that we care -class RetryCounter: - def __init__(self, retries: int) -> None: - self.builds: dict[uuid.UUID, int] = defaultdict(lambda: retries) - - def retry_build(self, build_id: uuid.UUID) -> int: - retries = self.builds[build_id] - if retries > 1: - self.builds[build_id] = retries - 1 - return retries - return 0 - - -# For now we limit this to two. 
@defer.inlineCallbacks
def run(self) -> Generator[Any, object, Any]:
    """Report an eval error, skip a cached job, or run the build command.

    Returns:
        ``util.FAILURE`` when the ``error`` property is set,
        ``util.SKIPPED`` when ``isCached`` is set, otherwise the result of
        the remote shell command.
    """
    if error := self.getProperty("error"):
        # `attr` may be unset for jobs that failed to evaluate; fall back to
        # the virtual builder name so the message never reads "None".
        attr = self.getProperty("attr") or self.getProperty("virtual_builder_name")
        # show eval error
        error_log: Log = yield self.addLog("nix_error")
        error_log.addStderr(f"{attr} failed to evaluate:\n{error}")
        return util.FAILURE

    if self.getProperty("isCached"):
        yield self.addCompleteLog(
            "cached outpath from previous builds",
            # buildbot apparently hides the first line in the ui?
            f'\n{self.getProperty("out_path")}\n',
        )
        return util.SKIPPED

    # run `nix build`
    cmd: remotecommand.RemoteCommand = yield self.makeRemoteShellCommand()
    yield self.runCommand(cmd)

    return cmd.results()
is sometimes not delivering the pull request ref fast enough. -class GitWithRetry(steps.Git): - @defer.inlineCallbacks - def run_vc( - self, - branch: str, - revision: str, - patch: str, - ) -> Generator[Any, object, Any]: - retry_counter = 0 - while True: - try: - res = yield super().run_vc(branch, revision, patch) - except Exception as e: # noqa: BLE001 - retry_counter += 1 - if retry_counter == 3: - msg = "Failed to clone" - raise BuildbotNixError(msg) from e - log: Log = yield self.addLog("log") - yield log.addStderr(f"Retrying git clone (error: {e})\n") - yield asyncSleep(2 << retry_counter) # 2, 4, 8 - else: - return res - - def nix_eval_config( - project: GithubProject, + project: GerritProject, + gerrit_private_key: str, worker_names: list[str], - github_token_secret: str, supported_systems: list[str], eval_lock: util.MasterLock, worker_count: int, @@ -366,19 +433,21 @@ def nix_eval_config( """ factory = util.BuildFactory() # check out the source - url_with_secret = util.Interpolate( - f"https://git:%(secret:{github_token_secret})s@github.com/%(prop:project)s", - ) factory.addStep( - GitWithRetry( - repourl=url_with_secret, - method="clean", - submodules=True, - haltOnFailure=True, + steps.Gerrit( + repourl="ssh://buildbot@gerrit.lix.systems:2022/lix", + mode="full", + retry=[60, 60], + timeout=3600, + sshPrivateKey=gerrit_private_key ), ) + # use one gcroots directory per worker. this should be scoped to the largest unique resource + # in charge of builds (ie, buildnumber is too narrow) to not litter the system with permanent + # gcroots in case of worker restarts. + # TODO perhaps we should clean the entire /drvs/ directory up too during startup. 
drv_gcroots_dir = util.Interpolate( - "/nix/var/nix/gcroots/per-user/buildbot-worker/%(prop:project)s/drvs/", + "/nix/var/nix/gcroots/per-user/buildbot-worker/%(prop:project)s/drvs/%(prop:workername)s/", ) factory.addStep( @@ -400,7 +469,7 @@ def nix_eval_config( "--force-recurse", "--check-cache-status", "--flake", - ".#checks", + ".#hydraJobs", ], haltOnFailure=True, locks=[eval_lock.access("exclusive")], @@ -415,6 +484,7 @@ def nix_eval_config( "-rf", drv_gcroots_dir, ], + alwaysRun=True, ), ) @@ -427,26 +497,13 @@ def nix_eval_config( ) -@dataclass -class CachixConfig: - name: str - signing_key_secret_name: str | None = None - auth_token_secret_name: str | None = None - - def cachix_env(self) -> dict[str, str]: - env = {} - if self.signing_key_secret_name is not None: - env["CACHIX_SIGNING_KEY"] = util.Secret(self.signing_key_secret_name) - if self.auth_token_secret_name is not None: - env["CACHIX_AUTH_TOKEN"] = util.Secret(self.auth_token_secret_name) - return env - - def nix_build_config( - project: GithubProject, + project: GerritProject, + worker_arch: str, worker_names: list[str], - cachix: CachixConfig | None = None, outputs_path: Path | None = None, + signing_keyfile: str | None = None, + binary_cache_config: S3BinaryCacheConfig | None = None ) -> util.BuilderConfig: """Builds one nix flake attribute.""" factory = util.BuildFactory() @@ -461,6 +518,8 @@ def nix_build_config( "--option", "keep-going", "true", + # do not build directly on the coordinator + "--max-jobs", "0", "--option", # stop stuck builds after 20 minutes "--max-silent-time", @@ -476,20 +535,40 @@ def nix_build_config( haltOnFailure=True, ), ) - if cachix: + + if signing_keyfile is not None: factory.addStep( steps.ShellCommand( - name="Upload cachix", - env=cachix.cachix_env(), + name="Sign the store path", command=[ - "cachix", - "push", - cachix.name, - util.Interpolate("result-%(prop:attr)s"), - ], + "nix", + "store", + "sign", + "--key-file", + signing_keyfile, + util.Interpolate( + 
"%(prop:drv_path)s^*" + ) + ] ), ) + if binary_cache_config is not None: + factory.addStep( + steps.ShellCommand( + name="Upload the store path to the cache", + command=[ + "nix", + "copy", + "--to", + f"s3://{binary_cache_config.bucket}?profile={binary_cache_config.profile}&region={binary_cache_config.region}&endpoint={binary_cache_config.endpoint}", + util.Property( + "out_path" + ) + ] + ) + ) + factory.addStep( steps.ShellCommand( name="Register gcroot", @@ -521,39 +600,7 @@ def nix_build_config( ), ) return util.BuilderConfig( - name=f"{project.name}/nix-build", - project=project.name, - workernames=worker_names, - collapseRequests=False, - env={}, - factory=factory, - ) - - -def nix_skipped_build_config( - project: GithubProject, - worker_names: list[str], -) -> util.BuilderConfig: - """Dummy builder that is triggered when a build is skipped.""" - factory = util.BuildFactory() - factory.addStep( - EvalErrorStep( - name="Nix evaluation", - doStepIf=lambda s: s.getProperty("error"), - hideStepIf=lambda _, s: not s.getProperty("error"), - ), - ) - - # This is just a dummy step showing the cached build - factory.addStep( - steps.BuildStep( - name="Nix build (cached)", - doStepIf=lambda _: False, - hideStepIf=lambda _, s: s.getProperty("error"), - ), - ) - return util.BuilderConfig( - name=f"{project.name}/nix-skipped-build", + name=f"{project.name}/nix-build/{worker_arch}", project=project.name, workernames=worker_names, collapseRequests=False, @@ -570,204 +617,88 @@ def read_secret_file(secret_name: str) -> str: return Path(directory).joinpath(secret_name).read_text().rstrip() -@dataclass -class GithubConfig: - oauth_id: str - admins: list[str] - - buildbot_user: str - oauth_secret_name: str = "github-oauth-secret" - webhook_secret_name: str = "github-webhook-secret" - token_secret_name: str = "github-token" - project_cache_file: Path = Path("github-project-cache.json") - topic: str | None = "build-with-buildbot" - - def token(self) -> str: - return
def config_for_project(
    config: dict[str, Any],
    project: GerritProject,
    worker_names: list[str],
    nix_supported_systems: list[str],
    nix_eval_worker_count: int,
    nix_eval_max_memory_size: int,
    eval_lock: util.MasterLock,
    outputs_path: Path | None = None,
    signing_keyfile: str | None = None,
    binary_cache_config: S3BinaryCacheConfig | None = None,
) -> None:
    """Register the project, its schedulers and its builders in ``config``.

    Adds one eval builder plus one build builder and one triggerable
    scheduler per entry of ``nix_supported_systems`` and for the extra
    ``"other"`` architecture. ``config`` is modified in place.

    Raises:
        RuntimeError: If the Gerrit SSH private key cannot be read.
    """
    architectures = [*nix_supported_systems, "other"]

    config["projects"].append(Project(project.name))
    config["schedulers"].extend(
        [
            # build everything pertaining to a project
            # TODO(raito): will this catch also post-merge? we don't really care about that… do we?
            schedulers.SingleBranchScheduler(
                name=f"{project.name}-changes",
                change_filter=util.ChangeFilter(
                    project=project.name,
                ),
                builderNames=[f"{project.name}/nix-eval"],
            ),
            # this is triggered from `nix-eval`
            *(
                schedulers.Triggerable(
                    name=f"{project.name}-nix-build-{arch}",
                    builderNames=[f"{project.name}/nix-build/{arch}"],
                )
                for arch in architectures
            ),
            # allow to manually trigger a nix-build
            schedulers.ForceScheduler(
                name=f"{project.name}-force",
                builderNames=[f"{project.name}/nix-eval"],
                properties=[
                    util.StringParameter(
                        name="project",
                        label="Name of the Gerrit repository.",
                        default=project.name,
                    ),
                ],
            ),
        ],
    )

    # A missing or unreadable key surfaces as the intended RuntimeError;
    # checking the result of a successful read for None could never fire.
    try:
        gerrit_private_key = Path("/var/lib/buildbot/master/id_gerrit").read_text()
    except OSError as e:
        msg = "No gerrit private key to fetch the repositories"
        raise RuntimeError(msg) from e

    config["builders"].extend(
        [
            # Evaluation is assigned to the "-other" worker names only.
            # NOTE(review): presumably this keeps evaluation off the
            # per-system build workers to limit memory usage; confirm.
            nix_eval_config(
                project,
                gerrit_private_key,
                [f"{w}-other" for w in worker_names],
                supported_systems=nix_supported_systems,
                worker_count=nix_eval_worker_count,
                max_memory_size=nix_eval_max_memory_size,
                eval_lock=eval_lock,
            ),
            *(
                nix_build_config(
                    project,
                    arch,
                    [f"{w}-{arch}" for w in worker_names],
                    outputs_path=outputs_path,
                    signing_keyfile=signing_keyfile,
                    binary_cache_config=binary_cache_config,
                )
                for arch in architectures
            ),
        ],
    )
Generator[defer.Deferred[Match], Any, Any]: - res = yield endpoint_object.get({}, endpoint_dict) - if res is None: - return None - - builder = yield self.master.data.get(("builders", res["builderid"])) - if builder["name"] in self.builders: - log.warn( - "Builder {builder} allowed by {role}: {builders}", - builder=builder["name"], - role=self.role, - builders=self.builders, - ) - return Match(self.master, **{object_type: res}) - else: - log.warn( - "Builder {builder} not allowed by {role}: {builders}", - builder=builder["name"], - role=self.role, - builders=self.builders, - ) - - def match_BuildEndpoint_rebuild( # noqa: N802 - self, - epobject: Any, - epdict: dict[str, Any], - options: dict[str, Any], - ) -> defer.Deferred[Match]: - return self.check_builder(epobject, epdict, "build") - - def match_BuildEndpoint_stop( # noqa: N802 - self, - epobject: Any, - epdict: dict[str, Any], - options: dict[str, Any], - ) -> defer.Deferred[Match]: - return self.check_builder(epobject, epdict, "build") - - def match_BuildRequestEndpoint_stop( # noqa: N802 - self, - epobject: Any, - epdict: dict[str, Any], - options: dict[str, Any], - ) -> defer.Deferred[Match]: - return self.check_builder(epobject, epdict, "buildrequest") - - -def setup_authz(projects: list[GithubProject], admins: list[str]) -> util.Authz: - allow_rules = [] - allowed_builders_by_org: defaultdict[str, set[str]] = defaultdict( - lambda: {"reload-github-projects"}, - ) - - for project in projects: - if project.belongs_to_org: - for builder in ["nix-build", "nix-skipped-build", "nix-eval"]: - allowed_builders_by_org[project.owner].add(f"{project.name}/{builder}") - - for org, allowed_builders in allowed_builders_by_org.items(): - allow_rules.append( - AnyProjectEndpointMatcher( - builders=allowed_builders, - role=org, - defaultDeny=False, - ), - ) - - allow_rules.append(util.AnyEndpointMatcher(role="admin", defaultDeny=False)) - allow_rules.append(util.AnyControlEndpointMatcher(role="admins")) - return 
def gerritStartCB(builderName, build, arg):
    """Compose the Gerrit comment announcing that a build has started.

    Args:
        builderName: Name of the builder, quoted in the message.
        build: Build dict; only its ``'url'`` entry is read.
        arg: Unused.

    Returns:
        A dict with a single ``message`` key holding the comment text.
    """
    parts = (
        "Buildbot started compiling your patchset\n",
        "on configuration: %s\n" % builderName,
        "See your build here: %s" % build['url'],
    )
    return {"message": "".join(parts)}
+ + msgs.append(msg) + + if buildInfo['result'] == util.SUCCESS: + success = True + else: + failure = True + + if success and not failure: + verified = 1 + else: + verified = -1 + + return dict(message='\n\n'.join(msgs), + labels={ + 'Verified': verified + }) + +class GerritNixConfigurator(ConfiguratorBase): """Janitor is a configurator which create a Janitor Builder with all needed Janitor steps""" def __init__( self, # Shape of this file: [ { "name": "", "pass": "", "cores": "" } ] - github: GithubConfig, + gerrit_server: str, + gerrit_user: str, + gerrit_port: int, + gerrit_sshkey_path: str, url: str, nix_supported_systems: list[str], nix_eval_worker_count: int | None, nix_eval_max_memory_size: int, nix_workers_secret_name: str = "buildbot-nix-workers", # noqa: S107 - cachix: CachixConfig | None = None, + signing_keyfile: str | None = None, + binary_cache_config: dict[str, str] | None = None, outputs_path: str | None = None, ) -> None: super().__init__() + self.gerrit_server = gerrit_server + self.gerrit_user = gerrit_user + self.gerrit_port = gerrit_port self.nix_workers_secret_name = nix_workers_secret_name self.nix_eval_max_memory_size = nix_eval_max_memory_size self.nix_eval_worker_count = nix_eval_worker_count self.nix_supported_systems = nix_supported_systems - self.github = github + self.gerrit_change_source = GerritChangeSource(gerrit_server, gerrit_user, gerritport=gerrit_port, identity_file=gerrit_sshkey_path) self.url = url - self.cachix = cachix + if binary_cache_config is not None: + self.binary_cache_config = S3BinaryCacheConfig(**binary_cache_config) + else: + self.binary_cache_config = None + self.signing_keyfile = signing_keyfile if outputs_path is None: self.outputs_path = None else: self.outputs_path = Path(outputs_path) def configure(self, config: dict[str, Any]) -> None: - projects = load_projects(self.github.token(), self.github.project_cache_file) - if self.github.topic is not None: - projects = [p for p in projects if self.github.topic in 
p.topics] worker_config = json.loads(read_secret_file(self.nix_workers_secret_name)) worker_names = [] @@ -822,67 +829,66 @@ class NixConfigurator(ConfiguratorBase): for item in worker_config: cores = item.get("cores", 0) for i in range(cores): - worker_name = f"{item['name']}-{i:03}" - config["workers"].append(worker.Worker(worker_name, item["pass"])) - worker_names.append(worker_name) + for arch in self.nix_supported_systems + ["other"]: + worker_name = f"{item['name']}-{i:03}" + config["workers"].append(worker.Worker(f"{worker_name}-{arch}", item["pass"])) + worker_names.append(worker_name) - webhook_secret = read_secret_file(self.github.webhook_secret_name) eval_lock = util.MasterLock("nix-eval") - for project in projects: - create_project_hook( - project.owner, - project.repo, - self.github.token(), - self.url + "change_hook/github", - webhook_secret, - ) - config_for_project( - config, - project, - worker_names, - self.github, - self.nix_supported_systems, - self.nix_eval_worker_count or multiprocessing.cpu_count(), - self.nix_eval_max_memory_size, - eval_lock, - self.cachix, - self.outputs_path, - ) + # Configure the Lix project. 
+ config_for_project( + config, + GerritProject(name="lix"), + worker_names, + self.nix_supported_systems, + self.nix_eval_worker_count or multiprocessing.cpu_count(), + self.nix_eval_max_memory_size, + eval_lock, + self.outputs_path, + signing_keyfile=self.signing_keyfile, + binary_cache_config=self.binary_cache_config + ) - # Reload github projects - config["builders"].append( - reload_github_projects( - [worker_names[0]], - self.github.token(), - self.github.project_cache_file, - ), - ) - config["workers"].append(worker.LocalWorker(SKIPPED_BUILDER_NAME)) - config["schedulers"].extend( - [ - schedulers.ForceScheduler( - name="reload-github-projects", - builderNames=["reload-github-projects"], - buttonName="Update projects", - ), - # project list twice a day and on startup - PeriodicWithStartup( - name="reload-github-projects-bidaily", - builderNames=["reload-github-projects"], - periodicBuildTimer=12 * 60 * 60, - run_on_startup=not self.github.project_cache_file.exists(), - ), - ], - ) + config["change_source"] = self.gerrit_change_source config["services"].append( - reporters.GitHubStatusPush( - token=self.github.token(), - # Since we dynamically create build steps, - # we use `virtual_builder_name` in the webinterface - # so that we distinguish what has beeing build - context=Interpolate("buildbot/%(prop:status_name)s"), - ), + reporters.GerritStatusPush(self.gerrit_server, self.gerrit_user, + port=2022, + identity_file='/var/lib/buildbot/master/id_gerrit', + summaryCB=None, + startCB=None, + wantSteps=True, + reviewCB=gerritReviewCB, + reviewArg=self.url) + # startCB=gerritStartCB, + # startArg=self.url, + # summaryCB=gerritSummaryCB, + # summaryArg=self.url) + + ) + + def gerritBranchKey(b): + ref = b['branch'] + if not ref.startswith('refs/changes/'): + return ref + return ref.rsplit('/', 1)[0] + config["services"].append( + util.OldBuildCanceller( + "lix_build_canceller", + filters=[ + ( + [ + f"lix/nix-{kind}" + for kind in [ "eval" ] + [ + f"build/{arch}" 
+ for arch in self.nix_supported_systems + [ "other" ] + ] + ], + util.SourceStampFilter(project_eq=["lix"]) + ) + ], + branch_key=gerritBranchKey + ) ) systemd_secrets = secrets.SecretInAFile( @@ -891,28 +897,6 @@ class NixConfigurator(ConfiguratorBase): config["secretsProviders"].append(systemd_secrets) config["www"].setdefault("plugins", {}) - config["www"]["plugins"].update(dict(base_react={})) - - config["www"].setdefault("change_hook_dialects", {}) - config["www"]["change_hook_dialects"]["github"] = { - "secret": webhook_secret, - "strict": True, - "token": self.github.token(), - "github_property_whitelist": "*", - } if "auth" not in config["www"]: - config["www"].setdefault("avatar_methods", []) - config["www"]["avatar_methods"].append( - util.AvatarGitHub(token=self.github.token()), - ) - config["www"]["auth"] = util.GitHubAuth( - self.github.oauth_id, - read_secret_file(self.github.oauth_secret_name), - apiVersion=4, - ) - - config["www"]["authz"] = setup_authz( - admins=self.github.admins, - projects=projects, - ) + config["www"]["auth"] = LixSystemsOAuth2('buildbot', read_secret_file('buildbot-oauth2-secret'), autologin=True) diff --git a/buildbot_nix/binary_cache.py b/buildbot_nix/binary_cache.py new file mode 100644 index 0000000..8315e21 --- /dev/null +++ b/buildbot_nix/binary_cache.py @@ -0,0 +1,12 @@ +from dataclasses import dataclass + +@dataclass +class S3BinaryCacheConfig: + region: str + bucket: str + endpoint: str + profile: str + +class LocalSigner: + def __init__(self, keyfile: str): + self.keyfile = keyfile diff --git a/buildbot_nix/worker.py b/buildbot_nix/worker.py index 3f7139d..17298a8 100644 --- a/buildbot_nix/worker.py +++ b/buildbot_nix/worker.py @@ -22,8 +22,14 @@ class WorkerConfig: .read_text() .rstrip("\r\n") ) - worker_count: int = int( - os.environ.get("WORKER_COUNT", str(multiprocessing.cpu_count())), + worker_arch_list: dict[str, int] = field( + default_factory=lambda: dict(other=1) | { + arch: int(count) + for arch, count in 
( + e.split("=") + for e in os.environ.get("WORKER_ARCH_LIST", "").split(",") + ) + }, ) buildbot_dir: Path = field( default_factory=lambda: Path(require_env("BUILDBOT_DIR")) @@ -34,13 +40,14 @@ class WorkerConfig: def setup_worker( application: components.Componentized, builder_id: int, + arch: str, config: WorkerConfig, ) -> None: - basedir = config.buildbot_dir.parent / f"{config.buildbot_dir.name}-{builder_id:03}" + basedir = config.buildbot_dir.parent / f"{config.buildbot_dir.name}-{builder_id:03}/{arch}" basedir.mkdir(parents=True, exist_ok=True, mode=0o700) hostname = socket.gethostname() - workername = f"{hostname}-{builder_id:03}" + workername = f"{hostname}-{builder_id:03}-{arch}" keepalive = 600 umask = None maxdelay = 300 @@ -66,8 +73,9 @@ def setup_worker( def setup_workers(application: components.Componentized, config: WorkerConfig) -> None: - for i in range(config.worker_count): - setup_worker(application, i, config) + for arch, jobs in config.worker_arch_list.items(): + for i in range(jobs): + setup_worker(application, i, arch, config) # note: this line is matched against to check that this is a worker diff --git a/examples/default.nix b/examples/default.nix index f9fb42e..f59a01a 100644 --- a/examples/default.nix +++ b/examples/default.nix @@ -46,14 +46,6 @@ in # optional nix-eval-jobs settings # evalWorkerCount = 8; # limit number of concurrent evaluations # evalMaxMemorySize = "2048"; # limit memory usage per evaluation - - # optional cachix - #cachix = { - # name = "my-cachix"; - # # One of the following is required: - # signingKey = "/var/lib/secrets/cachix-key"; - # authToken = "/var/lib/secrets/cachix-token"; - #}; }; }) buildbot-nix.nixosModules.buildbot-master diff --git a/flake.nix b/flake.nix index e6f5dab..b06669d 100644 --- a/flake.nix +++ b/flake.nix @@ -20,7 +20,7 @@ ] ++ inputs.nixpkgs.lib.optional (inputs.treefmt-nix ? 
flakeModule) ./nix/treefmt/flake-module.nix; systems = [ "x86_64-linux" ]; flake = { - nixosModules.buildbot-master = ./nix/master.nix; + nixosModules.buildbot-coordinator = ./nix/coordinator.nix; nixosModules.buildbot-worker = ./nix/worker.nix; nixosConfigurations = diff --git a/nix/master.nix b/nix/coordinator.nix similarity index 52% rename from nix/master.nix rename to nix/coordinator.nix index 6383d5e..3ed2824 100644 --- a/nix/master.nix +++ b/nix/coordinator.nix @@ -4,82 +4,25 @@ , ... }: let - cfg = config.services.buildbot-nix.master; + cfg = config.services.buildbot-nix.coordinator; in { options = { - services.buildbot-nix.master = { - enable = lib.mkEnableOption "buildbot-master"; + services.buildbot-nix.coordinator = { + enable = lib.mkEnableOption "buildbot-coordinator"; dbUrl = lib.mkOption { type = lib.types.str; default = "postgresql://@/buildbot"; description = "Postgresql database url"; }; - cachix = { - name = lib.mkOption { - type = lib.types.nullOr lib.types.str; - default = null; - description = "Cachix name"; - }; - - signingKeyFile = lib.mkOption { - type = lib.types.nullOr lib.types.path; - default = null; - description = "Cachix signing key"; - }; - - authTokenFile = lib.mkOption { - type = lib.types.nullOr lib.types.str; - default = null; - description = "Cachix auth token"; - }; - }; - github = { - tokenFile = lib.mkOption { - type = lib.types.path; - description = "Github token file"; - }; - webhookSecretFile = lib.mkOption { - type = lib.types.path; - description = "Github webhook secret file"; - }; - oauthSecretFile = lib.mkOption { - type = lib.types.path; - description = "Github oauth secret file"; - }; - # TODO: make this an option - # https://github.com/organizations/numtide/settings/applications - # Application name: BuildBot - # Homepage URL: https://buildbot.numtide.com - # Authorization callback URL: https://buildbot.numtide.com/auth/login - # oauth_token: 2516248ec6289e4d9818122cce0cbde39e4b788d - oauthId = lib.mkOption { - 
type = lib.types.str; - description = "Github oauth id. Used for the login button"; - }; - # Most likely you want to use the same user as for the buildbot - user = lib.mkOption { - type = lib.types.str; - description = "Github user that is used for the buildbot"; - }; - admins = lib.mkOption { - type = lib.types.listOf lib.types.str; - default = [ ]; - description = "Users that are allowed to login to buildbot, trigger builds and change settings"; - }; - topic = lib.mkOption { - type = lib.types.nullOr lib.types.str; - default = "build-with-buildbot"; - description = '' - Projects that have this topic will be built by buildbot. - If null, all projects that the buildbot github user has access to, are built. - ''; - }; - }; workersFile = lib.mkOption { type = lib.types.path; description = "File containing a list of nix workers"; }; + oauth2SecretFile = lib.mkOption { + type = lib.types.path; + description = "File containing an OAuth 2 client secret"; + }; buildSystems = lib.mkOption { type = lib.types.listOf lib.types.str; default = [ pkgs.hostPlatform.system ]; @@ -114,6 +57,41 @@ in default = null; example = "/var/www/buildbot/nix-outputs"; }; + + signingKeyFile = lib.mkOption { + type = lib.types.nullOr lib.types.path; + description = "A path to a Nix signing key"; + default = null; + example = "/run/agenix.d/signing-key"; + }; + + binaryCache = { + enable = lib.mkEnableOption " binary cache upload to a S3 bucket"; + profileCredentialsFile = lib.mkOption { + type = lib.types.nullOr lib.types.path; + description = "A path to the various AWS profile credentials related to the S3 bucket containing a profile named `default`"; + default = null; + example = "/run/agenix.d/aws-profile"; + }; + bucket = lib.mkOption { + type = lib.types.nullOr lib.types.str; + description = "Bucket where to store the data"; + default = null; + example = "lix-cache"; + }; + endpoint = lib.mkOption { + type = lib.types.nullOr lib.types.str; + description = "Endpoint for the S3 server"; + 
default = null; + example = "s3.lix.systems"; + }; + region = lib.mkOption { + type = lib.types.nullOr lib.types.str; + description = "Region for the S3 bucket"; + default = null; + example = "garage"; + }; + }; }; }; config = lib.mkIf cfg.enable { @@ -126,13 +104,6 @@ in isSystemUser = true; }; - assertions = [ - { - assertion = cfg.cachix.name != null -> cfg.cachix.signingKeyFile != null || cfg.cachix.authTokenFile != null; - message = "if cachix.name is provided, then cachix.signingKeyFile and cachix.authTokenFile must be set"; - } - ]; - services.buildbot-master = { enable = true; @@ -144,30 +115,29 @@ in home = "/var/lib/buildbot"; extraImports = '' from datetime import timedelta - from buildbot_nix import GithubConfig, NixConfigurator, CachixConfig + from buildbot_nix import GerritNixConfigurator ''; configurators = [ '' util.JanitorConfigurator(logHorizon=timedelta(weeks=4), hour=12, dayOfWeek=6) '' '' - NixConfigurator( - github=GithubConfig( - oauth_id=${builtins.toJSON cfg.github.oauthId}, - admins=${builtins.toJSON cfg.github.admins}, - buildbot_user=${builtins.toJSON cfg.github.user}, - topic=${builtins.toJSON cfg.github.topic}, - ), - cachix=${if cfg.cachix.name == null then "None" else "CachixConfig( - name=${builtins.toJSON cfg.cachix.name}, - signing_key_secret_name=${if cfg.cachix.signingKeyFile != null then builtins.toJSON "cachix-signing-key" else "None"}, - auth_token_secret_name=${if cfg.cachix.authTokenFile != null then builtins.toJSON "cachix-auth-token" else "None"}, - )"}, + GerritNixConfigurator( + "gerrit.lix.systems", + "buildbot", + 2022, + "/var/lib/buildbot/master/id_gerrit", url=${builtins.toJSON config.services.buildbot-master.buildbotUrl}, nix_eval_max_memory_size=${builtins.toJSON cfg.evalMaxMemorySize}, nix_eval_worker_count=${if cfg.evalWorkerCount == null then "None" else builtins.toString cfg.evalWorkerCount}, nix_supported_systems=${builtins.toJSON cfg.buildSystems}, outputs_path=${if cfg.outputsPath == null then "None" else 
builtins.toJSON cfg.outputsPath}, + # Signing key file must be available on the workers and readable. + signing_keyfile=${if cfg.signingKeyFile == null then "None" else builtins.toJSON cfg.signingKeyFile}, + binary_cache_config=${if (!cfg.binaryCache.enable) then "None" else builtins.toJSON { + inherit (cfg.binaryCache) bucket region endpoint; + profile = "default"; + }} ) '' ]; @@ -177,31 +147,41 @@ in hasSSL = host.forceSSL || host.addSSL; in "${if hasSSL then "https" else "http"}://${cfg.domain}/"; - dbUrl = config.services.buildbot-nix.master.dbUrl; + dbUrl = cfg.dbUrl; pythonPackages = ps: [ ps.requests ps.treq ps.psycopg2 (ps.toPythonModule pkgs.buildbot-worker) - pkgs.buildbot-plugins.www-react + pkgs.buildbot-plugins.www (pkgs.python3.pkgs.callPackage ../default.nix { }) ]; }; + # TODO(raito): we assume worker runs on coordinator. please clean up this later. + systemd.services.buildbot-worker.serviceConfig.Environment = + lib.mkIf cfg.binaryCache.enable ( + let + awsConfigFile = pkgs.writeText "config.ini" '' + [default] + region = ${cfg.binaryCache.region} + endpoint_url = ${cfg.binaryCache.endpoint} + ''; + in + [ + "AWS_CONFIG_FILE=${awsConfigFile}" + "AWS_SHARED_CREDENTIALS_FILE=${cfg.binaryCache.profileCredentialsFile}" + ] + ); + systemd.services.buildbot-master = { after = [ "postgresql.service" ]; serviceConfig = { # in master.py we read secrets from $CREDENTIALS_DIRECTORY LoadCredential = [ - "github-token:${cfg.github.tokenFile}" - "github-webhook-secret:${cfg.github.webhookSecretFile}" - "github-oauth-secret:${cfg.github.oauthSecretFile}" "buildbot-nix-workers:${cfg.workersFile}" - ] - ++ lib.optional (cfg.cachix.signingKeyFile != null) - "cachix-signing-key:${builtins.toString cfg.cachix.signingKeyFile}" - ++ lib.optional (cfg.cachix.authTokenFile != null) - "cachix-auth-token:${builtins.toString cfg.cachix.authTokenFile}"; + "buildbot-oauth2-secret:${cfg.oauth2SecretFile}" + ]; }; }; @@ -215,16 +195,20 @@ in }; services.nginx.enable = true; - 
services.nginx.virtualHosts.${cfg.domain} = { + services.nginx.virtualHosts.${cfg.domain} = + let + port = config.services.buildbot-master.port; + in + { locations = { - "/".proxyPass = "http://127.0.0.1:${builtins.toString config.services.buildbot-master.port}/"; + "/".proxyPass = "http://127.0.0.1:${builtins.toString port}/"; "/sse" = { - proxyPass = "http://127.0.0.1:${builtins.toString config.services.buildbot-master.port}/sse"; + proxyPass = "http://127.0.0.1:${builtins.toString port}/sse"; # proxy buffering will prevent sse to work extraConfig = "proxy_buffering off;"; }; "/ws" = { - proxyPass = "http://127.0.0.1:${builtins.toString config.services.buildbot-master.port}/ws"; + proxyPass = "http://127.0.0.1:${builtins.toString port}/ws"; proxyWebsockets = true; # raise the proxy timeout for the websocket extraConfig = "proxy_read_timeout 6000s;"; @@ -234,11 +218,8 @@ in }; }; - systemd.tmpfiles.rules = [ - # delete legacy gcroot location, can be dropped after 2024-06-01 - "R /var/lib/buildbot-worker/gcroot - - - - -" - ] ++ lib.optional (cfg.outputsPath != null) - # Allow buildbot-master to write to this directory + systemd.tmpfiles.rules = lib.optional (cfg.outputsPath != null) + # Allow buildbot-coordinator to write to this directory "d ${cfg.outputsPath} 0755 buildbot buildbot - -"; }; } diff --git a/nix/worker.nix b/nix/worker.nix index cf804fa..3be1b3b 100644 --- a/nix/worker.nix +++ b/nix/worker.nix @@ -19,15 +19,19 @@ in defaultText = "pkgs.buildbot-worker"; description = "The buildbot-worker package to use."; }; - masterUrl = lib.mkOption { + coordinatorUrl = lib.mkOption { type = lib.types.str; default = "tcp:host=localhost:port=9989"; - description = "The buildbot master url."; + description = "The buildbot coordinator url."; }; workerPasswordFile = lib.mkOption { type = lib.types.path; description = "The buildbot worker password file."; }; + workerArchitectures = lib.mkOption { + type = lib.types.attrsOf lib.types.int; + description = "Nix `system`s 
the worker should feel responsible for."; + }; }; }; config = lib.mkIf cfg.enable { @@ -54,15 +58,17 @@ in after = [ "network.target" "buildbot-master.service" ]; wantedBy = [ "multi-user.target" ]; path = [ - pkgs.cachix pkgs.git pkgs.openssh pkgs.nix pkgs.nix-eval-jobs ]; environment.PYTHONPATH = "${python.withPackages (_: [cfg.package])}/${python.sitePackages}"; - environment.MASTER_URL = cfg.masterUrl; + environment.MASTER_URL = cfg.coordinatorUrl; environment.BUILDBOT_DIR = buildbotDir; + environment.WORKER_ARCH_LIST = + lib.concatStringsSep "," + (lib.mapAttrsToList (arch: jobs: "${arch}=${toString jobs}") cfg.workerArchitectures); serviceConfig = { # We rather want the CI job to fail on OOM than to have a broken buildbot worker. @@ -70,7 +76,9 @@ in OOMPolicy = "continue"; LoadCredential = [ "worker-password-file:${cfg.workerPasswordFile}" ]; - Environment = [ "WORKER_PASSWORD_FILE=%d/worker-password-file" ]; + Environment = [ + "WORKER_PASSWORD_FILE=%d/worker-password-file" + ]; Type = "simple"; User = "buildbot-worker"; Group = "buildbot-worker";