From acfd225e6daeb9f61229bd551a5ea848d950e516 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 15:59:44 +0100 Subject: [PATCH 01/45] buildbot/__init__.py: rework the file for gerrit usecase, our oauth2 instance Signed-off-by: Raito Bezarius --- buildbot_nix/__init__.py | 381 +++++++++++---------------------------- 1 file changed, 104 insertions(+), 277 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 3f5fd19..17084af 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -1,7 +1,6 @@ import json import multiprocessing import os -import signal import sys import uuid from collections import defaultdict @@ -19,6 +18,8 @@ from buildbot.process.results import ALL_RESULTS, statusToString from buildbot.steps.trigger import Trigger from buildbot.util import asyncSleep from buildbot.www.authz.endpointmatchers import EndpointMatcherBase, Match +from buildbot.www.oauth2 import OAuth2Auth +from buildbot.changes.gerritchangesource import GerritChangeSource if TYPE_CHECKING: from buildbot.process.log import Log @@ -28,10 +29,6 @@ from twisted.logger import Logger from twisted.python.failure import Failure from .github_projects import ( - GithubProject, - create_project_hook, - load_projects, - refresh_projects, slugify_project_name, ) @@ -39,10 +36,19 @@ SKIPPED_BUILDER_NAME = "skipped-builds" log = Logger() +class LixSystemsOAuth2(OAuth2Auth): + name = 'identity-lix-systems' + faIcon = '...' 
+ resourceEndpoint = '' + authUri = '' + tokenUri = '' class BuildbotNixError(Exception): pass +@dataclass +class GerritProject: + name: str class BuildTrigger(Trigger): """Dynamic trigger that creates a build for every attribute.""" @@ -274,58 +280,6 @@ class UpdateBuildOutput(steps.BuildStep): return util.SUCCESS -class ReloadGithubProjects(steps.BuildStep): - name = "reload_github_projects" - - def __init__(self, token: str, project_cache_file: Path, **kwargs: Any) -> None: - self.token = token - self.project_cache_file = project_cache_file - super().__init__(**kwargs) - - def reload_projects(self) -> None: - refresh_projects(self.token, self.project_cache_file) - - @defer.inlineCallbacks - def run(self) -> Generator[Any, object, Any]: - d = threads.deferToThread(self.reload_projects) # type: ignore[no-untyped-call] - - self.error_msg = "" - - def error_cb(failure: Failure) -> int: - self.error_msg += failure.getTraceback() - return util.FAILURE - - d.addCallbacks(lambda _: util.SUCCESS, error_cb) - res = yield d - if res == util.SUCCESS: - # reload the buildbot config - os.kill(os.getpid(), signal.SIGHUP) - return util.SUCCESS - else: - log: Log = yield self.addLog("log") - log.addStderr(f"Failed to reload project list: {self.error_msg}") - return util.FAILURE - - -def reload_github_projects( - worker_names: list[str], - github_token_secret: str, - project_cache_file: Path, -) -> util.BuilderConfig: - """Updates the flake an opens a PR for it.""" - factory = util.BuildFactory() - factory.addStep( - ReloadGithubProjects( - github_token_secret, project_cache_file=project_cache_file - ), - ) - return util.BuilderConfig( - name="reload-github-projects", - workernames=worker_names, - factory=factory, - ) - - # The builtin retry mechanism doesn't seem to work for github, # since github is sometimes not delivering the pull request ref fast enough. 
class GitWithRetry(steps.Git): @@ -353,9 +307,8 @@ class GitWithRetry(steps.Git): def nix_eval_config( - project: GithubProject, + project: GerritProject, worker_names: list[str], - github_token_secret: str, supported_systems: list[str], eval_lock: util.MasterLock, worker_count: int, @@ -366,15 +319,12 @@ def nix_eval_config( """ factory = util.BuildFactory() # check out the source - url_with_secret = util.Interpolate( - f"https://git:%(secret:{github_token_secret})s@github.com/%(prop:project)s", - ) factory.addStep( - GitWithRetry( - repourl=url_with_secret, - method="clean", - submodules=True, - haltOnFailure=True, + steps.Gerrit( + repourl=project.url, + mode="full", + retry=[60, 60], + timeout=3600 ), ) drv_gcroots_dir = util.Interpolate( @@ -443,7 +393,7 @@ class CachixConfig: def nix_build_config( - project: GithubProject, + project: GerritProject, worker_names: list[str], cachix: CachixConfig | None = None, outputs_path: Path | None = None, @@ -531,7 +481,7 @@ def nix_build_config( def nix_skipped_build_config( - project: GithubProject, + project: GerritProject, worker_names: list[str], ) -> util.BuilderConfig: """Dummy builder that is triggered when a build is skipped.""" @@ -570,27 +520,11 @@ def read_secret_file(secret_name: str) -> str: return Path(directory).joinpath(secret_name).read_text().rstrip() -@dataclass -class GithubConfig: - oauth_id: str - admins: list[str] - - buildbot_user: str - oauth_secret_name: str = "github-oauth-secret" - webhook_secret_name: str = "github-webhook-secret" - token_secret_name: str = "github-token" - project_cache_file: Path = Path("github-project-cache.json") - topic: str | None = "build-with-buildbot" - - def token(self) -> str: - return read_secret_file(self.token_secret_name) - def config_for_project( config: dict[str, Any], - project: GithubProject, + project: GerritProject, worker_names: list[str], - github: GithubConfig, nix_supported_systems: list[str], nix_eval_worker_count: int, nix_eval_max_memory_size: int, 
@@ -601,25 +535,6 @@ def config_for_project( config["projects"].append(Project(project.name)) config["schedulers"].extend( [ - schedulers.SingleBranchScheduler( - name=f"{project.project_id}-default-branch", - change_filter=util.ChangeFilter( - repository=project.url, - filter_fn=lambda c: c.branch - == c.properties.getProperty("github.repository.default_branch"), - ), - builderNames=[f"{project.name}/nix-eval"], - treeStableTimer=5, - ), - # this is compatible with bors or github's merge queue - schedulers.SingleBranchScheduler( - name=f"{project.project_id}-merge-queue", - change_filter=util.ChangeFilter( - repository=project.url, - branch_re="(gh-readonly-queue/.*|staging|trying)", - ), - builderNames=[f"{project.name}/nix-eval"], - ), # build all pull requests schedulers.SingleBranchScheduler( name=f"{project.project_id}-prs", @@ -660,7 +575,6 @@ def config_for_project( nix_eval_config( project, worker_names, - github_token_secret=github.token_secret_name, supported_systems=nix_supported_systems, worker_count=nix_eval_worker_count, max_memory_size=nix_eval_max_memory_size, @@ -677,97 +591,6 @@ def config_for_project( ) -class AnyProjectEndpointMatcher(EndpointMatcherBase): - def __init__(self, builders: set[str] | None = None, **kwargs: Any) -> None: - if builders is None: - builders = set() - self.builders = builders - super().__init__(**kwargs) - - @defer.inlineCallbacks - def check_builder( - self, - endpoint_object: Any, - endpoint_dict: dict[str, Any], - object_type: str, - ) -> Generator[defer.Deferred[Match], Any, Any]: - res = yield endpoint_object.get({}, endpoint_dict) - if res is None: - return None - - builder = yield self.master.data.get(("builders", res["builderid"])) - if builder["name"] in self.builders: - log.warn( - "Builder {builder} allowed by {role}: {builders}", - builder=builder["name"], - role=self.role, - builders=self.builders, - ) - return Match(self.master, **{object_type: res}) - else: - log.warn( - "Builder {builder} not allowed by 
{role}: {builders}", - builder=builder["name"], - role=self.role, - builders=self.builders, - ) - - def match_BuildEndpoint_rebuild( # noqa: N802 - self, - epobject: Any, - epdict: dict[str, Any], - options: dict[str, Any], - ) -> defer.Deferred[Match]: - return self.check_builder(epobject, epdict, "build") - - def match_BuildEndpoint_stop( # noqa: N802 - self, - epobject: Any, - epdict: dict[str, Any], - options: dict[str, Any], - ) -> defer.Deferred[Match]: - return self.check_builder(epobject, epdict, "build") - - def match_BuildRequestEndpoint_stop( # noqa: N802 - self, - epobject: Any, - epdict: dict[str, Any], - options: dict[str, Any], - ) -> defer.Deferred[Match]: - return self.check_builder(epobject, epdict, "buildrequest") - - -def setup_authz(projects: list[GithubProject], admins: list[str]) -> util.Authz: - allow_rules = [] - allowed_builders_by_org: defaultdict[str, set[str]] = defaultdict( - lambda: {"reload-github-projects"}, - ) - - for project in projects: - if project.belongs_to_org: - for builder in ["nix-build", "nix-skipped-build", "nix-eval"]: - allowed_builders_by_org[project.owner].add(f"{project.name}/{builder}") - - for org, allowed_builders in allowed_builders_by_org.items(): - allow_rules.append( - AnyProjectEndpointMatcher( - builders=allowed_builders, - role=org, - defaultDeny=False, - ), - ) - - allow_rules.append(util.AnyEndpointMatcher(role="admin", defaultDeny=False)) - allow_rules.append(util.AnyControlEndpointMatcher(role="admins")) - return util.Authz( - roleMatchers=[ - util.RolesFromUsername(roles=["admin"], usernames=admins), - util.RolesFromGroups(groupPrefix=""), # so we can match on ORG - ], - allowRules=allow_rules, - ) - - class PeriodicWithStartup(schedulers.Periodic): def __init__(self, *args: Any, run_on_startup: bool = False, **kwargs: Any) -> None: super().__init__(*args, **kwargs) @@ -779,14 +602,72 @@ class PeriodicWithStartup(schedulers.Periodic): yield self.setState("last_build", None) yield super().activate() 
+def gerritReviewCB(builderName, build, result, master, arg): + if result == util.RETRY: + return dict() -class NixConfigurator(ConfiguratorBase): + message = "Buildbot finished compiling your patchset\n" + message += "on configuration: %s\n" % builderName + message += "The result is: %s\n" % util.Results[result].upper() + + if arg: + message += "\nFor more details visit:\n" + message += build['url'] + "\n" + + if result == util.SUCCESS: + verified = 1 + else: + verified = -1 + + return dict(message=message, labels={'Verified': verified}) + +def gerritStartCB(builderName, build, arg): + message = "Buildbot started compiling your patchset\n" + message += "on configuration: %s\n" % builderName + message += "See your build here: %s" % build['url'] + + return dict(message=message) + +def gerritSummaryCB(buildInfoList, results, status, arg): + success = False + failure = False + + msgs = [] + + for buildInfo in buildInfoList: + msg = "Builder %(name)s %(resultText)s (%(text)s)" % buildInfo + link = buildInfo.get('url', None) + if link: + msg += " - " + link + else: + msg += "." 
+ + msgs.append(msg) + + if buildInfo['result'] == util.SUCCESS: + success = True + else: + failure = True + + if success and not failure: + verified = 1 + else: + verified = -1 + + return dict(message='\n\n'.join(msgs), + labels={ + 'Verified': verified + }) + +class GerritNixConfigurator(ConfiguratorBase): """Janitor is a configurator which create a Janitor Builder with all needed Janitor steps""" def __init__( self, # Shape of this file: [ { "name": "", "pass": "", "cores": "" } ] - github: GithubConfig, + gerrit_server: str, + gerrit_user: str, + gerrit_port: int, url: str, nix_supported_systems: list[str], nix_eval_worker_count: int | None, @@ -796,11 +677,14 @@ class NixConfigurator(ConfiguratorBase): outputs_path: str | None = None, ) -> None: super().__init__() + self.gerrit_server = gerrit_server + self.gerrit_user = gerrit_user + self.gerrit_port = gerrit_port self.nix_workers_secret_name = nix_workers_secret_name self.nix_eval_max_memory_size = nix_eval_max_memory_size self.nix_eval_worker_count = nix_eval_worker_count self.nix_supported_systems = nix_supported_systems - self.github = github + self.gerrit_change_source = GerritChangeSource(gerrit_server, gerrit_user, gerritport=gerrit_port) self.url = url self.cachix = cachix if outputs_path is None: @@ -809,9 +693,6 @@ class NixConfigurator(ConfiguratorBase): self.outputs_path = Path(outputs_path) def configure(self, config: dict[str, Any]) -> None: - projects = load_projects(self.github.token(), self.github.project_cache_file) - if self.github.topic is not None: - projects = [p for p in projects if self.github.topic in p.topics] worker_config = json.loads(read_secret_file(self.nix_workers_secret_name)) worker_names = [] @@ -826,63 +707,30 @@ class NixConfigurator(ConfiguratorBase): config["workers"].append(worker.Worker(worker_name, item["pass"])) worker_names.append(worker_name) - webhook_secret = read_secret_file(self.github.webhook_secret_name) eval_lock = util.MasterLock("nix-eval") - for project 
in projects: - create_project_hook( - project.owner, - project.repo, - self.github.token(), - self.url + "change_hook/github", - webhook_secret, - ) - config_for_project( - config, - project, - worker_names, - self.github, - self.nix_supported_systems, - self.nix_eval_worker_count or multiprocessing.cpu_count(), - self.nix_eval_max_memory_size, - eval_lock, - self.cachix, - self.outputs_path, - ) + # TODO: initialize Lix + # config_for_project( + # config, + # project, + # worker_names, + # self.nix_supported_systems, + # self.nix_eval_worker_count or multiprocessing.cpu_count(), + # self.nix_eval_max_memory_size, + # eval_lock, + # self.cachix, + # self.outputs_path, + # ) - # Reload github projects - config["builders"].append( - reload_github_projects( - [worker_names[0]], - self.github.token(), - self.github.project_cache_file, - ), - ) config["workers"].append(worker.LocalWorker(SKIPPED_BUILDER_NAME)) - config["schedulers"].extend( - [ - schedulers.ForceScheduler( - name="reload-github-projects", - builderNames=["reload-github-projects"], - buttonName="Update projects", - ), - # project list twice a day and on startup - PeriodicWithStartup( - name="reload-github-projects-bidaily", - builderNames=["reload-github-projects"], - periodicBuildTimer=12 * 60 * 60, - run_on_startup=not self.github.project_cache_file.exists(), - ), - ], - ) config["services"].append( - reporters.GitHubStatusPush( - token=self.github.token(), - # Since we dynamically create build steps, - # we use `virtual_builder_name` in the webinterface - # so that we distinguish what has beeing build - context=Interpolate("buildbot/%(prop:status_name)s"), - ), + reporters.GerritStatusPush(self.gerrit_server, self.gerrit_user, + reviewCB=gerritReviewCB, + reviewArg=self.url, + startCB=gerritStartCB, + startArg=self.url, + summaryCB=gerritSummaryCB, + summaryArg=self.url) ) systemd_secrets = secrets.SecretInAFile( @@ -893,26 +741,5 @@ class NixConfigurator(ConfiguratorBase): 
config["www"].setdefault("plugins", {}) config["www"]["plugins"].update(dict(base_react={})) - config["www"].setdefault("change_hook_dialects", {}) - config["www"]["change_hook_dialects"]["github"] = { - "secret": webhook_secret, - "strict": True, - "token": self.github.token(), - "github_property_whitelist": "*", - } - if "auth" not in config["www"]: - config["www"].setdefault("avatar_methods", []) - config["www"]["avatar_methods"].append( - util.AvatarGitHub(token=self.github.token()), - ) - config["www"]["auth"] = util.GitHubAuth( - self.github.oauth_id, - read_secret_file(self.github.oauth_secret_name), - apiVersion=4, - ) - - config["www"]["authz"] = setup_authz( - admins=self.github.admins, - projects=projects, - ) + config["www"]["auth"] = LixSystemsOAuth2() From 329d9dd6d4e9c50bc139863e08d7d5e911826eea Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 16:11:47 +0100 Subject: [PATCH 02/45] nix/coordinator: rename it into what this really is No need to use legacy names. Signed-off-by: Raito Bezarius --- flake.nix | 2 +- nix/{master.nix => coordinator.nix} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename nix/{master.nix => coordinator.nix} (100%) diff --git a/flake.nix b/flake.nix index e6f5dab..b06669d 100644 --- a/flake.nix +++ b/flake.nix @@ -20,7 +20,7 @@ ] ++ inputs.nixpkgs.lib.optional (inputs.treefmt-nix ? 
flakeModule) ./nix/treefmt/flake-module.nix; systems = [ "x86_64-linux" ]; flake = { - nixosModules.buildbot-master = ./nix/master.nix; + nixosModules.buildbot-coordinator = ./nix/coordinator.nix; nixosModules.buildbot-worker = ./nix/worker.nix; nixosConfigurations = diff --git a/nix/master.nix b/nix/coordinator.nix similarity index 100% rename from nix/master.nix rename to nix/coordinator.nix From beea96da2c7a70a187ef39a90dfce1b73a5c58f6 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 16:10:47 +0100 Subject: [PATCH 03/45] nix/coordinator: simplify the module Signed-off-by: Raito Bezarius --- nix/coordinator.nix | 82 +++++++-------------------------------------- 1 file changed, 13 insertions(+), 69 deletions(-) diff --git a/nix/coordinator.nix b/nix/coordinator.nix index 6383d5e..6d2a904 100644 --- a/nix/coordinator.nix +++ b/nix/coordinator.nix @@ -4,12 +4,12 @@ , ... }: let - cfg = config.services.buildbot-nix.master; + cfg = config.services.buildbot-nix.coordinator; in { options = { - services.buildbot-nix.master = { - enable = lib.mkEnableOption "buildbot-master"; + services.buildbot-nix.coordinator = { + enable = lib.mkEnableOption "buildbot-coordinator"; dbUrl = lib.mkOption { type = lib.types.str; default = "postgresql://@/buildbot"; @@ -34,48 +34,6 @@ in description = "Cachix auth token"; }; }; - github = { - tokenFile = lib.mkOption { - type = lib.types.path; - description = "Github token file"; - }; - webhookSecretFile = lib.mkOption { - type = lib.types.path; - description = "Github webhook secret file"; - }; - oauthSecretFile = lib.mkOption { - type = lib.types.path; - description = "Github oauth secret file"; - }; - # TODO: make this an option - # https://github.com/organizations/numtide/settings/applications - # Application name: BuildBot - # Homepage URL: https://buildbot.numtide.com - # Authorization callback URL: https://buildbot.numtide.com/auth/login - # oauth_token: 2516248ec6289e4d9818122cce0cbde39e4b788d - oauthId = 
lib.mkOption { - type = lib.types.str; - description = "Github oauth id. Used for the login button"; - }; - # Most likely you want to use the same user as for the buildbot - user = lib.mkOption { - type = lib.types.str; - description = "Github user that is used for the buildbot"; - }; - admins = lib.mkOption { - type = lib.types.listOf lib.types.str; - default = [ ]; - description = "Users that are allowed to login to buildbot, trigger builds and change settings"; - }; - topic = lib.mkOption { - type = lib.types.nullOr lib.types.str; - default = "build-with-buildbot"; - description = '' - Projects that have this topic will be built by buildbot. - If null, all projects that the buildbot github user has access to, are built. - ''; - }; - }; workersFile = lib.mkOption { type = lib.types.path; description = "File containing a list of nix workers"; @@ -144,7 +102,7 @@ in home = "/var/lib/buildbot"; extraImports = '' from datetime import timedelta - from buildbot_nix import GithubConfig, NixConfigurator, CachixConfig + from buildbot_nix import NixConfigurator, CachixConfig ''; configurators = [ '' @@ -152,18 +110,10 @@ in '' '' NixConfigurator( - github=GithubConfig( - oauth_id=${builtins.toJSON cfg.github.oauthId}, - admins=${builtins.toJSON cfg.github.admins}, - buildbot_user=${builtins.toJSON cfg.github.user}, - topic=${builtins.toJSON cfg.github.topic}, + gerrit=GerritConfig( + ... 
), - cachix=${if cfg.cachix.name == null then "None" else "CachixConfig( - name=${builtins.toJSON cfg.cachix.name}, - signing_key_secret_name=${if cfg.cachix.signingKeyFile != null then builtins.toJSON "cachix-signing-key" else "None"}, - auth_token_secret_name=${if cfg.cachix.authTokenFile != null then builtins.toJSON "cachix-auth-token" else "None"}, - )"}, - url=${builtins.toJSON config.services.buildbot-master.buildbotUrl}, + url=${builtins.toJSON config.services.buildbot-coordinator.buildbotUrl}, nix_eval_max_memory_size=${builtins.toJSON cfg.evalMaxMemorySize}, nix_eval_worker_count=${if cfg.evalWorkerCount == null then "None" else builtins.toString cfg.evalWorkerCount}, nix_supported_systems=${builtins.toJSON cfg.buildSystems}, @@ -177,7 +127,7 @@ in hasSSL = host.forceSSL || host.addSSL; in "${if hasSSL then "https" else "http"}://${cfg.domain}/"; - dbUrl = config.services.buildbot-nix.master.dbUrl; + dbUrl = config.services.buildbot-nix.coordinator.dbUrl; pythonPackages = ps: [ ps.requests ps.treq @@ -193,9 +143,6 @@ in serviceConfig = { # in master.py we read secrets from $CREDENTIALS_DIRECTORY LoadCredential = [ - "github-token:${cfg.github.tokenFile}" - "github-webhook-secret:${cfg.github.webhookSecretFile}" - "github-oauth-secret:${cfg.github.oauthSecretFile}" "buildbot-nix-workers:${cfg.workersFile}" ] ++ lib.optional (cfg.cachix.signingKeyFile != null) @@ -217,14 +164,14 @@ in services.nginx.enable = true; services.nginx.virtualHosts.${cfg.domain} = { locations = { - "/".proxyPass = "http://127.0.0.1:${builtins.toString config.services.buildbot-master.port}/"; + "/".proxyPass = "http://127.0.0.1:${builtins.toString config.services.buildbot-coordinator.port}/"; "/sse" = { - proxyPass = "http://127.0.0.1:${builtins.toString config.services.buildbot-master.port}/sse"; + proxyPass = "http://127.0.0.1:${builtins.toString config.services.buildbot-coordinator.port}/sse"; # proxy buffering will prevent sse to work extraConfig = "proxy_buffering off;"; }; 
"/ws" = { - proxyPass = "http://127.0.0.1:${builtins.toString config.services.buildbot-master.port}/ws"; + proxyPass = "http://127.0.0.1:${builtins.toString config.services.buildbot-coordinator.port}/ws"; proxyWebsockets = true; # raise the proxy timeout for the websocket extraConfig = "proxy_read_timeout 6000s;"; @@ -234,11 +181,8 @@ in }; }; - systemd.tmpfiles.rules = [ - # delete legacy gcroot location, can be dropped after 2024-06-01 - "R /var/lib/buildbot-worker/gcroot - - - - -" - ] ++ lib.optional (cfg.outputsPath != null) - # Allow buildbot-master to write to this directory + systemd.tmpfiles.rules = lib.optional (cfg.outputsPath != null) + # Allow buildbot-coordinator to write to this directory "d ${cfg.outputsPath} 0755 buildbot buildbot - -"; }; } From 5f7b3e0cdbd0afe0a6e2a0bf451783fb5a053799 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 16:23:03 +0100 Subject: [PATCH 04/45] buildbot: instantiate fully LixSystemsOAuth2 Signed-off-by: Raito Bezarius --- buildbot_nix/__init__.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 17084af..fbcca6e 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -37,11 +37,12 @@ SKIPPED_BUILDER_NAME = "skipped-builds" log = Logger() class LixSystemsOAuth2(OAuth2Auth): - name = 'identity-lix-systems' - faIcon = '...' - resourceEndpoint = '' - authUri = '' - tokenUri = '' + name = 'Lix' + faIcon = 'fa-login' + resourceEndpoint = "https://identity.lix.systems" + # is passing scope necessary? 
+ authUri = 'https://identity.lix.systems/realms/lix-project/protocol/openid-connect/auth' + tokenUri = 'https://identity.lix.systems/realms/lix-project/protocol/openid-connect/token' class BuildbotNixError(Exception): pass @@ -722,6 +723,7 @@ class GerritNixConfigurator(ConfiguratorBase): # self.outputs_path, # ) + config["change_source"] = self.gerrit_change_source config["workers"].append(worker.LocalWorker(SKIPPED_BUILDER_NAME)) config["services"].append( reporters.GerritStatusPush(self.gerrit_server, self.gerrit_user, @@ -742,4 +744,4 @@ class GerritNixConfigurator(ConfiguratorBase): config["www"]["plugins"].update(dict(base_react={})) if "auth" not in config["www"]: - config["www"]["auth"] = LixSystemsOAuth2() + config["www"]["auth"] = LixSystemsOAuth2('buildbot', read_secret_file('buildbot-oauth2-secret'), autologin=True) From 7ace78107c8cf13ecba879c75ad2f482610fa788 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 17:34:52 +0100 Subject: [PATCH 05/45] buildbot: further Gerritification - `BuildTrigger` still depends on `github` properties. Signed-off-by: Raito Bezarius --- buildbot_nix/__init__.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index fbcca6e..fb8658b 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -49,6 +49,7 @@ class BuildbotNixError(Exception): @dataclass class GerritProject: + # `project` field. name: str class BuildTrigger(Trigger): @@ -322,7 +323,6 @@ def nix_eval_config( # check out the source factory.addStep( steps.Gerrit( - repourl=project.url, mode="full", retry=[60, 60], timeout=3600 @@ -536,28 +536,28 @@ def config_for_project( config["projects"].append(Project(project.name)) config["schedulers"].extend( [ - # build all pull requests + # build everything pertaining to a project + # TODO(raito): will this catch also post-merge? we don't really care about that… do we? 
schedulers.SingleBranchScheduler( - name=f"{project.project_id}-prs", + name=f"{project.name}-changes", change_filter=util.ChangeFilter( - repository=project.url, - category="pull", + project=project.name, ), builderNames=[f"{project.name}/nix-eval"], ), # this is triggered from `nix-eval` schedulers.Triggerable( - name=f"{project.project_id}-nix-build", + name=f"{project.name}-nix-build", builderNames=[f"{project.name}/nix-build"], ), # this is triggered from `nix-eval` when the build is skipped schedulers.Triggerable( - name=f"{project.project_id}-nix-skipped-build", + name=f"{project.name}-nix-skipped-build", builderNames=[f"{project.name}/nix-skipped-build"], ), # allow to manually trigger a nix-build schedulers.ForceScheduler( - name=f"{project.project_id}-force", + name=f"{project.name}-force", builderNames=[f"{project.name}/nix-eval"], properties=[ util.StringParameter( @@ -710,18 +710,18 @@ class GerritNixConfigurator(ConfiguratorBase): eval_lock = util.MasterLock("nix-eval") - # TODO: initialize Lix - # config_for_project( - # config, - # project, - # worker_names, - # self.nix_supported_systems, - # self.nix_eval_worker_count or multiprocessing.cpu_count(), - # self.nix_eval_max_memory_size, - # eval_lock, - # self.cachix, - # self.outputs_path, - # ) + # Configure the Lix project. 
+ config_for_project( + config, + GerritProject(name="lix"), + worker_names, + self.nix_supported_systems, + self.nix_eval_worker_count or multiprocessing.cpu_count(), + self.nix_eval_max_memory_size, + eval_lock, + self.cachix, + self.outputs_path, + ) config["change_source"] = self.gerrit_change_source config["workers"].append(worker.LocalWorker(SKIPPED_BUILDER_NAME)) From 101612eb70f92f13c803bab7b2af0c3cc8b6c903 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 18:39:34 +0100 Subject: [PATCH 06/45] nix/coordinator: fix various errors Signed-off-by: Raito Bezarius --- nix/coordinator.nix | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/nix/coordinator.nix b/nix/coordinator.nix index 6d2a904..cc4577e 100644 --- a/nix/coordinator.nix +++ b/nix/coordinator.nix @@ -113,7 +113,7 @@ in gerrit=GerritConfig( ... ), - url=${builtins.toJSON config.services.buildbot-coordinator.buildbotUrl}, + url=${builtins.toJSON config.services.buildbot-master.buildbotUrl}, nix_eval_max_memory_size=${builtins.toJSON cfg.evalMaxMemorySize}, nix_eval_worker_count=${if cfg.evalWorkerCount == null then "None" else builtins.toString cfg.evalWorkerCount}, nix_supported_systems=${builtins.toJSON cfg.buildSystems}, @@ -127,7 +127,7 @@ in hasSSL = host.forceSSL || host.addSSL; in "${if hasSSL then "https" else "http"}://${cfg.domain}/"; - dbUrl = config.services.buildbot-nix.coordinator.dbUrl; + dbUrl = cfg.dbUrl; pythonPackages = ps: [ ps.requests ps.treq @@ -162,16 +162,20 @@ in }; services.nginx.enable = true; - services.nginx.virtualHosts.${cfg.domain} = { + services.nginx.virtualHosts.${cfg.domain} = + let + port = config.services.buildbot-master.port; + in + { locations = { - "/".proxyPass = "http://127.0.0.1:${builtins.toString config.services.buildbot-coordinator.port}/"; + "/".proxyPass = "http://127.0.0.1:${builtins.toString port}/"; "/sse" = { - proxyPass = "http://127.0.0.1:${builtins.toString 
config.services.buildbot-coordinator.port}/sse"; + proxyPass = "http://127.0.0.1:${builtins.toString port}/sse"; # proxy buffering will prevent sse to work extraConfig = "proxy_buffering off;"; }; "/ws" = { - proxyPass = "http://127.0.0.1:${builtins.toString config.services.buildbot-coordinator.port}/ws"; + proxyPass = "http://127.0.0.1:${builtins.toString port}/ws"; proxyWebsockets = true; # raise the proxy timeout for the websocket extraConfig = "proxy_read_timeout 6000s;"; From 81bd57ffac3cefc6a0f5cbe9cd527bacb0b26769 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 18:43:14 +0100 Subject: [PATCH 07/45] nix/coordinator: instantiate the GerritNixConfigurator Signed-off-by: Raito Bezarius --- nix/coordinator.nix | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nix/coordinator.nix b/nix/coordinator.nix index cc4577e..6f380ba 100644 --- a/nix/coordinator.nix +++ b/nix/coordinator.nix @@ -102,17 +102,17 @@ in home = "/var/lib/buildbot"; extraImports = '' from datetime import timedelta - from buildbot_nix import NixConfigurator, CachixConfig + from buildbot_nix import GerritNixConfigurator, CachixConfig ''; configurators = [ '' util.JanitorConfigurator(logHorizon=timedelta(weeks=4), hour=12, dayOfWeek=6) '' '' - NixConfigurator( - gerrit=GerritConfig( - ... 
- ), + GerritNixConfigurator( + "gerrit.lix.systems", + "buildbot", + 2022, url=${builtins.toJSON config.services.buildbot-master.buildbotUrl}, nix_eval_max_memory_size=${builtins.toJSON cfg.evalMaxMemorySize}, nix_eval_worker_count=${if cfg.evalWorkerCount == null then "None" else builtins.toString cfg.evalWorkerCount}, From 7ad9c1a3786399148512c514a08e2f7b24ccccc1 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 18:47:15 +0100 Subject: [PATCH 08/45] nix/coordinator: introduce OAuth2 client secret Signed-off-by: Raito Bezarius --- nix/coordinator.nix | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nix/coordinator.nix b/nix/coordinator.nix index 6f380ba..c3b7359 100644 --- a/nix/coordinator.nix +++ b/nix/coordinator.nix @@ -38,6 +38,10 @@ in type = lib.types.path; description = "File containing a list of nix workers"; }; + oauth2SecretFile = lib.mkOption { + type = lib.types.path; + description = "File containing an OAuth 2 client secret"; + }; buildSystems = lib.mkOption { type = lib.types.listOf lib.types.str; default = [ pkgs.hostPlatform.system ]; @@ -144,6 +148,7 @@ in # in master.py we read secrets from $CREDENTIALS_DIRECTORY LoadCredential = [ "buildbot-nix-workers:${cfg.workersFile}" + "buildbot-oauth2-secret:${cfg.oauth2SecretFile}" ] ++ lib.optional (cfg.cachix.signingKeyFile != null) "cachix-signing-key:${builtins.toString cfg.cachix.signingKeyFile}" From 5a8ab145e61420ed6a67984e6f4214f580a37a0b Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 19:00:41 +0100 Subject: [PATCH 09/45] buildbot: add repourl Signed-off-by: Raito Bezarius --- buildbot_nix/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index fb8658b..0a94e87 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -323,6 +323,7 @@ def nix_eval_config( # check out the source factory.addStep( steps.Gerrit( + repourl="git://git@git.lix.systems/lix-project/lix", mode="full", 
retry=[60, 60], timeout=3600 From 9f98533dd767f3623ddf8bd0c26f54beedf787ac Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 19:33:59 +0100 Subject: [PATCH 10/45] =?UTF-8?q?nix/worker:=20rename=20master=20=E2=86=92?= =?UTF-8?q?=20coordinator?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raito Bezarius --- nix/worker.nix | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nix/worker.nix b/nix/worker.nix index cf804fa..ab56a38 100644 --- a/nix/worker.nix +++ b/nix/worker.nix @@ -19,10 +19,10 @@ in defaultText = "pkgs.buildbot-worker"; description = "The buildbot-worker package to use."; }; - masterUrl = lib.mkOption { + coordinatorUrl = lib.mkOption { type = lib.types.str; default = "tcp:host=localhost:port=9989"; - description = "The buildbot master url."; + description = "The buildbot coordinator url."; }; workerPasswordFile = lib.mkOption { type = lib.types.path; @@ -61,7 +61,7 @@ in pkgs.nix-eval-jobs ]; environment.PYTHONPATH = "${python.withPackages (_: [cfg.package])}/${python.sitePackages}"; - environment.MASTER_URL = cfg.masterUrl; + environment.MASTER_URL = cfg.coordinatorUrl; environment.BUILDBOT_DIR = buildbotDir; serviceConfig = { From 94e3d7aeda8af34935137a5e1810ead6d77258c8 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 21:35:59 +0100 Subject: [PATCH 11/45] nix/coordinator: support specific private SSH keys Signed-off-by: Raito Bezarius --- buildbot_nix/__init__.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 0a94e87..c4ef97a 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -310,6 +310,7 @@ class GitWithRetry(steps.Git): def nix_eval_config( project: GerritProject, + gerrit_private_key: str, worker_names: list[str], supported_systems: list[str], eval_lock: util.MasterLock, @@ -326,7 +327,8 @@ def nix_eval_config( 
repourl="git://git@git.lix.systems/lix-project/lix", mode="full", retry=[60, 60], - timeout=3600 + timeout=3600, + sshPrivateKey=gerrit_private_key ), ) drv_gcroots_dir = util.Interpolate( @@ -570,12 +572,20 @@ def config_for_project( ), ], ) + gerrit_private_key = None + with open('/var/lib/buildbot/master/id_gerrit', 'r') as f: + gerrit_private_key = f.read() + + if gerrit_private_key is None: + raise RuntimeError('No gerrit private key to fetch the repositories') + config["builders"].extend( [ # Since all workers run on the same machine, we only assign one of them to do the evaluation. # This should prevent exessive memory usage. nix_eval_config( project, + gerrit_private_key, worker_names, supported_systems=nix_supported_systems, worker_count=nix_eval_worker_count, @@ -670,6 +680,7 @@ class GerritNixConfigurator(ConfiguratorBase): gerrit_server: str, gerrit_user: str, gerrit_port: int, + gerrit_sshkey_path: str, url: str, nix_supported_systems: list[str], nix_eval_worker_count: int | None, @@ -686,7 +697,7 @@ class GerritNixConfigurator(ConfiguratorBase): self.nix_eval_max_memory_size = nix_eval_max_memory_size self.nix_eval_worker_count = nix_eval_worker_count self.nix_supported_systems = nix_supported_systems - self.gerrit_change_source = GerritChangeSource(gerrit_server, gerrit_user, gerritport=gerrit_port) + self.gerrit_change_source = GerritChangeSource(gerrit_server, gerrit_user, gerritport=gerrit_port, identity_file=gerrit_sshkey_path) self.url = url self.cachix = cachix if outputs_path is None: From e92a2225f7736e78eed7e40adac063527be91077 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 21:36:47 +0100 Subject: [PATCH 12/45] nix/coordinator: use a special key for service account Signed-off-by: Raito Bezarius --- nix/coordinator.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/nix/coordinator.nix b/nix/coordinator.nix index c3b7359..80a21f0 100644 --- a/nix/coordinator.nix +++ b/nix/coordinator.nix @@ -117,6 +117,7 @@ in 
"gerrit.lix.systems", "buildbot", 2022, + "/var/lib/buildbot/master/id_gerrit", url=${builtins.toJSON config.services.buildbot-master.buildbotUrl}, nix_eval_max_memory_size=${builtins.toJSON cfg.evalMaxMemorySize}, nix_eval_worker_count=${if cfg.evalWorkerCount == null then "None" else builtins.toString cfg.evalWorkerCount}, From 670a1d1281c60a28a201fa03c2579f22e109f179 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 22:48:44 +0100 Subject: [PATCH 13/45] nix/coordinator: fix repourl Signed-off-by: Raito Bezarius --- buildbot_nix/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index c4ef97a..1b1bdff 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -324,7 +324,7 @@ def nix_eval_config( # check out the source factory.addStep( steps.Gerrit( - repourl="git://git@git.lix.systems/lix-project/lix", + repourl="ssh://buildbot@gerrit.lix.systems:2022/lix", mode="full", retry=[60, 60], timeout=3600, @@ -565,7 +565,7 @@ def config_for_project( properties=[ util.StringParameter( name="project", - label="Name of the GitHub repository.", + label="Name of the Gerrit repository.", default=project.name, ), ], From 3895b90493891943dbd92521c61a6404c9be665f Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 22:48:49 +0100 Subject: [PATCH 14/45] nix/coordinator: disable reporter Signed-off-by: Raito Bezarius --- buildbot_nix/__init__.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 1b1bdff..feefa00 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -737,15 +737,15 @@ class GerritNixConfigurator(ConfiguratorBase): config["change_source"] = self.gerrit_change_source config["workers"].append(worker.LocalWorker(SKIPPED_BUILDER_NAME)) - config["services"].append( - reporters.GerritStatusPush(self.gerrit_server, self.gerrit_user, - 
reviewCB=gerritReviewCB, - reviewArg=self.url, - startCB=gerritStartCB, - startArg=self.url, - summaryCB=gerritSummaryCB, - summaryArg=self.url) - ) + # config["services"].append( + # reporters.GerritStatusPush(self.gerrit_server, self.gerrit_user, + # reviewCB=gerritReviewCB, + # reviewArg=self.url, + # startCB=gerritStartCB, + # startArg=self.url, + # summaryCB=gerritSummaryCB, + # summaryArg=self.url) + # ) systemd_secrets = secrets.SecretInAFile( dirname=os.environ["CREDENTIALS_DIRECTORY"], From 2bcc0ee4e9daf77815c409c40144469b314e5d5e Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 2 Mar 2024 23:22:36 +0100 Subject: [PATCH 15/45] nix/coordinator: hardcode remaining github properties to lix Signed-off-by: Raito Bezarius --- buildbot_nix/__init__.py | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index feefa00..a88bd55 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -85,21 +85,13 @@ class BuildTrigger(Trigger): def getSchedulersAndProperties(self) -> list[tuple[str, Properties]]: # noqa: N802 build_props = self.build.getProperties() - repo_name = build_props.getProperty( - "github.base.repo.full_name", - build_props.getProperty("github.repository.full_name"), - ) - project_id = slugify_project_name(repo_name) - source = f"nix-eval-{project_id}" + source = f"nix-eval-lix" triggered_schedulers = [] for job in self.jobs: attr = job.get("attr", "eval-error") name = attr - if repo_name is not None: - name = f"github:{repo_name}#checks.{name}" - else: - name = f"checks.{name}" + name = f"checks.{name}" error = job.get("error") props = Properties() props.setProperty("virtual_builder_name", name, source) @@ -180,11 +172,6 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep): raise BuildbotNixError(msg) from e jobs.append(job) build_props = self.build.getProperties() - repo_name = build_props.getProperty( - "github.base.repo.full_name", 
- build_props.getProperty("github.repository.full_name"), - ) - project_id = slugify_project_name(repo_name) filtered_jobs = [] for job in jobs: system = job.get("system") @@ -194,8 +181,8 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep): self.build.addStepsAfterCurrentStep( [ BuildTrigger( - builds_scheduler=f"{project_id}-nix-build", - skipped_builds_scheduler=f"{project_id}-nix-skipped-build", + builds_scheduler=f"lix-nix-build", + skipped_builds_scheduler=f"lix-nix-skipped-build", name="build flake", jobs=filtered_jobs, ), From 9a67a1f2cc58fbc1622a550ecd49060ff89a7a16 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sun, 3 Mar 2024 02:32:05 +0100 Subject: [PATCH 16/45] nix/coordinator: enable reporting to Gerrit Signed-off-by: Raito Bezarius --- buildbot_nix/__init__.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index a88bd55..ad2210d 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -724,15 +724,17 @@ class GerritNixConfigurator(ConfiguratorBase): config["change_source"] = self.gerrit_change_source config["workers"].append(worker.LocalWorker(SKIPPED_BUILDER_NAME)) - # config["services"].append( - # reporters.GerritStatusPush(self.gerrit_server, self.gerrit_user, - # reviewCB=gerritReviewCB, - # reviewArg=self.url, - # startCB=gerritStartCB, - # startArg=self.url, - # summaryCB=gerritSummaryCB, - # summaryArg=self.url) - # ) + config["services"].append( + reporters.GerritStatusPush(self.gerrit_server, self.gerrit_user, + port=2022, + identity_file='/var/lib/buildbot/master/id_gerrit', + reviewCB=gerritReviewCB, + reviewArg=self.url, + startCB=gerritStartCB, + startArg=self.url, + summaryCB=gerritSummaryCB, + summaryArg=self.url) + ) systemd_secrets = secrets.SecretInAFile( dirname=os.environ["CREDENTIALS_DIRECTORY"], From 322944f8d1f2ab257ce2032c070006931ac249a7 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Mon, 4 Mar 
2024 20:37:13 +0100 Subject: [PATCH 17/45] nix/coordinator(reporting): disable startCB and reviewCB excessive noise. Signed-off-by: Raito Bezarius --- buildbot_nix/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index ad2210d..b9a1740 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -728,10 +728,12 @@ class GerritNixConfigurator(ConfiguratorBase): reporters.GerritStatusPush(self.gerrit_server, self.gerrit_user, port=2022, identity_file='/var/lib/buildbot/master/id_gerrit', - reviewCB=gerritReviewCB, - reviewArg=self.url, - startCB=gerritStartCB, - startArg=self.url, + reviewCB=None, + startCB=None, + # reviewCB=gerritReviewCB, + # reviewArg=self.url, + # startCB=gerritStartCB, + # startArg=self.url, summaryCB=gerritSummaryCB, summaryArg=self.url) ) From 18d537e5d49c825071e3013261dfac720e27cda1 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Mon, 4 Mar 2024 20:50:12 +0100 Subject: [PATCH 18/45] nix/coordinator(reporting): re-enable reviewCB, disable summaryCB, skip evaluations Signed-off-by: Raito Bezarius --- buildbot_nix/__init__.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index b9a1740..f172073 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -605,6 +605,9 @@ def gerritReviewCB(builderName, build, result, master, arg): if result == util.RETRY: return dict() + if builderName != 'lix/nix-eval': + return dict() + message = "Buildbot finished compiling your patchset\n" message += "on configuration: %s\n" % builderName message += "The result is: %s\n" % util.Results[result].upper() @@ -728,14 +731,16 @@ class GerritNixConfigurator(ConfiguratorBase): reporters.GerritStatusPush(self.gerrit_server, self.gerrit_user, port=2022, identity_file='/var/lib/buildbot/master/id_gerrit', - reviewCB=None, + summaryCB=None, startCB=None, - # 
reviewCB=gerritReviewCB, - # reviewArg=self.url, + wantSteps=True, + reviewCB=gerritReviewCB, + reviewArg=self.url) # startCB=gerritStartCB, # startArg=self.url, - summaryCB=gerritSummaryCB, - summaryArg=self.url) + # summaryCB=gerritSummaryCB, + # summaryArg=self.url) + ) systemd_secrets = secrets.SecretInAFile( From c487ada514b867b43b2b74a48620d550204f6ba9 Mon Sep 17 00:00:00 2001 From: Puck Meerburg Date: Thu, 7 Mar 2024 04:40:58 +0000 Subject: [PATCH 19/45] review callback: Set labels, render list of failed checks in reporter --- buildbot_nix/__init__.py | 60 ++++++++++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index f172073..4d864ce 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -608,20 +608,64 @@ def gerritReviewCB(builderName, build, result, master, arg): if builderName != 'lix/nix-eval': return dict() - message = "Buildbot finished compiling your patchset\n" - message += "on configuration: %s\n" % builderName + all_checks = {} + for step in build['steps']: + if step['name'] != 'build flake': + continue + + for url in step['urls']: + if url['name'].startswith('success: checks.'): + path = url['name'].split(' ')[1] + all_checks[path] = (True, url['url']) + elif url['name'].startswith('failure: checks.'): + path = url['name'].split(' ')[1] + all_checks[path] = (False, url['url']) + + collected_oses = {} + for check in all_checks: + arch = check.split('.')[1] + os = arch.split('-')[1] + (success, failure) = collected_oses.get(os, (0, 0)) + if all_checks[check][0]: + success += 1 + else: + failure += 1 + + collected_oses[os] = (success, failure) + labels = {} + + if 'linux' in collected_oses: + (success, failure) = collected_oses['linux'] + if success > 0 and failure == 0: + labels['Verified-On-Linux'] = 1 + elif failure > 0: + labels['Verified-On-Linux'] = -1 + + if 'darwin' in collected_oses: + (success, failure) = 
collected_oses['darwin'] + if success > 0 and failure == 0: + labels['Verified-On-Darwin'] = 1 + elif failure > 0: + labels['Verified-On-Darwin'] = -1 + + message = "Buildbot finished compiling your patchset!\n" message += "The result is: %s\n" % util.Results[result].upper() + if result != util.SUCCESS: + successful_checks = [] + failed_checks = [] + for check in all_checks: + if not all_checks[check][0]: + failed_checks.append(f" - {check} (see {all_checks[check][1]})") + + if len(failed_checks) > 0: + message += "Failed checks:\n" + "\n".join(failed_checks) + "\n" + if arg: message += "\nFor more details visit:\n" message += build['url'] + "\n" - if result == util.SUCCESS: - verified = 1 - else: - verified = -1 - - return dict(message=message, labels={'Verified': verified}) + return dict(message=message, labels=labels) def gerritStartCB(builderName, build, arg): message = "Buildbot started compiling your patchset\n" From ec2ef903ab517087297adff189b153234fefa82c Mon Sep 17 00:00:00 2001 From: Puck Meerburg Date: Fri, 8 Mar 2024 23:28:49 +0000 Subject: [PATCH 20/45] use .#hydraJobs rather than .#checks --- buildbot_nix/__init__.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 4d864ce..2f50695 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -91,11 +91,11 @@ class BuildTrigger(Trigger): for job in self.jobs: attr = job.get("attr", "eval-error") name = attr - name = f"checks.{name}" + name = f"hydraJobs.{name}" error = job.get("error") props = Properties() props.setProperty("virtual_builder_name", name, source) - props.setProperty("status_name", f"nix-build .#checks.{attr}", source) + props.setProperty("status_name", f"nix-build .#hydraJobs.{attr}", source) props.setProperty("virtual_builder_tags", "", source) if error is not None: @@ -153,7 +153,7 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep): @defer.inlineCallbacks def 
run(self) -> Generator[Any, object, Any]: - # run nix-eval-jobs --flake .#checks to generate the dict of stages + # run nix-eval-jobs --flake .#hydraJobs to generate the dict of stages cmd: remotecommand.RemoteCommand = yield self.makeRemoteShellCommand() yield self.runCommand(cmd) @@ -341,7 +341,7 @@ def nix_eval_config( "--force-recurse", "--check-cache-status", "--flake", - ".#checks", + ".#hydraJobs", ], haltOnFailure=True, locks=[eval_lock.access("exclusive")], @@ -614,17 +614,21 @@ def gerritReviewCB(builderName, build, result, master, arg): continue for url in step['urls']: - if url['name'].startswith('success: checks.'): + if url['name'].startswith('success: hydraJobs.'): path = url['name'].split(' ')[1] all_checks[path] = (True, url['url']) - elif url['name'].startswith('failure: checks.'): + elif url['name'].startswith('failure: hydraJobs.'): path = url['name'].split(' ')[1] all_checks[path] = (False, url['url']) collected_oses = {} for check in all_checks: - arch = check.split('.')[1] - os = arch.split('-')[1] + arch = check.split('.')[-1] + if not arch.endswith('-linux') and not arch.endswith('-darwin'): + # Not an architecture-specific job, just a test + os = "test" + else: + os = arch.split('-')[1] (success, failure) = collected_oses.get(os, (0, 0)) if all_checks[check][0]: success += 1 From e1dfa0e545e2243ae04620e7d70c366bae37cc5f Mon Sep 17 00:00:00 2001 From: Puck Meerburg Date: Sun, 10 Mar 2024 13:03:16 +0000 Subject: [PATCH 21/45] Remove cachix from dependencies The rest of the Cachix infrastructure is intact. For now. 
--- nix/worker.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/nix/worker.nix b/nix/worker.nix index ab56a38..28afdff 100644 --- a/nix/worker.nix +++ b/nix/worker.nix @@ -54,7 +54,6 @@ in after = [ "network.target" "buildbot-master.service" ]; wantedBy = [ "multi-user.target" ]; path = [ - pkgs.cachix pkgs.git pkgs.openssh pkgs.nix From fdfeef8ad415552f8866ec2f79847bed98e2b142 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 10 Mar 2024 17:34:22 +0100 Subject: [PATCH 22/45] remove retry logic retries don't help us very much, in fact they mostly hurt by repeating builds that failed for non-transient reasons. retries could help with workers dropping while running a build, but those rare cases are better to restart manually than to pend at least twice the ci time for commits that simply do not build cleanly. --- buildbot_nix/__init__.py | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 2f50695..dc17dce 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -2,7 +2,6 @@ import json import multiprocessing import os import sys -import uuid from collections import defaultdict from collections.abc import Generator from dataclasses import dataclass @@ -118,8 +117,6 @@ class BuildTrigger(Trigger): props.setProperty("system", system, source) props.setProperty("drv_path", drv_path, source) props.setProperty("out_path", out_path, source) - # we use this to identify builds when running a retry - props.setProperty("build_uuid", str(uuid.uuid4()), source) triggered_schedulers.append((self.builds_scheduler, props)) return triggered_schedulers @@ -192,24 +189,6 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep): return result -# FIXME this leaks memory... 
but probably not enough that we care -class RetryCounter: - def __init__(self, retries: int) -> None: - self.builds: dict[uuid.UUID, int] = defaultdict(lambda: retries) - - def retry_build(self, build_id: uuid.UUID) -> int: - retries = self.builds[build_id] - if retries > 1: - self.builds[build_id] = retries - 1 - return retries - return 0 - - -# For now we limit this to two. Often this allows us to make the error log -# shorter because we won't see the logs for all previous succeeded builds -RETRY_COUNTER = RetryCounter(retries=2) - - class EvalErrorStep(steps.BuildStep): """Shows the error message of a failed evaluation.""" @@ -236,12 +215,7 @@ class NixBuildCommand(buildstep.ShellMixin, steps.BuildStep): cmd: remotecommand.RemoteCommand = yield self.makeRemoteShellCommand() yield self.runCommand(cmd) - res = cmd.results() - if res == util.FAILURE: - retries = RETRY_COUNTER.retry_build(self.getProperty("build_uuid")) - if retries > 0: - return util.RETRY - return res + return cmd.results() class UpdateBuildOutput(steps.BuildStep): From 0b2545b0361a07e0eb100e3df9d27654f1f285b1 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Mon, 11 Mar 2024 05:37:00 +0100 Subject: [PATCH 23/45] remove unused GitWithRetry --- buildbot_nix/__init__.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index dc17dce..a858cd1 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -243,32 +243,6 @@ class UpdateBuildOutput(steps.BuildStep): return util.SUCCESS -# The builtin retry mechanism doesn't seem to work for github, -# since github is sometimes not delivering the pull request ref fast enough. 
-class GitWithRetry(steps.Git): - @defer.inlineCallbacks - def run_vc( - self, - branch: str, - revision: str, - patch: str, - ) -> Generator[Any, object, Any]: - retry_counter = 0 - while True: - try: - res = yield super().run_vc(branch, revision, patch) - except Exception as e: # noqa: BLE001 - retry_counter += 1 - if retry_counter == 3: - msg = "Failed to clone" - raise BuildbotNixError(msg) from e - log: Log = yield self.addLog("log") - yield log.addStderr(f"Retrying git clone (error: {e})\n") - yield asyncSleep(2 << retry_counter) # 2, 4, 8 - else: - return res - - def nix_eval_config( project: GerritProject, gerrit_private_key: str, From 753df8e34033fc4b5c237f5135b9222ed30320b4 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Mon, 11 Mar 2024 05:45:33 +0100 Subject: [PATCH 24/45] remove cachix we aren't using it and it's somewhat in the way of our efforts to improve scheduling and stuff. --- buildbot_nix/__init__.py | 34 ---------------------------------- examples/default.nix | 8 -------- nix/coordinator.nix | 34 ++-------------------------------- 3 files changed, 2 insertions(+), 74 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index a858cd1..43d9fe0 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -316,25 +316,9 @@ def nix_eval_config( ) -@dataclass -class CachixConfig: - name: str - signing_key_secret_name: str | None = None - auth_token_secret_name: str | None = None - - def cachix_env(self) -> dict[str, str]: - env = {} - if self.signing_key_secret_name is not None: - env["CACHIX_SIGNING_KEY"] = util.Secret(self.signing_key_secret_name) - if self.auth_token_secret_name is not None: - env["CACHIX_AUTH_TOKEN"] = util.Secret(self.auth_token_secret_name) - return env - - def nix_build_config( project: GerritProject, worker_names: list[str], - cachix: CachixConfig | None = None, outputs_path: Path | None = None, ) -> util.BuilderConfig: """Builds one nix flake attribute.""" @@ -365,19 +349,6 @@ def 
nix_build_config( haltOnFailure=True, ), ) - if cachix: - factory.addStep( - steps.ShellCommand( - name="Upload cachix", - env=cachix.cachix_env(), - command=[ - "cachix", - "push", - cachix.name, - util.Interpolate("result-%(prop:attr)s"), - ], - ), - ) factory.addStep( steps.ShellCommand( @@ -468,7 +439,6 @@ def config_for_project( nix_eval_worker_count: int, nix_eval_max_memory_size: int, eval_lock: util.MasterLock, - cachix: CachixConfig | None = None, outputs_path: Path | None = None, ) -> Project: config["projects"].append(Project(project.name)) @@ -530,7 +500,6 @@ def config_for_project( nix_build_config( project, worker_names, - cachix=cachix, outputs_path=outputs_path, ), nix_skipped_build_config(project, [SKIPPED_BUILDER_NAME]), @@ -672,7 +641,6 @@ class GerritNixConfigurator(ConfiguratorBase): nix_eval_worker_count: int | None, nix_eval_max_memory_size: int, nix_workers_secret_name: str = "buildbot-nix-workers", # noqa: S107 - cachix: CachixConfig | None = None, outputs_path: str | None = None, ) -> None: super().__init__() @@ -685,7 +653,6 @@ class GerritNixConfigurator(ConfiguratorBase): self.nix_supported_systems = nix_supported_systems self.gerrit_change_source = GerritChangeSource(gerrit_server, gerrit_user, gerritport=gerrit_port, identity_file=gerrit_sshkey_path) self.url = url - self.cachix = cachix if outputs_path is None: self.outputs_path = None else: @@ -717,7 +684,6 @@ class GerritNixConfigurator(ConfiguratorBase): self.nix_eval_worker_count or multiprocessing.cpu_count(), self.nix_eval_max_memory_size, eval_lock, - self.cachix, self.outputs_path, ) diff --git a/examples/default.nix b/examples/default.nix index f9fb42e..f59a01a 100644 --- a/examples/default.nix +++ b/examples/default.nix @@ -46,14 +46,6 @@ in # optional nix-eval-jobs settings # evalWorkerCount = 8; # limit number of concurrent evaluations # evalMaxMemorySize = "2048"; # limit memory usage per evaluation - - # optional cachix - #cachix = { - # name = "my-cachix"; - # # One of 
the following is required: - # signingKey = "/var/lib/secrets/cachix-key"; - # authToken = "/var/lib/secrets/cachix-token"; - #}; }; }) buildbot-nix.nixosModules.buildbot-master diff --git a/nix/coordinator.nix b/nix/coordinator.nix index 80a21f0..797d339 100644 --- a/nix/coordinator.nix +++ b/nix/coordinator.nix @@ -15,25 +15,6 @@ in default = "postgresql://@/buildbot"; description = "Postgresql database url"; }; - cachix = { - name = lib.mkOption { - type = lib.types.nullOr lib.types.str; - default = null; - description = "Cachix name"; - }; - - signingKeyFile = lib.mkOption { - type = lib.types.nullOr lib.types.path; - default = null; - description = "Cachix signing key"; - }; - - authTokenFile = lib.mkOption { - type = lib.types.nullOr lib.types.str; - default = null; - description = "Cachix auth token"; - }; - }; workersFile = lib.mkOption { type = lib.types.path; description = "File containing a list of nix workers"; @@ -88,13 +69,6 @@ in isSystemUser = true; }; - assertions = [ - { - assertion = cfg.cachix.name != null -> cfg.cachix.signingKeyFile != null || cfg.cachix.authTokenFile != null; - message = "if cachix.name is provided, then cachix.signingKeyFile and cachix.authTokenFile must be set"; - } - ]; - services.buildbot-master = { enable = true; @@ -106,7 +80,7 @@ in home = "/var/lib/buildbot"; extraImports = '' from datetime import timedelta - from buildbot_nix import GerritNixConfigurator, CachixConfig + from buildbot_nix import GerritNixConfigurator ''; configurators = [ '' @@ -150,11 +124,7 @@ in LoadCredential = [ "buildbot-nix-workers:${cfg.workersFile}" "buildbot-oauth2-secret:${cfg.oauth2SecretFile}" - ] - ++ lib.optional (cfg.cachix.signingKeyFile != null) - "cachix-signing-key:${builtins.toString cfg.cachix.signingKeyFile}" - ++ lib.optional (cfg.cachix.authTokenFile != null) - "cachix-auth-token:${builtins.toString cfg.cachix.authTokenFile}"; + ]; }; }; From 156e6e3deae3edd06de7de1dc1dd20de0b6a4c74 Mon Sep 17 00:00:00 2001 From: eldritch 
horrors Date: Mon, 11 Mar 2024 06:24:51 +0100 Subject: [PATCH 25/45] remove skipped-builds builder run all of them on the normal build worker. this significantly simplifies the overall scheduler/builder config and removes a triplication of possible builds paths. --- buildbot_nix/__init__.py | 80 +++++++++------------------------------- 1 file changed, 17 insertions(+), 63 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 43d9fe0..b795afe 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -31,8 +31,6 @@ from .github_projects import ( slugify_project_name, ) -SKIPPED_BUILDER_NAME = "skipped-builds" - log = Logger() class LixSystemsOAuth2(OAuth2Auth): @@ -57,7 +55,6 @@ class BuildTrigger(Trigger): def __init__( self, builds_scheduler: str, - skipped_builds_scheduler: str, jobs: list[dict[str, Any]], **kwargs: Any, ) -> None: @@ -66,11 +63,10 @@ class BuildTrigger(Trigger): self.jobs = jobs self.config = None self.builds_scheduler = builds_scheduler - self.skipped_builds_scheduler = skipped_builds_scheduler Trigger.__init__( self, waitForFinish=True, - schedulerNames=[builds_scheduler, skipped_builds_scheduler], + schedulerNames=[builds_scheduler], haltOnFailure=True, flunkOnFailure=True, sourceStamps=[], @@ -99,11 +95,7 @@ class BuildTrigger(Trigger): if error is not None: props.setProperty("error", error, source) - triggered_schedulers.append((self.skipped_builds_scheduler, props)) - continue - - if job.get("isCached"): - triggered_schedulers.append((self.skipped_builds_scheduler, props)) + triggered_schedulers.append((self.builds_scheduler, props)) continue drv_path = job.get("drvPath") @@ -117,6 +109,7 @@ class BuildTrigger(Trigger): props.setProperty("system", system, source) props.setProperty("drv_path", drv_path, source) props.setProperty("out_path", out_path, source) + props.setProperty("isCached", job.get("isCached"), source) triggered_schedulers.append((self.builds_scheduler, props)) return 
triggered_schedulers @@ -179,7 +172,6 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep): [ BuildTrigger( builds_scheduler=f"lix-nix-build", - skipped_builds_scheduler=f"lix-nix-skipped-build", name="build flake", jobs=filtered_jobs, ), @@ -189,19 +181,6 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep): return result -class EvalErrorStep(steps.BuildStep): - """Shows the error message of a failed evaluation.""" - - @defer.inlineCallbacks - def run(self) -> Generator[Any, object, Any]: - error = self.getProperty("error") - attr = self.getProperty("attr") - # show eval error - error_log: Log = yield self.addLog("nix_error") - error_log.addStderr(f"{attr} failed to evaluate:\n{error}") - return util.FAILURE - - class NixBuildCommand(buildstep.ShellMixin, steps.BuildStep): """Builds a nix derivation.""" @@ -211,6 +190,20 @@ class NixBuildCommand(buildstep.ShellMixin, steps.BuildStep): @defer.inlineCallbacks def run(self) -> Generator[Any, object, Any]: + if error := self.getProperty("error"): + attr = self.getProperty("attr") + # show eval error + error_log: Log = yield self.addLog("nix_error") + error_log.addStderr(f"{attr} failed to evaluate:\n{error}") + return util.FAILURE + + if self.getProperty("isCached"): + yield self.addCompleteLog( + "cached outpath from previous builds", + # buildbot apparently hides the first line in the ui? 
+ f'\n{self.getProperty("out_path")}\n') + return util.SKIPPED + # run `nix build` cmd: remotecommand.RemoteCommand = yield self.makeRemoteShellCommand() yield self.runCommand(cmd) @@ -390,38 +383,6 @@ def nix_build_config( ) -def nix_skipped_build_config( - project: GerritProject, - worker_names: list[str], -) -> util.BuilderConfig: - """Dummy builder that is triggered when a build is skipped.""" - factory = util.BuildFactory() - factory.addStep( - EvalErrorStep( - name="Nix evaluation", - doStepIf=lambda s: s.getProperty("error"), - hideStepIf=lambda _, s: not s.getProperty("error"), - ), - ) - - # This is just a dummy step showing the cached build - factory.addStep( - steps.BuildStep( - name="Nix build (cached)", - doStepIf=lambda _: False, - hideStepIf=lambda _, s: s.getProperty("error"), - ), - ) - return util.BuilderConfig( - name=f"{project.name}/nix-skipped-build", - project=project.name, - workernames=worker_names, - collapseRequests=False, - env={}, - factory=factory, - ) - - def read_secret_file(secret_name: str) -> str: directory = os.environ.get("CREDENTIALS_DIRECTORY") if directory is None: @@ -458,11 +419,6 @@ def config_for_project( name=f"{project.name}-nix-build", builderNames=[f"{project.name}/nix-build"], ), - # this is triggered from `nix-eval` when the build is skipped - schedulers.Triggerable( - name=f"{project.name}-nix-skipped-build", - builderNames=[f"{project.name}/nix-skipped-build"], - ), # allow to manually trigger a nix-build schedulers.ForceScheduler( name=f"{project.name}-force", @@ -502,7 +458,6 @@ def config_for_project( worker_names, outputs_path=outputs_path, ), - nix_skipped_build_config(project, [SKIPPED_BUILDER_NAME]), ], ) @@ -688,7 +643,6 @@ class GerritNixConfigurator(ConfiguratorBase): ) config["change_source"] = self.gerrit_change_source - config["workers"].append(worker.LocalWorker(SKIPPED_BUILDER_NAME)) config["services"].append( reporters.GerritStatusPush(self.gerrit_server, self.gerrit_user, port=2022, From 
f869b52a8d4ecb2c8d1076639c3286d6e3956cfd Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Mon, 11 Mar 2024 06:40:14 +0100 Subject: [PATCH 26/45] use build-local gc-root directory without this two builds can interfere with each other if: - builds 1 and 2 start - build 1 is starved of workers - build 2 finishes, removes the shared gcroots directory - gc runs - build 1 schedules more builds whose .drvs have now been removed using a dedicated directory for each build fixes this. we now also need to set alwaysRun on the cleanup command or we risk littering the system with stale gc roots when a build fails. --- buildbot_nix/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index b795afe..fc58e34 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -259,8 +259,12 @@ def nix_eval_config( sshPrivateKey=gerrit_private_key ), ) + # use one gcroots directory per worker. this should be scoped to the largest unique resource + # in charge of builds (ie, buildnumber is too narrow) to not litter the system with permanent + # gcroots in case of worker restarts. + # TODO perhaps we should clean the entire /drvs/ directory up too during startup. 
drv_gcroots_dir = util.Interpolate( - "/nix/var/nix/gcroots/per-user/buildbot-worker/%(prop:project)s/drvs/", + "/nix/var/nix/gcroots/per-user/buildbot-worker/%(prop:project)s/drvs/%(prop:workername)s/", ) factory.addStep( @@ -297,6 +301,7 @@ def nix_eval_config( "-rf", drv_gcroots_dir, ], + alwaysRun=True, ), ) From e9874c3d987eebe77e0bd6c665f4069eb067e711 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 10 Mar 2024 21:27:45 +0100 Subject: [PATCH 27/45] wip: dependency-tracked build triggering --- buildbot_nix/__init__.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index fc58e34..5e28500 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -56,11 +56,13 @@ class BuildTrigger(Trigger): self, builds_scheduler: str, jobs: list[dict[str, Any]], + drv_info: dict[str, Any], **kwargs: Any, ) -> None: if "name" not in kwargs: kwargs["name"] = "trigger" self.jobs = jobs + self.drv_info = drv_info self.config = None self.builds_scheduler = builds_scheduler Trigger.__init__( @@ -82,6 +84,20 @@ class BuildTrigger(Trigger): build_props = self.build.getProperties() source = f"nix-eval-lix" + all_deps = dict() + for drv, info in self.drv_info.items(): + all_deps[drv] = set(info.get("inputDrvs").keys()) + def closure_of(key, deps): + r, size = set([key]), 0 + while len(r) != size: + size = len(r) + r.update(*[ deps[k] for k in r ]) + return r.difference([key]) + job_set = set(( drv for drv in ( job.get("drvPath") for job in self.jobs ) if drv )) + all_deps = { k: list(closure_of(k, all_deps).intersection(job_set)) for k in job_set } + + build_props.setProperty("sched_state", all_deps, source, True) + triggered_schedulers = [] for job in self.jobs: attr = job.get("attr", "eval-error") @@ -168,12 +184,31 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep): if not system or system in self.supported_systems: # report eval errors 
filtered_jobs.append(job) + drv_show_log: Log = yield self.getLog("stdio") + drv_show_log.addStdout(f"getting derivation infos\n") + cmd = yield self.makeRemoteShellCommand( + stdioLogName=None, + collectStdout=True, + command=( + ["nix", "derivation", "show", "--recursive"] + + [ drv for drv in (job.get("drvPath") for job in filtered_jobs) if drv ] + ), + ) + yield self.runCommand(cmd) + drv_show_log.addStdout(f"done\n") + try: + drv_info = json.loads(cmd.stdout) + except json.JSONDecodeError as e: + msg = f"Failed to parse `nix derivation show` output for {cmd.command}" + raise BuildbotNixError(msg) from e + self.build.addStepsAfterCurrentStep( [ BuildTrigger( builds_scheduler=f"lix-nix-build", name="build flake", jobs=filtered_jobs, + drv_info=drv_info, ), ], ) From 28ca39af258ccd05668ffdbfb4f227bda5f2c145 Mon Sep 17 00:00:00 2001 From: Puck Meerburg Date: Sun, 10 Mar 2024 21:27:24 +0000 Subject: [PATCH 28/45] WIP: Replace Trigger with custom logic --- buildbot_nix/__init__.py | 153 +++++++++++++++++++++++++++------------ 1 file changed, 106 insertions(+), 47 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 5e28500..3a5b4d3 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -2,6 +2,7 @@ import json import multiprocessing import os import sys +import graphlib from collections import defaultdict from collections.abc import Generator from dataclasses import dataclass @@ -19,6 +20,11 @@ from buildbot.util import asyncSleep from buildbot.www.authz.endpointmatchers import EndpointMatcherBase, Match from buildbot.www.oauth2 import OAuth2Auth from buildbot.changes.gerritchangesource import GerritChangeSource +from buildbot.reporters.utils import getURLForBuild +from buildbot.reporters.utils import getURLForBuildrequest +from buildbot.process.buildstep import CANCELLED +from buildbot.process.buildstep import EXCEPTION +from buildbot.process.buildstep import SUCCESS if TYPE_CHECKING: from buildbot.process.log import 
Log @@ -49,9 +55,7 @@ class GerritProject: # `project` field. name: str -class BuildTrigger(Trigger): - """Dynamic trigger that creates a build for every attribute.""" - +class BuildTrigger(steps.BuildStep): def __init__( self, builds_scheduler: str, @@ -59,28 +63,64 @@ class BuildTrigger(Trigger): drv_info: dict[str, Any], **kwargs: Any, ) -> None: - if "name" not in kwargs: - kwargs["name"] = "trigger" self.jobs = jobs self.drv_info = drv_info self.config = None self.builds_scheduler = builds_scheduler - Trigger.__init__( - self, - waitForFinish=True, - schedulerNames=[builds_scheduler], - haltOnFailure=True, - flunkOnFailure=True, - sourceStamps=[], - alwaysUseLatest=False, - updateSourceStamp=False, - **kwargs, - ) - def createTriggerProperties(self, props: Any) -> Any: # noqa: N802 - return props + def getSchedulerByName(self, name): + schedulers = self.master.scheduler_manager.namedServices + if name not in schedulers: + raise ValueError(f"unknown triggered scheduler: {repr(name)}") + sch = schedulers[name] + # todo: check ITriggerableScheduler + return sch - def getSchedulersAndProperties(self) -> list[tuple[str, Properties]]: # noqa: N802 + def schedule_one(self, build_props, job): + attr = job.get("attr", "eval-error") + name = attr + name = f"hydraJobs.{name}" + error = job.get("error") + props = Properties() + props.setProperty("virtual_builder_name", name, source) + props.setProperty("status_name", f"nix-build .#hydraJobs.{attr}", source) + props.setProperty("virtual_builder_tags", "", source) + + if error is not None: + props.setProperty("error", error, source) + return (self.builds_scheduler, props) + + drv_path = job.get("drvPath") + system = job.get("system") + out_path = job.get("outputs", {}).get("out") + + build_props.setProperty(f"{attr}-out_path", out_path, source) + build_props.setProperty(f"{attr}-drv_path", drv_path, source) + + props.setProperty("attr", attr, source) + props.setProperty("system", system, source) + 
props.setProperty("drv_path", drv_path, source) + props.setProperty("out_path", out_path, source) + props.setProperty("isCached", job.get("isCached"), source) + + return (self.builds_scheduler, props) + + @defer.inlineCallbacks + def _add_results(self, brid, results): + @defer.inlineCallbacks + def _is_buildrequest_complete(brid): + buildrequest = yield self.master.db.buildrequests.getBuildRequest(brid) + return buildrequest['complete'] + + event = ('buildrequests', str(brid), 'complete') + yield self.master.mq.waitUntilEvent(event, lambda: _is_buildrequest_complete(brid)) + builds = yield self.master.db.builds.getBuilds(buildrequestid=brid) + for build in builds: + self._result_list.append(build["results"]) + self.updateSummary() + + @defer.inlineCallbacks + def run(self): build_props = self.build.getProperties() source = f"nix-eval-lix" @@ -95,40 +135,59 @@ class BuildTrigger(Trigger): return r.difference([key]) job_set = set(( drv for drv in ( job.get("drvPath") for job in self.jobs ) if drv )) all_deps = { k: list(closure_of(k, all_deps).intersection(job_set)) for k in job_set } + builds_to_schedule = list(self.jobs) + build_schedule_order = [] + sorter = graphlib.TopologicalSorter(all_deps) + for item in sorter.static_order(): + i = 0 + while i < builds_to_schedule.len(): + if item == builds_to_schedule[i].get("drvPath"): + build_schedule_order.append(builds_to_schedule[i]) + del builds_to_schedule[i] + else: + i += 1 - build_props.setProperty("sched_state", all_deps, source, True) + done = [] + scheduled = [] + while len(build_schedule_order) > 0 and len(scheduled) > 0: + schedule_now = [] + for build in list(build_schedule_order): + if all_deps.get(build.get("drvPath"), []) == []: + build_schedule_order.remove(build) + schedule_now.append(build) - triggered_schedulers = [] - for job in self.jobs: - attr = job.get("attr", "eval-error") - name = attr - name = f"hydraJobs.{name}" - error = job.get("error") - props = Properties() - 
props.setProperty("virtual_builder_name", name, source) - props.setProperty("status_name", f"nix-build .#hydraJobs.{attr}", source) - props.setProperty("virtual_builder_tags", "", source) + for job in schedule_now: + (scheduler, props) = self.schedule_one(build_props, job) + scheduler = self.getSchedulerByName(scheduler) - if error is not None: - props.setProperty("error", error, source) - triggered_schedulers.append((self.builds_scheduler, props)) - continue + idsDeferred, resultsDeferred = scheduler.trigger( + waited_for = True, + sourcestamps = ss_for_trigger, + set_props = props, + parent_buildid = self.build.buildid, + parent_relationship = "Triggered from", + ) - drv_path = job.get("drvPath") - system = job.get("system") - out_path = job.get("outputs", {}).get("out") + brids = {} + try: + _, brids = yield idsDeferred + except Exception as e: + yield self.addLogWithException(e) + results = EXCEPTION + scheduled.append((job, brids, resultsDeferred)) - build_props.setProperty(f"{attr}-out_path", out_path, source) - build_props.setProperty(f"{attr}-drv_path", drv_path, source) + for brid in brids.values(): + url = getURLForBuildrequest(self.master, brid) + yield self.addURL(f"{sch.name} #{brid}", url) + self._add_results(brid) - props.setProperty("attr", attr, source) - props.setProperty("system", system, source) - props.setProperty("drv_path", drv_path, source) - props.setProperty("out_path", out_path, source) - props.setProperty("isCached", job.get("isCached"), source) - - triggered_schedulers.append((self.builds_scheduler, props)) - return triggered_schedulers + wait_for_next = defer.DeferredList([results for _, _, results in scheduled], fireOnOneCallback = True, fireOnOneErrback=True) + results, index = yield wait_for_next + job, brids, _ = scheduled[index] + done.append((job, brids, results)) + del scheduled[index] + # TODO: remove dep from all_deps + # TODO: calculate final result def getCurrentSummary(self) -> dict[str, str]: # noqa: N802 """The original 
build trigger will the generic builder name `nix-build` in this case, which is not helpful""" From 4d73275123fdd18603965c2783104cc21fc21445 Mon Sep 17 00:00:00 2001 From: Puck Meerburg Date: Sun, 10 Mar 2024 22:55:38 +0000 Subject: [PATCH 29/45] Add build result tracking, schedule newly available builds --- buildbot_nix/__init__.py | 87 +++++++++++++++++++++++++++++++++++----- 1 file changed, 76 insertions(+), 11 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 3a5b4d3..3b7a394 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -25,6 +25,7 @@ from buildbot.reporters.utils import getURLForBuildrequest from buildbot.process.buildstep import CANCELLED from buildbot.process.buildstep import EXCEPTION from buildbot.process.buildstep import SUCCESS +from buildbot.process.results import worst_status if TYPE_CHECKING: from buildbot.process.log import Log @@ -67,6 +68,27 @@ class BuildTrigger(steps.BuildStep): self.drv_info = drv_info self.config = None self.builds_scheduler = builds_scheduler + self._result_list = [] + self.ended = False + self.waitForFinishDeferred = None + super().__init__(**kwargs) + + def interrupt(self, reason): + # We cancel the buildrequests, as the data api handles + # both cases: + # - build started: stop is sent, + # - build not created yet: related buildrequests are set to CANCELLED. + # Note that there is an identified race condition though (more details + # are available at buildbot.data.buildrequests). 
+ for brid in self.brids: + self.master.data.control( + "cancel", {'reason': 'parent build was interrupted'}, ("buildrequests", brid) + ) + if self.running and not self.ended: + self.ended = True + # if we are interrupted because of a connection lost, we interrupt synchronously + if self.build.conn is None and self.waitForFinishDeferred is not None: + self.waitForFinishDeferred.cancel() def getSchedulerByName(self, name): schedulers = self.master.scheduler_manager.namedServices @@ -77,6 +99,7 @@ class BuildTrigger(steps.BuildStep): return sch def schedule_one(self, build_props, job): + source = f"nix-eval-lix" attr = job.get("attr", "eval-error") name = attr name = f"hydraJobs.{name}" @@ -106,7 +129,7 @@ class BuildTrigger(steps.BuildStep): return (self.builds_scheduler, props) @defer.inlineCallbacks - def _add_results(self, brid, results): + def _add_results(self, brid): @defer.inlineCallbacks def _is_buildrequest_complete(brid): buildrequest = yield self.master.db.buildrequests.getBuildRequest(brid) @@ -119,6 +142,15 @@ class BuildTrigger(steps.BuildStep): self._result_list.append(build["results"]) self.updateSummary() + def prepareSourcestampListForTrigger(self): + ss_for_trigger = {} + objs_from_build = self.build.getAllSourceStamps() + for ss in objs_from_build: + ss_for_trigger[ss.codebase] = ss.asDict() + + trigger_values = [ss_for_trigger[k] for k in sorted(ss_for_trigger.keys())] + return trigger_values + @defer.inlineCallbacks def run(self): build_props = self.build.getProperties() @@ -140,7 +172,7 @@ class BuildTrigger(steps.BuildStep): sorter = graphlib.TopologicalSorter(all_deps) for item in sorter.static_order(): i = 0 - while i < builds_to_schedule.len(): + while i < len(builds_to_schedule): if item == builds_to_schedule[i].get("drvPath"): build_schedule_order.append(builds_to_schedule[i]) del builds_to_schedule[i] @@ -149,14 +181,20 @@ class BuildTrigger(steps.BuildStep): done = [] scheduled = [] - while len(build_schedule_order) > 0 and 
len(scheduled) > 0: + failed = [] + all_results = SUCCESS + ss_for_trigger = self.prepareSourcestampListForTrigger() + while len(build_schedule_order) > 0 or len(scheduled) > 0: + print('Scheduling..') schedule_now = [] for build in list(build_schedule_order): if all_deps.get(build.get("drvPath"), []) == []: build_schedule_order.remove(build) schedule_now.append(build) - + if len(schedule_now) == 0: + print(' No builds to schedule found.') for job in schedule_now: + print(f" - {job.get('attr')}") (scheduler, props) = self.schedule_one(build_props, job) scheduler = self.getSchedulerByName(scheduler) @@ -178,21 +216,46 @@ class BuildTrigger(steps.BuildStep): for brid in brids.values(): url = getURLForBuildrequest(self.master, brid) - yield self.addURL(f"{sch.name} #{brid}", url) + yield self.addURL(f"{scheduler.name} #{brid}", url) self._add_results(brid) - + print('Waiting..') wait_for_next = defer.DeferredList([results for _, _, results in scheduled], fireOnOneCallback = True, fireOnOneErrback=True) + self.waitForFinishDeferred = wait_for_next results, index = yield wait_for_next job, brids, _ = scheduled[index] done.append((job, brids, results)) del scheduled[index] - # TODO: remove dep from all_deps - # TODO: calculate final result + result = results[0] + print(f' Found finished build {job.get("attr")}, result {util.Results[result].upper()}') + if result != SUCCESS: + failed_checks = [] + failed_paths = [] + removed = [] + while True: + old_paths = list(failed_paths) + print(failed_checks, old_paths) + for build in list(build_schedule_order): + deps = all_deps.get(build.get("drvPath"), []) + for path in old_paths: + if path in deps: + failed_checks.append(build) + failed_paths.append(build.get("drvPath")) + build_schedule_order.remove(build) + removed.append(build.get("attr")) + + break + if old_paths == failed_paths: + break + print(' Removed jobs: ' + ', '.join(removed)) + all_results = worst_status(result, all_results) + print(f' New result: 
{util.Results[all_results].upper()}') + for dep in all_deps: + if job.get("drvPath") in all_deps[dep]: + all_deps[dep].remove(job.get("drvPath")) + print('Done!') + return all_results def getCurrentSummary(self) -> dict[str, str]: # noqa: N802 - """The original build trigger will the generic builder name `nix-build` in this case, which is not helpful""" - if not self.triggeredNames: - return {"step": "running"} summary = [] if self._result_list: for status in ALL_RESULTS: @@ -742,6 +805,7 @@ class GerritNixConfigurator(ConfiguratorBase): ) config["change_source"] = self.gerrit_change_source + """ config["services"].append( reporters.GerritStatusPush(self.gerrit_server, self.gerrit_user, port=2022, @@ -757,6 +821,7 @@ class GerritNixConfigurator(ConfiguratorBase): # summaryArg=self.url) ) + """ systemd_secrets = secrets.SecretInAFile( dirname=os.environ["CREDENTIALS_DIRECTORY"], From 9a15348984a63e965a5b931ce7e51d9f0751c9d6 Mon Sep 17 00:00:00 2001 From: Puck Meerburg Date: Sun, 10 Mar 2024 23:09:27 +0000 Subject: [PATCH 30/45] Fix up a few loose ends --- buildbot_nix/__init__.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 3b7a394..5552b91 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -153,6 +153,7 @@ class BuildTrigger(steps.BuildStep): @defer.inlineCallbacks def run(self): + self.running = True build_props = self.build.getProperties() source = f"nix-eval-lix" @@ -184,7 +185,7 @@ class BuildTrigger(steps.BuildStep): failed = [] all_results = SUCCESS ss_for_trigger = self.prepareSourcestampListForTrigger() - while len(build_schedule_order) > 0 or len(scheduled) > 0: + while not self.ended and (len(build_schedule_order) > 0 or len(scheduled) > 0): print('Scheduling..') schedule_now = [] for build in list(build_schedule_order): @@ -229,11 +230,10 @@ class BuildTrigger(steps.BuildStep): print(f' Found finished build {job.get("attr")}, result 
{util.Results[result].upper()}') if result != SUCCESS: failed_checks = [] - failed_paths = [] + failed_paths = [job.get('drvPath')] removed = [] while True: old_paths = list(failed_paths) - print(failed_checks, old_paths) for build in list(build_schedule_order): deps = all_deps.get(build.get("drvPath"), []) for path in old_paths: @@ -253,6 +253,8 @@ class BuildTrigger(steps.BuildStep): if job.get("drvPath") in all_deps[dep]: all_deps[dep].remove(job.get("drvPath")) print('Done!') + if self.ended: + return util.CANCELLED return all_results def getCurrentSummary(self) -> dict[str, str]: # noqa: N802 From 29a2ef63e23312af0c13ee1c562b0aeec198c1fd Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Mon, 11 Mar 2024 09:05:28 +0100 Subject: [PATCH 31/45] show hydra job count in trigger step previously we immediately triggered all jobs, now we no longer do. showing the total count at least somewhere is nice to have a rough indication of how much longer a build may still need to run. --- buildbot_nix/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 5552b91..456bf65 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -71,6 +71,7 @@ class BuildTrigger(steps.BuildStep): self._result_list = [] self.ended = False self.waitForFinishDeferred = None + self.description = f"building {len(jobs)} hydra jobs" super().__init__(**kwargs) def interrupt(self, reason): From 9933971ab0384f7e8baaa9460c9db9614ee16d6d Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Mon, 11 Mar 2024 09:06:29 +0100 Subject: [PATCH 32/45] re-enable the gerrit status reporter --- buildbot_nix/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 456bf65..19789a2 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -808,7 +808,6 @@ class GerritNixConfigurator(ConfiguratorBase): ) config["change_source"] = self.gerrit_change_source - 
""" config["services"].append( reporters.GerritStatusPush(self.gerrit_server, self.gerrit_user, port=2022, @@ -824,7 +823,6 @@ class GerritNixConfigurator(ConfiguratorBase): # summaryArg=self.url) ) - """ systemd_secrets = secrets.SecretInAFile( dirname=os.environ["CREDENTIALS_DIRECTORY"], From 13a67b483a2fe474409a1f01840f4172e5f63a07 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Mon, 11 Mar 2024 13:05:12 +0100 Subject: [PATCH 33/45] fix interrupt() can't interrupt with things to interrupt. this is technically duplicated information but keeping parts of the code close to Trigger seems useful. --- buildbot_nix/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 19789a2..84f1b2e 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -71,6 +71,7 @@ class BuildTrigger(steps.BuildStep): self._result_list = [] self.ended = False self.waitForFinishDeferred = None + self.brids = [] self.description = f"building {len(jobs)} hydra jobs" super().__init__(**kwargs) @@ -220,6 +221,7 @@ class BuildTrigger(steps.BuildStep): url = getURLForBuildrequest(self.master, brid) yield self.addURL(f"{scheduler.name} #{brid}", url) self._add_results(brid) + self.brids.append(brid) print('Waiting..') wait_for_next = defer.DeferredList([results for _, _, results in scheduled], fireOnOneCallback = True, fireOnOneErrback=True) self.waitForFinishDeferred = wait_for_next From 51f7b52149a8a2c8e2345d7a6a7ed06d97ffe01f Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Mon, 11 Mar 2024 13:07:35 +0100 Subject: [PATCH 34/45] pre-filter drv_info into all_deps otherwise failure reporting is *enormous* with the entirety of a full derivation info dump in there --- buildbot_nix/__init__.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 84f1b2e..90dd947 100644 --- a/buildbot_nix/__init__.py 
+++ b/buildbot_nix/__init__.py @@ -61,11 +61,11 @@ class BuildTrigger(steps.BuildStep): self, builds_scheduler: str, jobs: list[dict[str, Any]], - drv_info: dict[str, Any], + all_deps: dict[str, Any], **kwargs: Any, ) -> None: self.jobs = jobs - self.drv_info = drv_info + self.all_deps = all_deps self.config = None self.builds_scheduler = builds_scheduler self._result_list = [] @@ -159,20 +159,9 @@ class BuildTrigger(steps.BuildStep): build_props = self.build.getProperties() source = f"nix-eval-lix" - all_deps = dict() - for drv, info in self.drv_info.items(): - all_deps[drv] = set(info.get("inputDrvs").keys()) - def closure_of(key, deps): - r, size = set([key]), 0 - while len(r) != size: - size = len(r) - r.update(*[ deps[k] for k in r ]) - return r.difference([key]) - job_set = set(( drv for drv in ( job.get("drvPath") for job in self.jobs ) if drv )) - all_deps = { k: list(closure_of(k, all_deps).intersection(job_set)) for k in job_set } builds_to_schedule = list(self.jobs) build_schedule_order = [] - sorter = graphlib.TopologicalSorter(all_deps) + sorter = graphlib.TopologicalSorter(self.all_deps) for item in sorter.static_order(): i = 0 while i < len(builds_to_schedule): @@ -191,7 +180,7 @@ class BuildTrigger(steps.BuildStep): print('Scheduling..') schedule_now = [] for build in list(build_schedule_order): - if all_deps.get(build.get("drvPath"), []) == []: + if self.all_deps.get(build.get("drvPath"), []) == []: build_schedule_order.remove(build) schedule_now.append(build) if len(schedule_now) == 0: @@ -238,7 +227,7 @@ class BuildTrigger(steps.BuildStep): while True: old_paths = list(failed_paths) for build in list(build_schedule_order): - deps = all_deps.get(build.get("drvPath"), []) + deps = self.all_deps.get(build.get("drvPath"), []) for path in old_paths: if path in deps: failed_checks.append(build) @@ -252,9 +241,9 @@ class BuildTrigger(steps.BuildStep): print(' Removed jobs: ' + ', '.join(removed)) all_results = worst_status(result, all_results) print(f' 
New result: {util.Results[all_results].upper()}') - for dep in all_deps: - if job.get("drvPath") in all_deps[dep]: - all_deps[dep].remove(job.get("drvPath")) + for dep in self.all_deps: + if job.get("drvPath") in self.all_deps[dep]: + self.all_deps[dep].remove(job.get("drvPath")) print('Done!') if self.ended: return util.CANCELLED @@ -328,6 +317,17 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep): except json.JSONDecodeError as e: msg = f"Failed to parse `nix derivation show` output for {cmd.command}" raise BuildbotNixError(msg) from e + all_deps = dict() + for drv, info in drv_info.items(): + all_deps[drv] = set(info.get("inputDrvs").keys()) + def closure_of(key, deps): + r, size = set([key]), 0 + while len(r) != size: + size = len(r) + r.update(*[ deps[k] for k in r ]) + return r.difference([key]) + job_set = set(( drv for drv in ( job.get("drvPath") for job in filtered_jobs ) if drv )) + all_deps = { k: list(closure_of(k, all_deps).intersection(job_set)) for k in job_set } self.build.addStepsAfterCurrentStep( [ @@ -335,7 +335,7 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep): builds_scheduler=f"lix-nix-build", name="build flake", jobs=filtered_jobs, - drv_info=drv_info, + all_deps=all_deps, ), ], ) From 5cdef7efb6967575a655b558288e059a3a638f6d Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Mon, 11 Mar 2024 14:44:09 +0100 Subject: [PATCH 35/45] fix status reporting to gerrit also adjust labels from split verified to single verified, split labels were only useful during the pre-ci hours --- buildbot_nix/__init__.py | 70 +++++++++++----------------------------- 1 file changed, 18 insertions(+), 52 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 90dd947..1fe77d8 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -173,7 +173,7 @@ class BuildTrigger(steps.BuildStep): done = [] scheduled = [] - failed = [] + failed = {} all_results = SUCCESS ss_for_trigger = 
self.prepareSourcestampListForTrigger() while not self.ended and (len(build_schedule_order) > 0 or len(scheduled) > 0): @@ -234,10 +234,15 @@ class BuildTrigger(steps.BuildStep): failed_paths.append(build.get("drvPath")) build_schedule_order.remove(build) removed.append(build.get("attr")) + failed[build.get("attr")] = (f"dependency {job.get('attr')} failed", []) break if old_paths == failed_paths: break + failed[job.get("attr")] = ( + "failed", + [ getURLForBuildrequest(self.master, brid) for brid in brids.values() ] + ) print(' Removed jobs: ' + ', '.join(removed)) all_results = worst_status(result, all_results) print(f' New result: {util.Results[all_results].upper()}') @@ -245,6 +250,7 @@ class BuildTrigger(steps.BuildStep): if job.get("drvPath") in self.all_deps[dep]: self.all_deps[dep].remove(job.get("drvPath")) print('Done!') + build_props.setProperty("failed_builds", failed, "nix-eval-lix") if self.ended: return util.CANCELLED return all_results @@ -647,62 +653,22 @@ def gerritReviewCB(builderName, build, result, master, arg): if builderName != 'lix/nix-eval': return dict() - all_checks = {} - for step in build['steps']: - if step['name'] != 'build flake': - continue + failed = build['properties'].get('failed_builds', [{}])[0] - for url in step['urls']: - if url['name'].startswith('success: hydraJobs.'): - path = url['name'].split(' ')[1] - all_checks[path] = (True, url['url']) - elif url['name'].startswith('failure: hydraJobs.'): - path = url['name'].split(' ')[1] - all_checks[path] = (False, url['url']) - - collected_oses = {} - for check in all_checks: - arch = check.split('.')[-1] - if not arch.endswith('-linux') and not arch.endswith('-darwin'): - # Not an architecture-specific job, just a test - os = "test" - else: - os = arch.split('-')[1] - (success, failure) = collected_oses.get(os, (0, 0)) - if all_checks[check][0]: - success += 1 - else: - failure += 1 - - collected_oses[os] = (success, failure) - labels = {} - - if 'linux' in collected_oses: - 
(success, failure) = collected_oses['linux'] - if success > 0 and failure == 0: - labels['Verified-On-Linux'] = 1 - elif failure > 0: - labels['Verified-On-Linux'] = -1 - - if 'darwin' in collected_oses: - (success, failure) = collected_oses['darwin'] - if success > 0 and failure == 0: - labels['Verified-On-Darwin'] = 1 - elif failure > 0: - labels['Verified-On-Darwin'] = -1 + labels = { + 'Verified': -1 if failed else 1, + } message = "Buildbot finished compiling your patchset!\n" message += "The result is: %s\n" % util.Results[result].upper() if result != util.SUCCESS: - successful_checks = [] - failed_checks = [] - for check in all_checks: - if not all_checks[check][0]: - failed_checks.append(f" - {check} (see {all_checks[check][1]})") - - if len(failed_checks) > 0: - message += "Failed checks:\n" + "\n".join(failed_checks) + "\n" - + message += "\nFailed checks:\n" + for check, context in sorted(failed.items()): + how, urls = context + message += f" - {check}: {how}" + if urls: + message += f" (see {', '.join(urls)})" + message += "\n" if arg: message += "\nFor more details visit:\n" From e9b3b38bbfe3f9d43c0de7baa436a9f1b92e7c82 Mon Sep 17 00:00:00 2001 From: Puck Meerburg Date: Mon, 11 Mar 2024 15:05:15 +0000 Subject: [PATCH 36/45] Skip scheduling cached builds; improve reporter message --- buildbot_nix/__init__.py | 49 +++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 1fe77d8..e90bcb2 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -158,6 +158,7 @@ class BuildTrigger(steps.BuildStep): self.running = True build_props = self.build.getProperties() source = f"nix-eval-lix" + logs: Log = yield self.addLog("build info") builds_to_schedule = list(self.jobs) build_schedule_order = [] @@ -173,20 +174,23 @@ class BuildTrigger(steps.BuildStep): done = [] scheduled = [] - failed = {} + failed = [] all_results = SUCCESS ss_for_trigger = 
self.prepareSourcestampListForTrigger() while not self.ended and (len(build_schedule_order) > 0 or len(scheduled) > 0): - print('Scheduling..') schedule_now = [] for build in list(build_schedule_order): if self.all_deps.get(build.get("drvPath"), []) == []: build_schedule_order.remove(build) schedule_now.append(build) - if len(schedule_now) == 0: - print(' No builds to schedule found.') for job in schedule_now: - print(f" - {job.get('attr')}") + if job.get('isCached'): + logs.addStdout(f"Cached {job.get('attr')} ({job.get('drvPath')}) - skipping\n") + for dep in self.all_deps: + if job.get("drvPath") in self.all_deps[dep]: + self.all_deps[dep].remove(job.get("drvPath")) + continue + logs.addStdout(f"Scheduling {job.get('attr')} ({job.get('drvPath')})\n") (scheduler, props) = self.schedule_one(build_props, job) scheduler = self.getSchedulerByName(scheduler) @@ -211,7 +215,11 @@ class BuildTrigger(steps.BuildStep): yield self.addURL(f"{scheduler.name} #{brid}", url) self._add_results(brid) self.brids.append(brid) - print('Waiting..') + if len(scheduled) == 0: + if len(build_schedule_order) == 0: + logs.addStderr('Ran out of builds\n') + break + continue wait_for_next = defer.DeferredList([results for _, _, results in scheduled], fireOnOneCallback = True, fireOnOneErrback=True) self.waitForFinishDeferred = wait_for_next results, index = yield wait_for_next @@ -219,11 +227,16 @@ class BuildTrigger(steps.BuildStep): done.append((job, brids, results)) del scheduled[index] result = results[0] - print(f' Found finished build {job.get("attr")}, result {util.Results[result].upper()}') + logs.addStdout(f'Build {job.get("attr")} ({job.get("drvPath")}) finished, result {util.Results[result].upper()}\n') if result != SUCCESS: failed_checks = [] failed_paths = [job.get('drvPath')] removed = [] + failed.append(( + job.get("attr"), + "failed", + [ getURLForBuildrequest(self.master, brid) for brid in brids.values() ] + )) while True: old_paths = list(failed_paths) for build in 
list(build_schedule_order): @@ -234,22 +247,21 @@ class BuildTrigger(steps.BuildStep): failed_paths.append(build.get("drvPath")) build_schedule_order.remove(build) removed.append(build.get("attr")) - failed[build.get("attr")] = (f"dependency {job.get('attr')} failed", []) + failed.append((build.get("attr"), f"dependency {job.get('attr')} failed", [])) break if old_paths == failed_paths: break - failed[job.get("attr")] = ( - "failed", - [ getURLForBuildrequest(self.master, brid) for brid in brids.values() ] - ) - print(' Removed jobs: ' + ', '.join(removed)) + if len(removed) > 3: + yield logs.addStdout(' Skipping jobs: ' + ', '.join(removed[:3]) + f', ... ({len(removed) - 3} more)\n') + else: + yield logs.addStdout(' Skipping jobs: ' + ', '.join(removed) + '\n') all_results = worst_status(result, all_results) - print(f' New result: {util.Results[all_results].upper()}') for dep in self.all_deps: if job.get("drvPath") in self.all_deps[dep]: self.all_deps[dep].remove(job.get("drvPath")) - print('Done!') + yield logs.addHeader('Done!\n') + yield logs.finish() build_props.setProperty("failed_builds", failed, "nix-eval-lix") if self.ended: return util.CANCELLED @@ -653,7 +665,7 @@ def gerritReviewCB(builderName, build, result, master, arg): if builderName != 'lix/nix-eval': return dict() - failed = build['properties'].get('failed_builds', [{}])[0] + failed = build['properties'].get('failed_builds', [[]])[0] labels = { 'Verified': -1 if failed else 1, @@ -663,8 +675,9 @@ def gerritReviewCB(builderName, build, result, master, arg): message += "The result is: %s\n" % util.Results[result].upper() if result != util.SUCCESS: message += "\nFailed checks:\n" - for check, context in sorted(failed.items()): - how, urls = context + for check, how, urls in failed: + if not urls: + message += " " message += f" - {check}: {how}" if urls: message += f" (see {', '.join(urls)})" From 6118daa0a43eaf97670021429eb31ae9af547754 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Mon, 11 Mar 
2024 22:32:06 +0100 Subject: [PATCH 37/45] feat: binary cache Signed-off-by: Raito Bezarius --- buildbot_nix/__init__.py | 51 ++++++++++++++++++++++++++++++++++++ buildbot_nix/binary_cache.py | 12 +++++++++ nix/coordinator.nix | 46 ++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+) create mode 100644 buildbot_nix/binary_cache.py diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index e90bcb2..a3e3b7c 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -26,6 +26,7 @@ from buildbot.process.buildstep import CANCELLED from buildbot.process.buildstep import EXCEPTION from buildbot.process.buildstep import SUCCESS from buildbot.process.results import worst_status +from buildbot_nix.binary_cache import LocalSigner if TYPE_CHECKING: from buildbot.process.log import Log @@ -34,6 +35,8 @@ from twisted.internet import defer, threads from twisted.logger import Logger from twisted.python.failure import Failure +from .binary_cache import S3BinaryCacheConfig + from .github_projects import ( slugify_project_name, ) @@ -498,6 +501,8 @@ def nix_build_config( project: GerritProject, worker_names: list[str], outputs_path: Path | None = None, + signing_keyfile: str | None = None, + binary_cache_config: S3BinaryCacheConfig | None = None ) -> util.BuilderConfig: """Builds one nix flake attribute.""" factory = util.BuildFactory() @@ -528,6 +533,39 @@ def nix_build_config( ), ) + if signing_keyfile is not None: + factory.addStep( + steps.ShellCommand( + name="Sign the store path", + command=[ + "nix", + "store", + "sign", + "--keyfile", + signing_keyfile, + util.Interpolate( + "%(prop:drv_path)s^*" + ) + ] + ), + ) + + if binary_cache_config is not None: + factory.addStep( + steps.ShellCommand( + name="Upload the store path to the cache", + command=[ + "nix", + "copy", + "--to", + f"s3://{binary_cache_config.bucket}?profile={binary_cache_config.profile}&region={binary_cache_config.region}&endpoint={binary_cache_config.endpoint}", +
util.Property( + "out_path" + ) + ] + ) + ) + factory.addStep( steps.ShellCommand( name="Register gcroot", @@ -586,6 +624,8 @@ def config_for_project( nix_eval_max_memory_size: int, eval_lock: util.MasterLock, outputs_path: Path | None = None, + signing_keyfile: str | None = None, + binary_cache_config: S3BinaryCacheConfig | None = None ) -> Project: config["projects"].append(Project(project.name)) config["schedulers"].extend( @@ -642,6 +682,8 @@ def config_for_project( project, worker_names, outputs_path=outputs_path, + signing_keyfile=signing_keyfile, + binary_cache_config=binary_cache_config ), ], ) @@ -742,6 +784,8 @@ class GerritNixConfigurator(ConfiguratorBase): nix_eval_worker_count: int | None, nix_eval_max_memory_size: int, nix_workers_secret_name: str = "buildbot-nix-workers", # noqa: S107 + signing_keyfile: str | None = None, + binary_cache_config: dict[str, str] | None = None, outputs_path: str | None = None, ) -> None: super().__init__() @@ -754,6 +798,11 @@ class GerritNixConfigurator(ConfiguratorBase): self.nix_supported_systems = nix_supported_systems self.gerrit_change_source = GerritChangeSource(gerrit_server, gerrit_user, gerritport=gerrit_port, identity_file=gerrit_sshkey_path) self.url = url + if binary_cache_config is not None: + self.binary_cache_config = S3BinaryCacheConfig(**binary_cache_config) + else: + self.binary_cache_config = None + self.signing_keyfile = signing_keyfile if outputs_path is None: self.outputs_path = None else: @@ -786,6 +835,8 @@ class GerritNixConfigurator(ConfiguratorBase): self.nix_eval_max_memory_size, eval_lock, self.outputs_path, + signing_keyfile=self.signing_keyfile, + binary_cache_config=self.binary_cache_config ) config["change_source"] = self.gerrit_change_source diff --git a/buildbot_nix/binary_cache.py b/buildbot_nix/binary_cache.py new file mode 100644 index 0000000..8315e21 --- /dev/null +++ b/buildbot_nix/binary_cache.py @@ -0,0 +1,12 @@ +from dataclasses import dataclass + +@dataclass +class 
S3BinaryCacheConfig: + region: str + bucket: str + endpoint: str + profile: str + +class LocalSigner: + def __init__(self, keyfile: str): + self.keyfile = keyfile diff --git a/nix/coordinator.nix b/nix/coordinator.nix index 797d339..5e08d07 100644 --- a/nix/coordinator.nix +++ b/nix/coordinator.nix @@ -57,6 +57,34 @@ in default = null; example = "/var/www/buildbot/nix-outputs"; }; + + binaryCache = { + enable = lib.mkEnableOption " binary cache upload to a S3 bucket"; + profileCredentialsFile = lib.mkOption { + type = lib.types.nullOr lib.types.path; + description = "A path to the various AWS profile credentials related to the S3 bucket containing a profile named `default`"; + default = null; + example = "/run/agenix.d/aws-profile"; + }; + bucket = lib.mkOption { + type = lib.types.nullOr lib.types.str; + description = "Bucket where to store the data"; + default = null; + example = "lix-cache"; + }; + endpoint = lib.mkOption { + type = lib.types.nullOr lib.types.str; + description = "Endpoint for the S3 server"; + default = null; + example = "s3.lix.systems"; + }; + region = lib.mkOption { + type = lib.types.nullOr lib.types.str; + description = "Region for the S3 bucket"; + default = null; + example = "garage"; + }; + }; }; }; config = lib.mkIf cfg.enable { @@ -97,6 +125,10 @@ in nix_eval_worker_count=${if cfg.evalWorkerCount == null then "None" else builtins.toString cfg.evalWorkerCount}, nix_supported_systems=${builtins.toJSON cfg.buildSystems}, outputs_path=${if cfg.outputsPath == null then "None" else builtins.toJSON cfg.outputsPath}, + binary_cache_config=${if (!cfg.binaryCache.enable) then "None" else builtins.toJSON { + inherit (cfg.binaryCache) bucket region endpoint; + profile = "default"; + }} ) '' ]; @@ -117,6 +149,20 @@ in ]; }; + # TODO(raito): we assume worker runs on coordinator. please clean up this later. 
+ systemd.services.buildbot-worker.serviceConfig.Environment = + let + awsConfigFile = pkgs.writeText "config.ini" '' + [default] + region = ${cfg.binaryCache.region} + endpoint_url = ${cfg.binaryCache.endpoint} + ''; + in + [ + "AWS_CONFIG_FILE=${awsConfigFile}" + "AWS_SHARED_CREDENTIALS_FILE=${cfg.binaryCache.profileCredentialsFile}" + ]; + systemd.services.buildbot-master = { after = [ "postgresql.service" ]; serviceConfig = { From 8d36ac1d90ac0288f5aac38c5630ec344843be05 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Mon, 11 Mar 2024 23:20:58 +0100 Subject: [PATCH 38/45] feat: signing key Signed-off-by: Raito Bezarius --- buildbot_nix/__init__.py | 2 +- nix/coordinator.nix | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index a3e3b7c..93007be 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -541,7 +541,7 @@ def nix_build_config( "nix", "store", "sign", - "--keyfile", + "--key-file", signing_keyfile, util.Interpolate( "%(prop:drv_path)s^*" diff --git a/nix/coordinator.nix b/nix/coordinator.nix index 5e08d07..9863dc6 100644 --- a/nix/coordinator.nix +++ b/nix/coordinator.nix @@ -58,6 +58,13 @@ in example = "/var/www/buildbot/nix-outputs"; }; + signingKeyFile = lib.mkOption { + type = lib.types.nullOr lib.types.path; + description = "A path to a Nix signing key"; + default = null; + example = "/run/agenix.d/signing-key"; + }; + binaryCache = { enable = lib.mkEnableOption " binary cache upload to a S3 bucket"; profileCredentialsFile = lib.mkOption { @@ -125,6 +132,8 @@ in nix_eval_worker_count=${if cfg.evalWorkerCount == null then "None" else builtins.toString cfg.evalWorkerCount}, nix_supported_systems=${builtins.toJSON cfg.buildSystems}, outputs_path=${if cfg.outputsPath == null then "None" else builtins.toJSON cfg.outputsPath}, + # Signing key file must be available on the workers and readable. 
+ signing_keyfile=${if cfg.signingKeyFile == null then "None" else builtins.toJSON cfg.signingKeyFile}, binary_cache_config=${if (!cfg.binaryCache.enable) then "None" else builtins.toJSON { inherit (cfg.binaryCache) bucket region endpoint; profile = "default"; From a9ce436201b59de578cb39e359fd458a9846c733 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Fri, 15 Mar 2024 13:17:25 +0100 Subject: [PATCH 39/45] fix system builds with binary cache disabled --- nix/coordinator.nix | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nix/coordinator.nix b/nix/coordinator.nix index 9863dc6..8cf3f4e 100644 --- a/nix/coordinator.nix +++ b/nix/coordinator.nix @@ -160,6 +160,7 @@ in # TODO(raito): we assume worker runs on coordinator. please clean up this later. systemd.services.buildbot-worker.serviceConfig.Environment = + lib.mkIf cfg.binaryCache.enable ( let awsConfigFile = pkgs.writeText "config.ini" '' [default] @@ -170,7 +171,8 @@ in [ "AWS_CONFIG_FILE=${awsConfigFile}" "AWS_SHARED_CREDENTIALS_FILE=${cfg.binaryCache.profileCredentialsFile}" - ]; + ] + ); systemd.services.buildbot-master = { after = [ "postgresql.service" ]; From 5e50a858d7e63d05ae11e9684965240d754a9302 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Fri, 15 Mar 2024 14:40:23 +0100 Subject: [PATCH 40/45] revert to stable web ui the react-based ui is too slow for our needs, janky, the log viewer doesn't work quite right (breaking after ~600 lines of logs viewed), loses updates to sub-builds, and just blanks its entire screen when a build finishes. the old ui doesn't do that. 
--- buildbot_nix/__init__.py | 1 - nix/coordinator.nix | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 93007be..579ff9d 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -862,7 +862,6 @@ class GerritNixConfigurator(ConfiguratorBase): config["secretsProviders"].append(systemd_secrets) config["www"].setdefault("plugins", {}) - config["www"]["plugins"].update(dict(base_react={})) if "auth" not in config["www"]: config["www"]["auth"] = LixSystemsOAuth2('buildbot', read_secret_file('buildbot-oauth2-secret'), autologin=True) diff --git a/nix/coordinator.nix b/nix/coordinator.nix index 8cf3f4e..3ed2824 100644 --- a/nix/coordinator.nix +++ b/nix/coordinator.nix @@ -153,7 +153,7 @@ in ps.treq ps.psycopg2 (ps.toPythonModule pkgs.buildbot-worker) - pkgs.buildbot-plugins.www-react + pkgs.buildbot-plugins.www (pkgs.python3.pkgs.callPackage ../default.nix { }) ]; }; From d394f35f550f18a1dfe2c24b5f6d7405a65d2a23 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Fri, 15 Mar 2024 14:47:49 +0100 Subject: [PATCH 41/45] use one scheduler and worker set per arch and an additional set for generic tasks like error reporting. this prevents hol blocking for underutilized arches when at least one arch is blocking, as usually happens to us with aarch64-linux. 
--- buildbot_nix/__init__.py | 47 ++++++++++++++++++++++++---------------- buildbot_nix/worker.py | 11 +++++++--- nix/worker.nix | 9 +++++++- 3 files changed, 44 insertions(+), 23 deletions(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 579ff9d..3e26e6f 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -62,7 +62,7 @@ class GerritProject: class BuildTrigger(steps.BuildStep): def __init__( self, - builds_scheduler: str, + builds_scheduler_group: str, jobs: list[dict[str, Any]], all_deps: dict[str, Any], **kwargs: Any, @@ -70,7 +70,7 @@ class BuildTrigger(steps.BuildStep): self.jobs = jobs self.all_deps = all_deps self.config = None - self.builds_scheduler = builds_scheduler + self.builds_scheduler_group = builds_scheduler_group self._result_list = [] self.ended = False self.waitForFinishDeferred = None @@ -116,7 +116,7 @@ class BuildTrigger(steps.BuildStep): if error is not None: props.setProperty("error", error, source) - return (self.builds_scheduler, props) + return (f"{self.builds_scheduler_group}-other", props) drv_path = job.get("drvPath") system = job.get("system") @@ -131,7 +131,7 @@ class BuildTrigger(steps.BuildStep): props.setProperty("out_path", out_path, source) props.setProperty("isCached", job.get("isCached"), source) - return (self.builds_scheduler, props) + return (f"{self.builds_scheduler_group}-{system}", props) @defer.inlineCallbacks def _add_results(self, brid): @@ -353,7 +353,7 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep): self.build.addStepsAfterCurrentStep( [ BuildTrigger( - builds_scheduler=f"lix-nix-build", + builds_scheduler_group=f"lix-nix-build", name="build flake", jobs=filtered_jobs, all_deps=all_deps, @@ -499,6 +499,7 @@ def nix_eval_config( def nix_build_config( project: GerritProject, + worker_arch: str, worker_names: list[str], outputs_path: Path | None = None, signing_keyfile: str | None = None, @@ -597,7 +598,7 @@ def nix_build_config( ), ) return 
util.BuilderConfig( - name=f"{project.name}/nix-build", + name=f"{project.name}/nix-build/{worker_arch}", project=project.name, workernames=worker_names, collapseRequests=False, @@ -640,9 +641,12 @@ def config_for_project( builderNames=[f"{project.name}/nix-eval"], ), # this is triggered from `nix-eval` - schedulers.Triggerable( - name=f"{project.name}-nix-build", - builderNames=[f"{project.name}/nix-build"], + *( + schedulers.Triggerable( + name=f"{project.name}-nix-build-{arch}", + builderNames=[f"{project.name}/nix-build/{arch}"], + ) + for arch in nix_supported_systems + [ "other" ] ), # allow to manually trigger a nix-build schedulers.ForceScheduler( @@ -672,18 +676,22 @@ def config_for_project( nix_eval_config( project, gerrit_private_key, - worker_names, + [ f"{w}-other" for w in worker_names ], supported_systems=nix_supported_systems, worker_count=nix_eval_worker_count, max_memory_size=nix_eval_max_memory_size, eval_lock=eval_lock, ), - nix_build_config( - project, - worker_names, - outputs_path=outputs_path, - signing_keyfile=signing_keyfile, - binary_cache_config=binary_cache_config + *( + nix_build_config( + project, + arch, + [ f"{w}-{arch}" for w in worker_names ], + outputs_path=outputs_path, + signing_keyfile=signing_keyfile, + binary_cache_config=binary_cache_config + ) + for arch in nix_supported_systems + [ "other" ] ), ], ) @@ -819,9 +827,10 @@ class GerritNixConfigurator(ConfiguratorBase): for item in worker_config: cores = item.get("cores", 0) for i in range(cores): - worker_name = f"{item['name']}-{i:03}" - config["workers"].append(worker.Worker(worker_name, item["pass"])) - worker_names.append(worker_name) + for arch in self.nix_supported_systems + ["other"]: + worker_name = f"{item['name']}-{i:03}" + config["workers"].append(worker.Worker(f"{worker_name}-{arch}", item["pass"])) + worker_names.append(worker_name) eval_lock = util.MasterLock("nix-eval") diff --git a/buildbot_nix/worker.py b/buildbot_nix/worker.py index 3f7139d..0018915 100644 
--- a/buildbot_nix/worker.py +++ b/buildbot_nix/worker.py @@ -25,6 +25,9 @@ class WorkerConfig: worker_count: int = int( os.environ.get("WORKER_COUNT", str(multiprocessing.cpu_count())), ) + worker_arch_list: list[str] = field( + default_factory=lambda: os.environ.get("WORKER_ARCH_LIST", "").split(",") + ["other"], + ) buildbot_dir: Path = field( default_factory=lambda: Path(require_env("BUILDBOT_DIR")) ) @@ -34,13 +37,14 @@ class WorkerConfig: def setup_worker( application: components.Componentized, builder_id: int, + arch: str, config: WorkerConfig, ) -> None: - basedir = config.buildbot_dir.parent / f"{config.buildbot_dir.name}-{builder_id:03}" + basedir = config.buildbot_dir.parent / f"{config.buildbot_dir.name}-{builder_id:03}/{arch}" basedir.mkdir(parents=True, exist_ok=True, mode=0o700) hostname = socket.gethostname() - workername = f"{hostname}-{builder_id:03}" + workername = f"{hostname}-{builder_id:03}-{arch}" keepalive = 600 umask = None maxdelay = 300 @@ -67,7 +71,8 @@ def setup_worker( def setup_workers(application: components.Componentized, config: WorkerConfig) -> None: for i in range(config.worker_count): - setup_worker(application, i, config) + for arch in config.worker_arch_list: + setup_worker(application, i, arch, config) # note: this line is matched against to check that this is a worker diff --git a/nix/worker.nix b/nix/worker.nix index 28afdff..bbbfd81 100644 --- a/nix/worker.nix +++ b/nix/worker.nix @@ -28,6 +28,10 @@ in type = lib.types.path; description = "The buildbot worker password file."; }; + workerArchList = lib.mkOption { + type = lib.types.listOf lib.types.str; + description = "Nix `system`s the worker should feel responsible for."; + }; }; }; config = lib.mkIf cfg.enable { @@ -69,7 +73,10 @@ in OOMPolicy = "continue"; LoadCredential = [ "worker-password-file:${cfg.workerPasswordFile}" ]; - Environment = [ "WORKER_PASSWORD_FILE=%d/worker-password-file" ]; + Environment = [ + "WORKER_PASSWORD_FILE=%d/worker-password-file" + 
"WORKER_ARCH_LIST=${lib.concatStringsSep "," cfg.workerArchList}" + ]; Type = "simple"; User = "buildbot-worker"; Group = "buildbot-worker"; From 2eaee8f62b3b6d53cc6f062e2e4d4514e2dc6b82 Mon Sep 17 00:00:00 2001 From: Puck Meerburg Date: Mon, 18 Mar 2024 00:07:34 +0000 Subject: [PATCH 42/45] Fix marking jobs as successful if they never finish evaluating. --- buildbot_nix/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 3e26e6f..9a5a6f7 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -718,7 +718,7 @@ def gerritReviewCB(builderName, build, result, master, arg): failed = build['properties'].get('failed_builds', [[]])[0] labels = { - 'Verified': -1 if failed else 1, + 'Verified': -1 if result != util.SUCCESS else 1, } message = "Buildbot finished compiling your patchset!\n" From 3717bfab048a5aa1fc8be9a59006c198a9224fb6 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Thu, 28 Mar 2024 03:52:13 +0100 Subject: [PATCH 43/45] automatically cancel outdated builds --- buildbot_nix/__init__.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 9a5a6f7..335ece7 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -865,6 +865,30 @@ class GerritNixConfigurator(ConfiguratorBase): ) + def gerritBranchKey(b): + ref = b['branch'] + if not ref.startswith('refs/changes/'): + return ref + return ref.rsplit('/', 1)[0] + config["services"].append( + util.OldBuildCanceller( + "lix_build_canceller", + filters=[ + ( + [ + f"lix/nix-{kind}" + for kind in [ "eval" ] + [ + f"build/{arch}" + for arch in self.nix_supported_systems + [ "other" ] + ] + ], + util.SourceStampFilter(project_eq=["lix"]) + ) + ], + branch_key=gerritBranchKey + ) + ) + systemd_secrets = secrets.SecretInAFile( dirname=os.environ["CREDENTIALS_DIRECTORY"], ) From daa84f4169ce2a4e9a31d5c75aa53e21d9ce8b79 Mon 
Sep 17 00:00:00 2001 From: eldritch horrors Date: Fri, 5 Apr 2024 14:12:15 +0200 Subject: [PATCH 44/45] never build on the coordinator for such cases just add the coordinator as a remote builder. --- buildbot_nix/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py index 335ece7..da676a1 100644 --- a/buildbot_nix/__init__.py +++ b/buildbot_nix/__init__.py @@ -518,6 +518,8 @@ def nix_build_config( "--option", "keep-going", "true", + # do not build directly on the coordinator + "--max-jobs", "0", "--option", # stop stuck builds after 20 minutes "--max-silent-time", From 131fc792f765a367dfe7fa155f35fc425623e20f Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Fri, 5 Apr 2024 15:13:11 +0200 Subject: [PATCH 45/45] allow worker counts to be set per arch --- buildbot_nix/worker.py | 17 ++++++++++------- nix/worker.nix | 8 +++++--- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/buildbot_nix/worker.py b/buildbot_nix/worker.py index 0018915..17298a8 100644 --- a/buildbot_nix/worker.py +++ b/buildbot_nix/worker.py @@ -22,11 +22,14 @@ class WorkerConfig: .read_text() .rstrip("\r\n") ) - worker_count: int = int( - os.environ.get("WORKER_COUNT", str(multiprocessing.cpu_count())), - ) - worker_arch_list: list[str] = field( - default_factory=lambda: os.environ.get("WORKER_ARCH_LIST", "").split(",") + ["other"], + worker_arch_list: dict[str, int] = field( + default_factory=lambda: dict(other=1) | { + arch: int(count) + for arch, count in ( + e.split("=") + for e in os.environ.get("WORKER_ARCH_LIST", "").split(",") + ) + }, ) buildbot_dir: Path = field( default_factory=lambda: Path(require_env("BUILDBOT_DIR")) @@ -70,8 +73,8 @@ def setup_worker( def setup_workers(application: components.Componentized, config: WorkerConfig) -> None: - for i in range(config.worker_count): - for arch in config.worker_arch_list: + for arch, jobs in config.worker_arch_list.items(): + for i in range(jobs): 
setup_worker(application, i, arch, config) diff --git a/nix/worker.nix b/nix/worker.nix index bbbfd81..3be1b3b 100644 --- a/nix/worker.nix +++ b/nix/worker.nix @@ -28,8 +28,8 @@ in type = lib.types.path; description = "The buildbot worker password file."; }; - workerArchList = lib.mkOption { - type = lib.types.listOf lib.types.str; + workerArchitectures = lib.mkOption { + type = lib.types.attrsOf lib.types.int; description = "Nix `system`s the worker should feel responsible for."; }; }; @@ -66,6 +66,9 @@ in environment.PYTHONPATH = "${python.withPackages (_: [cfg.package])}/${python.sitePackages}"; environment.MASTER_URL = cfg.coordinatorUrl; environment.BUILDBOT_DIR = buildbotDir; + environment.WORKER_ARCH_LIST = + lib.concatStringsSep "," + (lib.mapAttrsToList (arch: jobs: "${arch}=${toString jobs}") cfg.workerArchitectures); serviceConfig = { # We rather want the CI job to fail on OOM than to have a broken buildbot worker. @@ -75,7 +78,6 @@ in LoadCredential = [ "worker-password-file:${cfg.workerPasswordFile}" ]; Environment = [ "WORKER_PASSWORD_FILE=%d/worker-password-file" - "WORKER_ARCH_LIST=${lib.concatStringsSep "," cfg.workerArchList}" ]; Type = "simple"; User = "buildbot-worker";