From b56b8963a283120310ba2802542fbfe787360e35 Mon Sep 17 00:00:00 2001
From: Raito Bezarius
Date: Sat, 5 Oct 2024 18:14:39 +0200
Subject: [PATCH] feat: introduce Buildbot multi-tenancy

This shares the same expression to deploy the Buildbot.

Signed-off-by: Raito Bezarius
---
Review notes (ignored by `git am`, not part of the commit message):
- The tenant lookup tables (extraTenantSpecificBuilders, clientId) apply
  `or (throw ...)` to the table lookup itself, so a tenant without an entry
  reports "<tenant> is not a known tenant".
- The `gerrit.domain` option example is a string, matching `types.str`.
- NGINX sends the Gerrit domain in Access-Control-Allow-Origin; that header
  takes a single origin and cannot carry a "*.forkos.org" style pattern.
  NOTE(review): assumes Gerrit is the origin that needs credentialed access.
- NOTE(review): buildbot-remote-builder-key is listed in bagel.secrets.files
  and also sets an explicit `file`; confirm the two definitions agree.
- NOTE(review): From/Signed-off-by carry no e-mail address in this copy.
- Indentation and whitespace-only context lines were reconstructed to match
  the hunk headers; verify against the target tree before applying.

 common/ssh-keys.nix           |   5 ++
 flake.nix                     |   2 +
 hosts/buildbot/default.nix    |  16 +++++
 services/buildbot/default.nix | 124 ++++++++++++++++++++++++++--------
 services/buildbot/lix.nix     |  49 ++++++++++++++
 5 files changed, 167 insertions(+), 29 deletions(-)
 create mode 100644 services/buildbot/lix.nix

diff --git a/common/ssh-keys.nix b/common/ssh-keys.nix
index 632a92d..426d953 100644
--- a/common/ssh-keys.nix
+++ b/common/ssh-keys.nix
@@ -24,6 +24,11 @@
 
     # Lix
     build01-aarch64-lix = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICC69NZD/zhIB/wUb5odg46bss5g8hH2fDl22bk4qeSW";
+    build02-aarch64-lix = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGdJE375pe58RJbhKwXRp3D//+SJ3ssiVZrLsM9CLHn0";
+    build01-aarch64-darwin-lix = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMVf1uO0lv5UBti/naW/+amqLxvWZg+StXk9aM+lJ7e4";
+
+    # Raito infrastructure
+    epyc-newtype-fr = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOXT9Init1MhKt4rjBANLq0t0bPww/WQZ96uB4AEDrml";
   };
 
   users = {
diff --git a/flake.nix b/flake.nix
index bbb5a55..b7783e6 100644
--- a/flake.nix
+++ b/flake.nix
@@ -134,6 +134,7 @@
           bagel.monitoring.grafana-agent.tenant = "floral";
           bagel.secrets.tenant = "floral";
           bagel.builders.extra-build-capacity.provider.tenant = "floral";
+          bagel.services.buildbot.tenant = "floral";
         })
       ];
 
@@ -168,6 +169,7 @@
               "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDu4cEqZzAI/1vZjSQkTJ4ijIg9nuloOuSKUrnkJIOFn"
             ];
           };
+          bagel.services.buildbot.tenant = "lix";
         }
       ];
 
diff --git a/hosts/buildbot/default.nix b/hosts/buildbot/default.nix
index 52df6eb..1ff08f9 100755
--- a/hosts/buildbot/default.nix
+++ b/hosts/buildbot/default.nix
@@ -2,6 +2,7 @@
   config,
   lib,
   pkgs,
+  nodes,
   ...
 }:
 {
@@ -26,6 +27,21 @@
   bagel.services.buildbot = {
     enable = true;
     domain = "buildbot.forkos.org";
+    gerrit =
+      let
+        cfgGerrit = nodes.gerrit01.config.bagel.services.gerrit;
+      in
+      {
+        domain = cfgGerrit.canonicalDomain;
+        port = cfgGerrit.port;
+        username = "buildbot";
+      };
+    cors.allowedOrigin = "*.forkos.org";
+    projects = [
+      "buildbot-test"
+      "nixpkgs"
+      "infra"
+    ];
     builders = [ "builder-10" ];
   };
 
diff --git a/services/buildbot/default.nix b/services/buildbot/default.nix
index dd1bd68..51a0621 100644
--- a/services/buildbot/default.nix
+++ b/services/buildbot/default.nix
@@ -7,15 +7,69 @@
 }:
 let
   cfg = config.bagel.services.buildbot;
-  cfgGerrit = nodes.gerrit01.config.bagel.services.gerrit;
   ssh-keys = import ../../common/ssh-keys.nix;
+  freeGbDiskSpace = 20;
+  extraTenantSpecificBuilders = {
+    lix = import ./lix.nix {
+      inherit config nodes;
+    };
+    floral = [ ];
+  }.${cfg.tenant} or (throw "${cfg.tenant} is not a known tenant");
+  clientId = {
+    lix = "buildbot";
+    floral = "forkos-buildbot";
+  }.${cfg.tenant} or (throw "${cfg.tenant} is not a known tenant");
   inherit (lib) mkEnableOption mkOption mkIf types;
 in
 {
   options.bagel.services.buildbot = {
     enable = mkEnableOption "Buildbot";
+
+    tenant = mkOption {
+      type = types.enum [ "lix" "floral" ];
+      description = "Which buildbot tenant to enable";
+    };
+
     domain = mkOption {
       type = types.str;
+      description = "Domain name for this Buildbot";
+    };
+
+    gerrit = {
+      domain = mkOption {
+        type = types.str;
+        description = "Canonical domain of the Gerrit associated to this Buildbot";
+        example = "cl.forkos.org";
+      };
+
+      port = mkOption {
+        type = types.port;
+        description = "Gerrit SSH port for this Buildbot";
+      };
+
+      username = mkOption {
+        type = types.str;
+        description = "Gerrit service username for this Buildbot";
+      };
+    };
+
+    cors.allowedOrigin = mkOption {
+      type = types.str;
+      example = "*.forkos.org";
+      description = "Allowed origin for Buildbot for CORS without the protocol";
+    };
+
+    buildSystems = mkOption {
+      type = types.listOf (types.enum [ "x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin" ]);
+      default = [ "x86_64-linux" ];
+      example = [ "x86_64-linux" "aarch64-linux" ];
+      description = "Supported build systems for this buildbot instance.";
+    };
+
+    projects = mkOption {
+      type = types.listOf types.str;
+      example = [ "nixpkgs" ];
+      description = "Static list of projects enabled for Buildbot CI";
     };
 
     builders = mkOption {
@@ -27,28 +81,38 @@ in
   config = mkIf cfg.enable {
     networking.firewall.allowedTCPPorts = [ 80 443 ];
 
-    age.secrets.buildbot-worker-password.file = ../../secrets/buildbot-worker-password.age;
-    age.secrets.buildbot-oauth-secret.file = ../../secrets/buildbot-oauth-secret.age;
-    age.secrets.buildbot-workers.file = ../../secrets/buildbot-workers.age;
-    age.secrets.buildbot-service-key.file = ../../secrets/buildbot-service-key.age;
+    bagel.secrets.files = [
+      "buildbot-worker-password"
+      "buildbot-oauth-secret"
+      "buildbot-workers"
+      "buildbot-service-key"
+      "buildbot-signing-key"
+      "buildbot-remote-builder-key"
+    ];
     age.secrets.buildbot-signing-key = {
-      file = ../../secrets/buildbot-signing-key.age;
       owner = "buildbot-worker";
       group = "buildbot-worker";
     };
     age.secrets.buildbot-remote-builder-key = {
-      file = ../../secrets/buildbot-remote-builder-key.age;
+      file = ../../secrets/${cfg.tenant}/buildbot-remote-builder-key.age;
       owner = "buildbot-worker";
       group = "buildbot-worker";
     };
 
-    services.nginx.virtualHosts.${cfg.domain} = {
-      forceSSL = true;
-      enableACME = true;
-      extraConfig = ''
-        add_header Access-Control-Allow-Credentials 'true' always;
-        add_header Access-Control-Allow-Origin 'https://cl.forkos.org' always;
+    services.nginx = {
+      appendHttpConfig = ''
+        # Our session stuff is too big with the TWISTED_COOKIE in addition.
+        # Default is usually 4k or 8k.
+        large_client_header_buffers 4 16k;
       '';
+      virtualHosts.${cfg.domain} = {
+        forceSSL = true;
+        enableACME = true;
+        extraConfig = ''
+          add_header Access-Control-Allow-Credentials 'true' always;
+          add_header Access-Control-Allow-Origin 'https://${cfg.gerrit.domain}' always;
+        '';
+      };
     };
 
     services.buildbot-nix.worker = {
@@ -74,10 +138,12 @@ in
       enable = true;
 
       inherit (cfg) domain;
+      # TODO(raito): is that really necessary when we can just collect buildMachines' systems?
+      inherit (cfg) buildSystems;
 
       oauth2 = {
         name = "Lix";
-        clientId = "forkos-buildbot";
+        inherit clientId;
         clientSecretFile = config.age.secrets.buildbot-oauth-secret.path;
         resourceEndpoint = "https://identity.lix.systems";
         authUri = "https://identity.lix.systems/realms/lix-project/protocol/openid-connect/auth";
@@ -90,12 +156,7 @@ in
       workersFile = config.age.secrets.buildbot-workers.path;
 
       allowedOrigins = [
-        "*.forkos.org"
-      ];
-
-      # TODO(raito): is that really necessary when we can just collect buildMachines' systems?
-      buildSystems = [
-        "x86_64-linux"
+        cfg.cors.allowedOrigin
       ];
 
       buildMachines = map (n: {
@@ -110,20 +171,14 @@ in
         # Contrary to how Nix works, here we can specify non-base64 public host keys.
         publicHostKey = ssh-keys.machines.${n};
       }
-      ) cfg.builders;
+      ) cfg.builders ++ extraTenantSpecificBuilders;
 
       gerrit = {
-        domain = cfgGerrit.canonicalDomain;
         # Manually managed account…
         # TODO: https://git.lix.systems/the-distro/infra/issues/69
-        username = "buildbot";
-        port = cfgGerrit.port;
+        inherit (cfg.gerrit) domain port username;
         privateKeyFile = config.age.secrets.buildbot-service-key.path;
-        projects = [
-          "buildbot-test"
-          "nixpkgs"
-          "infra"
-        ];
+        inherit (cfg) projects;
       };
 
       evalWorkerCount = 6;
@@ -132,10 +187,21 @@ in
       signingKeyFile = config.age.secrets.buildbot-signing-key.path;
     };
 
+    # Make PostgreSQL restart smoother.
+    systemd.services.postgresql.serviceConfig = {
+      Restart = "always";
+      RestartMaxDelaySec = "5m";
+      RestartSteps = 10;
+    };
+
+    nix.settings.keep-derivations = true;
 
     nix.gc = {
       automatic = true;
       dates = "hourly";
+      options = ''
+        --max-freed "$((${toString freeGbDiskSpace} * 1024**3 - 1024 * $(df -P -k /nix/store | tail -n 1 | ${pkgs.gawk}/bin/awk '{ print $4 }')))"
+      '';
     };
   };
 }
diff --git a/services/buildbot/lix.nix b/services/buildbot/lix.nix
new file mode 100644
index 0000000..5a70c70
--- /dev/null
+++ b/services/buildbot/lix.nix
@@ -0,0 +1,49 @@
+{ config, nodes, ... }:
+let
+  ssh-keys = import ../../common/ssh-keys.nix;
+in
+  [
+    {
+      hostName = "build01.aarch64.lix.systems";
+      maxJobs = 2;
+      protocol = "ssh-ng";
+      sshKey = config.age.secrets.buildbot-remote-builder-key.path;
+      sshUser = "nix";
+      systems = [ "aarch64-linux" ];
+      publicHostKey = ssh-keys.machines.build01-aarch64-lix;
+      supportedFeatures = nodes.build01-aarch64-lix.config.nix.settings.system-features;
+    }
+    {
+      hostName = "build02.aarch64.lix.systems";
+      maxJobs = 4;
+      protocol = "ssh-ng";
+      sshKey = config.age.secrets.buildbot-remote-builder-key.path;
+      sshUser = "nix";
+      systems = [ "aarch64-linux" ];
+      publicHostKey = ssh-keys.machines.build02-aarch64-lix;
+      supportedFeatures = nodes.build02-aarch64-lix.config.nix.settings.system-features;
+    }
+    {
+      hostName = "build01.aarch64-darwin.lix.systems";
+      maxJobs = 2;
+      protocol = "ssh-ng";
+      sshKey = config.age.secrets.buildbot-remote-builder-key.path;
+      sshUser = "m1";
+      systems = [ "aarch64-darwin" "x86_64-darwin" ];
+      publicHostKey = ssh-keys.machines.build01-aarch64-darwin-lix;
+      supportedFeatures = [ "big-parallel" ];
+    }
+    # a.k.a. https://git.newtype.fr/newtype/newtype-org-configurations/src/branch/main/docs/epyc.md
+    {
+      hostName = "epyc.infra.newtype.fr";
+      # at 256G this could run 64 builds but the machine is shared
+      # (and historically we used no more than 16 concurrent jobs)
+      maxJobs = 16;
+      protocol = "ssh-ng";
+      sshKey = config.age.secrets.buildbot-remote-builder-key.path;
+      sshUser = "nix";
+      systems = [ "x86_64-linux" "i686-linux" ];
+      publicHostKey = ssh-keys.machines.epyc-newtype-fr;
+      supportedFeatures = [ "benchmark" "big-parallel" "nixos-test" "kvm" ];
+    }
+  ]