infra/services/buildbot/default.nix
raito e0c029ba43 fix: make all buildbot postgres faster
work_mem was effectively absurdly low.

Signed-off-by: Raito Bezarius <masterancpp@gmail.com>
2024-10-22 16:57:24 +02:00

234 lines
7.3 KiB
Nix
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
nodes,
config,
lib,
pkgs,
...
}:
let
cfg = config.bagel.services.buildbot;
ssh-keys = import ../../common/ssh-keys.nix;
freeGbDiskSpace = 20;
extraTenantSpecificBuilders = {
lix = import ./lix.nix {
inherit config nodes;
};
floral = [ ];
}.${cfg.tenant or (throw "${cfg.tenant} is not a known tenant")};
clientId = {
lix = "buildbot";
floral = "forkos-buildbot";
}.${cfg.tenant or (throw "${cfg.tenant} is not a known tenant")};
inherit (lib) mkEnableOption mkOption mkIf types;
in
{
options.bagel.services.buildbot = {
enable = mkEnableOption "Buildbot";
tenant = mkOption {
type = types.enum [ "lix" "floral" ];
description = "Which buildbot tenant to enable";
};
domain = mkOption {
type = types.str;
description = "Domain name for this Buildbot";
};
gerrit = {
domain = mkOption {
type = types.str;
description = "Canonical domain of the Gerrit associated to this Buildbot";
example = [ "cl.forkos.org" ];
};
port = mkOption {
type = types.port;
description = "Gerrit SSH port for this Buildbot";
};
username = mkOption {
type = types.str;
description = "Gerrit service username for this Buildbot";
};
};
cors.allowedOrigins = mkOption {
type = types.listOf types.str;
example = [ "*.forkos.org" ];
description = "Allowed origin for Buildbot and NGINX for CORS without the protocol";
};
buildSystems = mkOption {
type = types.listOf (types.enum [ "x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin" ]);
default = [ "x86_64-linux" ];
example = [ "x86_64-linux" "aarch64-linux" ];
description = "Supported build systems for this buildbot instance.";
};
projects = mkOption {
type = types.listOf types.str;
example = [ "nixpkgs" ];
description = "Static list of projects enabled for Buildbot CI";
};
builders = mkOption {
type = types.listOf types.str;
description = "List of builders to configure for Buildbot";
example = [ "builder-2" "builder-3" ];
};
};
config = mkIf cfg.enable {
networking.firewall.allowedTCPPorts = [ 80 443 ];
bagel.secrets.files = [
"buildbot-worker-password"
"buildbot-oauth-secret"
"buildbot-workers"
"buildbot-service-key"
"buildbot-signing-key"
"buildbot-remote-builder-key"
];
age.secrets.buildbot-signing-key = {
owner = "buildbot-worker";
group = "buildbot-worker";
};
age.secrets.buildbot-remote-builder-key = {
file = ../../secrets/${cfg.tenant}/buildbot-remote-builder-key.age;
owner = "buildbot-worker";
group = "buildbot-worker";
};
services.nginx = {
recommendedProxySettings = true;
appendHttpConfig = ''
# Our session stuff is too big with the TWISTED_COOKIE in addition.
# Default is usually 4k or 8k.
large_client_header_buffers 4 16k;
'';
virtualHosts.${cfg.domain} = {
forceSSL = true;
enableACME = true;
extraConfig = ''
# This is needed so that logged-in users in Buildbot can include their credentials in their requests.
add_header Access-Control-Allow-Credentials 'true' always;
'';
};
};
services.buildbot-nix.worker = {
enable = true;
workerPasswordFile = config.age.secrets.buildbot-worker-password.path;
# All credits to eldritch horrors for this beauty.
workerArchitectures =
{
# nix-eval-jobs runs under a lock, error reports do not (but are cheap)
other = 8;
} // (
lib.filterAttrs
(n: v: lib.elem n config.services.buildbot-nix.coordinator.buildSystems)
(lib.zipAttrsWith
(_: lib.foldl' lib.add 0)
(lib.concatMap
(m: map (s: { ${s} = m.maxJobs; }) m.systems)
config.services.buildbot-nix.coordinator.buildMachines))
);
};
services.buildbot-nix.coordinator = {
enable = true;
inherit (cfg) domain;
# TODO(raito): is that really necessary when we can just collect buildMachines' systems?
inherit (cfg) buildSystems;
oauth2 = {
name = "Lix";
inherit clientId;
clientSecretFile = config.age.secrets.buildbot-oauth-secret.path;
resourceEndpoint = "https://identity.lix.systems";
authUri = "https://identity.lix.systems/realms/lix-project/protocol/openid-connect/auth";
tokenUri = "https://identity.lix.systems/realms/lix-project/protocol/openid-connect/token";
userinfoUri = "https://identity.lix.systems/realms/lix-project/protocol/openid-connect/userinfo";
};
# TODO(raito): this is not really necessary, we never have remote buildbot workers.
# we can replace all of this with automatic localworker generation on buildbot-nix side.
workersFile = config.age.secrets.buildbot-workers.path;
# We rely on NGINX to do the CORS dance.
allowedOrigins = cfg.cors.allowedOrigins;
buildMachines = map (n: {
hostName = nodes.${n}.config.networking.fqdn;
protocol = "ssh-ng";
# Follows Hydra.
maxJobs = 8;
sshKey = config.age.secrets.buildbot-remote-builder-key.path;
sshUser = "buildbot";
systems = [ "x86_64-linux" ];
supportedFeatures = nodes.${n}.config.nix.settings.system-features;
# Contrary to how Nix works, here we can specify non-base64 public host keys.
publicHostKey = ssh-keys.machines.${n};
}
) cfg.builders ++ extraTenantSpecificBuilders;
gerrit = {
# Manually managed account…
# TODO: https://git.lix.systems/the-distro/infra/issues/69
inherit (cfg.gerrit) domain port username;
privateKeyFile = config.age.secrets.buildbot-service-key.path;
inherit (cfg) projects;
};
evalWorkerCount = 6;
evalMaxMemorySize = "4096";
signingKeyFile = config.age.secrets.buildbot-signing-key.path;
};
# Make PostgreSQL restart smoother.
systemd.services.postgresql.serviceConfig = {
Restart = "always";
RestartMaxDelaySec = "5m";
RestartSteps = 10;
};
services.postgresql.settings = {
# DB Version: 15
# OS Type: linux
# DB Type: web
# Total Memory (RAM): 64 GB
# CPUs num: 16
# Connections num: 100
# Data Storage: ssd
max_connections = 100;
shared_buffers = "16GB";
effective_cache_size = "48GB";
maintenance_work_mem = "2GB";
checkpoint_completion_target = 0.9;
wal_buffers = "16MB";
default_statistics_target = 100;
random_page_cost = 1.1;
effective_io_concurrency = 200;
work_mem = "41943kB";
huge_pages = "try";
min_wal_size = "1GB";
max_wal_size = "4GB";
max_worker_processes = 16;
max_parallel_workers_per_gather = 4;
max_parallel_workers = 16;
max_parallel_maintenance_workers = 4;
};
nix.settings.keep-derivations = true;
nix.gc = {
automatic = true;
dates = "hourly";
options = ''
--max-freed "$((${toString freeGbDiskSpace} * 1024**3 - 1024 * $(df -P -k /nix/store | tail -n 1 | ${pkgs.gawk}/bin/awk '{ print $4 }')))"
'';
};
};
}