chore: introduce finer-grained baremetal management

for multiple roles such as storage or builders.

Signed-off-by: Raito Bezarius <masterancpp@gmail.com>
This commit is contained in:
raito 2024-10-27 20:22:49 +01:00
parent e930a17b0b
commit 13014ce6c3
11 changed files with 240 additions and 111 deletions

View file

@ -8,18 +8,19 @@
fodwatch = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFRyTNfvKl5FcSyzGzw+h+bNFNOxdhvI67WdUZ2iIJ1L";
buildbot = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJgIu6ouagYqBeMLfmn1CbaDJMuZcPH9bnUhkht8GfuB";
git = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIEQJcpkCUOx8+5oukMX6lxrYcIX8FyHu8Mc/3+ieKMUn";
bm-0 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBHSNcDGctvlG6BHcJuYIzW9WsBJsts2vpwSketsbXoL";
bm-1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIQOGUjERK7Mx8UPM/rbOdMqVyn1sbWqYOG6CbOzH2wm";
bm-2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMKzXIqCoYElEKIYgjbSpqEcDeOvV+Wo3Agq3jba83cB";
bm-3 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGq0A5233XGt34T097KaEKBUqFvaa7a6nYZRsSO0166l";
bm-4 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIB9dVo2xZhgIMDgB1rUj5ApmppL39BtYu/+OFHeduvXr";
bm-5 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE7vZTBxrVHmHpv7slQ8A8XwjjbfN+ZJA0V5C3k0wNBD";
bm-6 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOt1qR/2BRtc6PABuSBulowwJVO6wBNDyEFzh0qsTeOF";
bm-7 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFinAAw1v8TJB8/wcmTVBbHHc4LCYh6z4TO6ViwUPkoh";
bm-8 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGSWHNeqT0kF/e4yVy2ieW98X5QMyCYIYZh9WTmQDs1";
bm-9 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOhws9zGgocVY36dMtOL+CXadpvRMffxoWMkfEcTBJm7";
bm-10 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE7sgIuTSqZiZhp8TvObSbIEhcHHsL5hcmYA22uzwxth";
# bm-11 actually?
build-coord = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINpAEJP7F+XtJBpQP1jTzwXwQgJrFxwEJjPf/rnCXkJA";
builder-0 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBHSNcDGctvlG6BHcJuYIzW9WsBJsts2vpwSketsbXoL";
builder-1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIQOGUjERK7Mx8UPM/rbOdMqVyn1sbWqYOG6CbOzH2wm";
builder-2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMKzXIqCoYElEKIYgjbSpqEcDeOvV+Wo3Agq3jba83cB";
builder-3 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGq0A5233XGt34T097KaEKBUqFvaa7a6nYZRsSO0166l";
builder-4 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIB9dVo2xZhgIMDgB1rUj5ApmppL39BtYu/+OFHeduvXr";
builder-5 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE7vZTBxrVHmHpv7slQ8A8XwjjbfN+ZJA0V5C3k0wNBD";
builder-6 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOt1qR/2BRtc6PABuSBulowwJVO6wBNDyEFzh0qsTeOF";
builder-7 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFinAAw1v8TJB8/wcmTVBbHHc4LCYh6z4TO6ViwUPkoh";
builder-8 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGSWHNeqT0kF/e4yVy2ieW98X5QMyCYIYZh9WTmQDs1";
builder-9 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOhws9zGgocVY36dMtOL+CXadpvRMffxoWMkfEcTBJm7";
builder-10 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE7sgIuTSqZiZhp8TvObSbIEhcHHsL5hcmYA22uzwxth";
wob-vpn-gw = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINVytPPW8XnXf/rD5TFzsw//CZc2lBjQLmDzlVGPZsjh";
# Lix

View file

@ -135,9 +135,14 @@
# Tag all machines which have local boot as local bootables.
deployment.tags = lib.mkMerge [
[ "floral" ]
(lib.mkIf (config.bagel.baremetal.builders.enable -> !config.bagel.baremetal.builders.netboot)
# All nodes that can be local booted, including baremetal nodes.
(lib.mkIf (config.bagel.baremetal.enable -> !config.bagel.baremetal.netboot)
[ "localboot" ]
)
# Only baremetal nodes that can be local booted.
(lib.mkIf (config.bagel.baremetal.enable && !config.bagel.baremetal.netboot)
[ "bm-localboot" ]
)
];
bagel.monitoring.grafana-agent.tenant = "floral";
@ -148,13 +153,60 @@
];
# These are Floral baremetal builders.
makeBuilder = i:
makeColoBaremetal = i:
let
enableNetboot = i >= 6;
in
lib.nameValuePair "builder-${toString i}" {
# bm for baremetal.
lib.nameValuePair "bm-${toString i}" {
imports = floralInfraModules;
bagel.baremetal.builders = { enable = true; num = i; netboot = enableNetboot; };
bagel.baremetal = { enable = true; num = i; netboot = enableNetboot; };
};
# Wrap `node` in a new module that imports it and enables the role `flavor`
# under `bagel.baremetal`, recording `i` as the role-specific number.
setFlavorForNode = flavor: i: node: {
  imports = [ node ];
  bagel.baremetal.${flavor} = { enable = true; num = i; };
};
# Select the nodes of `nodes` whose baremetal number falls in one of `ranges`
# and enable the role `flavor` on each of them with a flavor-specific number.
#
# flavor: attribute name of the role under `bagel.baremetal`.
# ranges: list of `{ start, end }` intervals over `bagel.baremetal.num`.
# nodes:  attrset of node definitions exposing `bagel.baremetal.num`.
# Returns an attrset keyed like the selected subset of `nodes`.
setFlavorForNodes = flavor: { ranges }: nodes:
  let
    setFlavor = setFlavorForNode flavor;
    # Test if i is in [range.start, range.end[.
    inRange = i: range: i >= range.start && i < range.end;
    # Perform an enumeration from values to indexes.
    reverseEnumerate = list: lib.listToAttrs (lib.imap0 (i: x: lib.nameValuePair x i) list);
    # Filter all nodes by the range data for this flavor.
    filteredNodes = lib.filterAttrs (name: node: lib.any (r: inRange node.bagel.baremetal.num r) ranges) nodes;
    # The enumeration from baremetal numbers to flavor-specific numbers.
    # NOTE(review): `builtins.attrValues` orders values by attribute name, so
    # the flavor indexes follow the lexicographic order of the node names
    # (e.g. "bm-10" sorts before "bm-4"), not the numeric order of
    # `bagel.baremetal.num` — confirm this is the intended numbering.
    indexes = reverseEnumerate (map (n: toString n.bagel.baremetal.num) (builtins.attrValues filteredNodes));
  in
  # Build a new attrset with a new second-level index for the specific flavor.
  lib.mapAttrs (name: node: setFlavor indexes.${toString node.bagel.baremetal.num} node) filteredNodes;
# Build the interval record `{ start, end }`; consumers treat it as
# closed on `start` and open on `end`.
closedOpenInterval = start: end: { inherit start end; };
# Default interval constructor: an alias for the closed-open interval.
interval = closedOpenInterval;
# Returns the singleton {x}, i.e. the interval from x to x + 1.
singleton = x: interval x (x + 1);
# Baremetal indexes per role (sets of indexes, not intervals):
# builders: {4, 10}.
# storage: {5}.
# build-coord: {11}.
# Role setter for storage nodes: selects the baremetal node with index 5.
setStorage = setFlavorForNodes "storage" { ranges = [ (singleton 5) ]; };
# Role setter for build coordinators: selects the baremetal node with index 11.
# TODO: absorb build-coord which is a fairly normal role _inside_ the baremetal structure and remove an exception for it.
setBuildCoordinators = setFlavorForNodes "build-coord" { ranges = [ (singleton 11) ]; };
# Role setter for builders: selects the baremetal nodes with indexes 4 and 10.
setBuilders = setFlavorForNodes "builders" { ranges = [ (singleton 4) (singleton 10) ]; };
lixInfraModules = commonModules ++ [
@ -182,7 +234,28 @@
}
];
builders = lib.listToAttrs (map makeBuilder [4 5 10 11]);
# Returns true when no node name appears under two different roles.
#
# setOfNodes: attrset mapping a role name to an attrset of nodes.
checkForNonReuse = setOfNodes:
  let
    roles = builtins.attrNames setOfNodes;
    # Node names registered under a given role.
    namesOf = role: builtins.attrNames setOfNodes.${role};
    # A role is never compared against itself; distinct roles must not share a name.
    disjoint = a: b: a == b || lib.intersectLists (namesOf a) (namesOf b) == [];
  in
  lib.all (a: lib.all (b: disjoint a b) roles) roles;
# Attrset of all activated baremetal nodes, i.e. the union of the nodes
# selected by each role setter, after checking that no node has two roles.
baremetalNodes =
  let
    # We consider all possible baremetal systems and we filter out a subset that is activated.
    # To configure the set of used machines, configure the `setXYZ` role setter selectors.
    # `genList ... 11` produces the indexes 0 to 10 inclusive.
    # NOTE(review): index 11 is not generated here, so a role setter selecting
    # only index 11 (as `setBuildCoordinators` does) would match no node.
    allNodes = lib.listToAttrs (lib.genList makeColoBaremetal 11);
    perRoles = {
      storageNodes = setStorage allNodes;
      builderNodes = setBuilders allNodes;
      # buildCoordinatorNodes = setBuildCoordinators allNodes;
    };
  in
  # The check fails as soon as any two roles share a node, so the message says "more than one role".
  assert (lib.assertMsg (checkForNonReuse perRoles) "A baremetal node is assigned to more than one role (storage, builder or build coordinator), please review the ranges.");
  lib.foldl (a: b: a // b) { } (builtins.attrValues perRoles);
in {
meta.nixpkgs = systemBits.x86_64-linux.pkgs;
# Add any non-x86_64 native systems here.
@ -208,7 +281,7 @@
build01-aarch64-lix.imports = lixInfraModules ++ [ ./hosts/build01-aarch64-lix ];
buildbot-lix.imports = lixInfraModules ++ [ ./hosts/buildbot-lix ];
} // builders;
} // baremetalNodes;
hydraJobs = builtins.mapAttrs (n: v: v.config.system.build.netbootDir or v.config.system.build.toplevel) self.nixosConfigurations;
buildbotJobs = builtins.mapAttrs (_: v: v.config.system.build.toplevel) self.nixosConfigurations;

View file

@ -9,7 +9,8 @@
bagel.services = {
hydra.enable = true;
hydra.builders = map (i: "builder-${builtins.toString i}") [4 5 10];
# TODO: use the roles to avoid setting up builders which are not… builders!
hydra.builders = map (i: "bm-${builtins.toString i}") [4 10];
# Arguably, the build-coordinator is the most sensitive piece of our own infrastructure.
# Hence, it can also run another sensitive piece of the system: the Vault.

View file

@ -3,7 +3,7 @@
let
genBuilders = { offset ? 0, count, f }: builtins.genList (x: rec { name = "builder-${toString (offset + x)}"; value = f name; }) count;
in builtins.listToAttrs (
genBuilders { offset = 4; count = 2; f = name: {
genBuilders { offset = 0; count = 2; f = name: {
cores = 8;
max-jobs = 8;
supported-features = [ "kvm" "nixos-test" ];
@ -11,7 +11,7 @@ in builtins.listToAttrs (
}; }
++
# This builder is exclusively for big-parallel
genBuilders { offset = 10; count = 1; f = name: {
genBuilders { offset = 2; count = 1; f = name: {
cores = 20;
max-jobs = 1;
supported-features = [ "kvm" "nixos-test" "big-parallel" ];

View file

@ -1,15 +1,12 @@
{ pkgs, lib, config, ... }:
let
cfgParent = config.bagel.baremetal;
cfg = config.bagel.baremetal.builders;
in
{
imports = [ ./netboot.nix ];
options = {
bagel.baremetal.builders = {
enable = lib.mkEnableOption "baremetal bagel oven";
netboot = lib.mkEnableOption "netboot";
enable = lib.mkEnableOption "builder role";
num = lib.mkOption {
type = lib.types.int;
};
@ -17,9 +14,6 @@ in
};
config = lib.mkIf cfg.enable {
boot.initrd.availableKernelModules = [ "ahci" "ehci_pci" "usb_storage" "usbhid" "sd_mod" ];
boot.initrd.kernelModules = [ "dm-snapshot" ];
users.users.builder = {
isSystemUser = true;
group = "nogroup";
@ -48,45 +42,21 @@ in
inherit ((import ./assignments.nix).${config.networking.hostName}) max-jobs cores;
};
nixpkgs.hostPlatform = "x86_64-linux";
hardware.cpu.intel.updateMicrocode = true;
fileSystems = {
"/mnt" = {
device = "/dev/disk/by-label/hydra";
fsType = "xfs";
options = ["logbsize=256k"];
};
boot.loader.systemd-boot.enable = true;
boot.loader.efi.canTouchEfiVariables = true;
boot.initrd.systemd.enable = true;
# We want the tmp filesystem on the same filesystem as the hydra store, so that builds can use reflinks
"/tmp" = {
device = "/mnt/tmp";
options = [ "bind" ];
};
};
boot.initrd.services.lvm.enable = true;
boot.kernel.sysctl."fs.xfs.xfssyncd_centisecs" = "12000";
fileSystems = lib.mkMerge [
(lib.mkIf (!cfg.netboot) {
"/" = {
device = "/dev/disk/by-label/root";
fsType = "xfs";
};
"/boot" = {
device = "/dev/disk/by-label/BOOT";
fsType = "vfat";
options = [ "fmask=0022" "dmask=0022" ];
};
})
{
"/mnt" = {
device = "/dev/disk/by-label/hydra";
fsType = "xfs";
options = ["logbsize=256k"];
};
# We want the tmp filesystem on the same filesystem as the hydra store, so that builds can use reflinks
"/tmp" = {
device = "/mnt/tmp";
options = [ "bind" ];
};
}
];
swapDevices = lib.optionals (!cfg.netboot) [
swapDevices = lib.optionals (!cfgParent.netboot) [
{
device = "/swapfile";
size = 50 * 1024; # 50GiB
@ -103,44 +73,7 @@ in
"console=ttyS0,115200"
];
networking.useNetworkd = true;
networking.hostName = "builder-${toString cfg.num}";
networking.domain = "wob01.infra.forkos.org";
systemd.network = {
netdevs = {
"40-uplink" = {
netdevConfig = {
Kind = "bond";
Name = "uplink";
};
bondConfig = {
Mode = "802.3ad";
TransmitHashPolicy = "layer3+4";
};
};
};
networks = {
"40-eno1" = {
name = "eno1";
bond = [ "uplink" ];
};
"40-eno2" = {
name = "eno2";
bond = [ "uplink" ];
};
};
};
networking.interfaces.uplink.ipv6.addresses = [
{ address = "2a01:584:11::1:${toString cfg.num}"; prefixLength = 64; }
];
networking.defaultGateway6 = { interface = "uplink"; address = "2a01:584:11::1"; };
bagel.infra.self.wan = {
family = "inet6";
address = "2a01:584:11::1:${toString cfg.num}";
prefixLength = 64;
};
deployment.targetHost = "2a01:584:11::1:${toString cfg.num}";
deployment.tags = [ "builders" ];
# Why can't we have nice things? https://bugs.openjdk.org/browse/JDK-8170568
@ -184,11 +117,5 @@ in
wantedBy = [ "timers.target" ];
};
systemd.timers.hydra-gc.timerConfig.Persistent = true;
bagel.sysadmin.enable = true;
environment.systemPackages = [ pkgs.ipmitool ];
system.stateVersion = "24.05";
};
}

View file

@ -0,0 +1,12 @@
# Entry point of the baremetal modules: pulls in the per-role modules
# and the base modules they build upon.
{
imports = [
# Compute nodes
./builders
# Storage nodes
./storage
# Bases
./netboot.nix
./hardware.nix
];
}

View file

@ -0,0 +1,96 @@
# Base module for baremetal nodes: declares the `bagel.baremetal` options and,
# when enabled, configures boot, filesystems, the bonded uplink and the
# IPv6 addressing derived from the node number.
{ pkgs, lib, config, ... }:
let
cfg = config.bagel.baremetal;
in
{
options = {
bagel.baremetal = {
enable = lib.mkEnableOption "baremetal bagel oven";
# When set, the local root and boot filesystems below are not declared.
netboot = lib.mkEnableOption "netboot";
# Node number; used below as the last group of the node's IPv6 address.
num = lib.mkOption {
type = lib.types.int;
};
};
};
config = lib.mkIf cfg.enable {
boot.initrd.availableKernelModules = [ "ahci" "ehci_pci" "usb_storage" "usbhid" "sd_mod" ];
boot.initrd.kernelModules = [ "dm-snapshot" ];
nixpkgs.hostPlatform = "x86_64-linux";
hardware.cpu.intel.updateMicrocode = true;
boot.loader.systemd-boot.enable = true;
boot.loader.efi.canTouchEfiVariables = true;
boot.initrd.systemd.enable = true;
boot.initrd.services.lvm.enable = true;
boot.kernel.sysctl."fs.xfs.xfssyncd_centisecs" = "12000";
# Local root and boot filesystems are only declared for nodes that are not netbooted.
fileSystems = lib.mkIf (!cfg.netboot) {
"/" = {
device = "/dev/disk/by-label/root";
fsType = "xfs";
};
"/boot" = {
device = "/dev/disk/by-label/BOOT";
fsType = "vfat";
options = [ "fmask=0022" "dmask=0022" ];
};
};
zramSwap = {
enable = true;
memoryPercent = 25;
};
# Console output goes to both the first virtual terminal and the first serial port.
boot.kernelParams = [
"console=tty1"
"console=ttyS0,115200"
];
networking.useNetworkd = true;
networking.domain = "wob01.infra.forkos.org";
# Bond eno1 and eno2 into a single 802.3ad `uplink` device.
systemd.network = {
netdevs = {
"40-uplink" = {
netdevConfig = {
Kind = "bond";
Name = "uplink";
};
bondConfig = {
Mode = "802.3ad";
TransmitHashPolicy = "layer3+4";
};
};
};
networks = {
"40-eno1" = {
name = "eno1";
bond = [ "uplink" ];
};
"40-eno2" = {
name = "eno2";
bond = [ "uplink" ];
};
};
};
# The same address, built from the node number, is used for the uplink
# interface, the WAN self-description and the deployment target below.
networking.interfaces.uplink.ipv6.addresses = [
{ address = "2a01:584:11::1:${toString cfg.num}"; prefixLength = 64; }
];
networking.defaultGateway6 = { interface = "uplink"; address = "2a01:584:11::1"; };
bagel.infra.self.wan = {
family = "inet6";
address = "2a01:584:11::1:${toString cfg.num}";
prefixLength = 64;
};
deployment.targetHost = "2a01:584:11::1:${toString cfg.num}";
bagel.sysadmin.enable = true;
environment.systemPackages = [ pkgs.ipmitool ];
system.stateVersion = "24.05";
};
}

View file

@ -1,6 +1,6 @@
{ modulesPath, pkgs, lib, config, extendModules, ... }@node:
let
cfg = config.bagel.baremetal.builders;
cfg = config.bagel.baremetal;
in
{
config = lib.mkIf (cfg.enable && cfg.netboot) {

View file

@ -0,0 +1,19 @@
# Storage role for baremetal nodes: when enabled, names the host after its
# storage number and tags it for deployment.
{ lib, config, ... }:
let
  inherit (lib) mkEnableOption mkIf mkOption types;
  storage = config.bagel.baremetal.storage;
in
{
  options.bagel.baremetal.storage = {
    enable = mkEnableOption "storage role";
    # Role-specific number of this node; used to build the hostname.
    num = mkOption { type = types.int; };
  };

  config = mkIf storage.enable {
    networking.hostName = "storage-${toString storage.num}";
    deployment.tags = [ "storage" ];
  };
}

View file

@ -12,7 +12,7 @@
./ofborg
./postgres
./forgejo
./baremetal-builder
./baremetal
./buildbot
./newsletter
./s3-revproxy

View file

@ -38,7 +38,7 @@ let
# - generalize to new features
baremetalBuilders = lib.concatStringsSep "\n"
(map (n: let
assignments = (import ../baremetal-builder/assignments.nix).${n} or {
assignments = (import ../baremetal/builders/assignments.nix).${n} or {
inherit (nodes.${n}.config.nix.settings) max-jobs;
supported-features = [ "big-parallel" "kvm" "nixos-test" ];
required-features = [];