From 211ef9c15b2bef5eb9808989d33b8bda2835e256 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sun, 27 Oct 2024 20:22:49 +0100 Subject: [PATCH] chore: introduce finer-grained baremetal management for multiple roles such as storage or builders. Signed-off-by: Raito Bezarius --- common/ssh-keys.nix | 23 ++--- flake.nix | 72 ++++++++++++-- hosts/build-coord/default.nix | 4 +- .../builders}/assignments.nix | 0 .../builders}/default.nix | 98 +++---------------- services/baremetal/default.nix | 12 +++ services/baremetal/hardware.nix | 91 +++++++++++++++++ .../netboot.nix | 2 +- services/baremetal/storage/default.nix | 19 ++++ services/default.nix | 2 +- 10 files changed, 219 insertions(+), 104 deletions(-) rename services/{baremetal-builder => baremetal/builders}/assignments.nix (100%) rename services/{baremetal-builder => baremetal/builders}/default.nix (54%) create mode 100644 services/baremetal/default.nix create mode 100644 services/baremetal/hardware.nix rename services/{baremetal-builder => baremetal}/netboot.nix (99%) create mode 100644 services/baremetal/storage/default.nix diff --git a/common/ssh-keys.nix b/common/ssh-keys.nix index 7e8964e..5fb0e65 100644 --- a/common/ssh-keys.nix +++ b/common/ssh-keys.nix @@ -8,18 +8,19 @@ fodwatch = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFRyTNfvKl5FcSyzGzw+h+bNFNOxdhvI67WdUZ2iIJ1L"; buildbot = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJgIu6ouagYqBeMLfmn1CbaDJMuZcPH9bnUhkht8GfuB"; git = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIEQJcpkCUOx8+5oukMX6lxrYcIX8FyHu8Mc/3+ieKMUn"; + bm-0 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBHSNcDGctvlG6BHcJuYIzW9WsBJsts2vpwSketsbXoL"; + bm-1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIQOGUjERK7Mx8UPM/rbOdMqVyn1sbWqYOG6CbOzH2wm"; + bm-2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMKzXIqCoYElEKIYgjbSpqEcDeOvV+Wo3Agq3jba83cB"; + bm-3 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGq0A5233XGt34T097KaEKBUqFvaa7a6nYZRsSO0166l"; + bm-4 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIB9dVo2xZhgIMDgB1rUj5ApmppL39BtYu/+OFHeduvXr"; + 
bm-5 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE7vZTBxrVHmHpv7slQ8A8XwjjbfN+ZJA0V5C3k0wNBD"; + bm-6 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOt1qR/2BRtc6PABuSBulowwJVO6wBNDyEFzh0qsTeOF"; + bm-7 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFinAAw1v8TJB8/wcmTVBbHHc4LCYh6z4TO6ViwUPkoh"; + bm-8 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGSWHNeqT0kF/e4yVy2ieW98X5QMyCYIYZh9WTmQDs1"; + bm-9 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOhws9zGgocVY36dMtOL+CXadpvRMffxoWMkfEcTBJm7"; + bm-10 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE7sgIuTSqZiZhp8TvObSbIEhcHHsL5hcmYA22uzwxth"; + # bm-11 actually? build-coord = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINpAEJP7F+XtJBpQP1jTzwXwQgJrFxwEJjPf/rnCXkJA"; - builder-0 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBHSNcDGctvlG6BHcJuYIzW9WsBJsts2vpwSketsbXoL"; - builder-1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIQOGUjERK7Mx8UPM/rbOdMqVyn1sbWqYOG6CbOzH2wm"; - builder-2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMKzXIqCoYElEKIYgjbSpqEcDeOvV+Wo3Agq3jba83cB"; - builder-3 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGq0A5233XGt34T097KaEKBUqFvaa7a6nYZRsSO0166l"; - builder-4 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIB9dVo2xZhgIMDgB1rUj5ApmppL39BtYu/+OFHeduvXr"; - builder-5 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE7vZTBxrVHmHpv7slQ8A8XwjjbfN+ZJA0V5C3k0wNBD"; - builder-6 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOt1qR/2BRtc6PABuSBulowwJVO6wBNDyEFzh0qsTeOF"; - builder-7 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFinAAw1v8TJB8/wcmTVBbHHc4LCYh6z4TO6ViwUPkoh"; - builder-8 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGSWHNeqT0kF/e4yVy2ieW98X5QMyCYIYZh9WTmQDs1"; - builder-9 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOhws9zGgocVY36dMtOL+CXadpvRMffxoWMkfEcTBJm7"; - builder-10 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE7sgIuTSqZiZhp8TvObSbIEhcHHsL5hcmYA22uzwxth"; wob-vpn-gw = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINVytPPW8XnXf/rD5TFzsw//CZc2lBjQLmDzlVGPZsjh"; # Lix diff --git a/flake.nix b/flake.nix index 1f7b1f6..f43f411 100644 --- a/flake.nix +++ b/flake.nix @@ -126,9 +126,14 @@ # Tag all machines which 
have local boot as local bootables. deployment.tags = lib.mkMerge [ [ "floral" ] - (lib.mkIf (config.bagel.baremetal.builders.enable -> !config.bagel.baremetal.builders.netboot) + # All nodes that can be local booted, including baremetal nodes. + (lib.mkIf (config.bagel.baremetal.enable -> !config.bagel.baremetal.netboot) [ "localboot" ] ) + # Only baremetal nodes that can be local booted. + (lib.mkIf (config.bagel.baremetal.enable && !config.bagel.baremetal.netboot) + [ "bm-localboot" ] + ) ]; bagel.monitoring.grafana-agent.tenant = "floral"; @@ -139,13 +144,50 @@ ]; # These are Floral baremetal builders. - makeBuilder = i: + makeColoBaremetal = i: let enableNetboot = i >= 6; in - lib.nameValuePair "builder-${toString i}" { + # bm for baremetal. + lib.nameValuePair "bm-${toString i}" { imports = floralInfraModules; - bagel.baremetal.builders = { enable = true; num = i; netboot = enableNetboot; }; + bagel.baremetal = { enable = true; num = i; netboot = enableNetboot; }; + }; + + setFlavorForNode = flavor: i: node: { + imports = [ + node + ]; + + bagel.baremetal.${flavor} = { + enable = true; + num = i; + }; + }; + setFlavorForNodes = flavor: { ranges }: nodes: + let + setFlavor = setFlavorForNode flavor; + # Test if i is in [range.start, range.end[. + inRange = i: range: i >= range.start && i < range.end; + # Perform an enumeration from values to indexes. + reverseEnumerate = list: lib.listToAttrs (lib.zipListsWith (i: x: lib.nameValuePair x i) (lib.range 0 (lib.length list - 1)) list); + # Filter all nodes by the range data for this flavor. + filteredNodes = lib.filterAttrs (name: node: lib.any (r: inRange node.bagel.baremetal.num r) ranges) nodes; + # Map each baremetal number (stringified) to its flavor-specific index; indexes follow builtins.attrValues order, i.e. sorted by attribute name. + indexes = reverseEnumerate (map (n: toString n.bagel.baremetal.num) (builtins.attrValues filteredNodes)); + in + # Build a new attrset with a new second-level index for the specific flavor. 
+ lib.mapAttrs (name: node: setFlavor indexes.${toString node.bagel.baremetal.num} node) filteredNodes; + + closedOpenInterval = a: b: { start = a; end = b; }; + interval = a: b: closedOpenInterval a b; + setStorage = setFlavorForNodes "storage" { + # Half-open ranges [start, end) of baremetal node numbers that are supposed to be storage nodes. + ranges = [ (interval 5 6) ]; + }; + setBuilders = setFlavorForNodes "builders" { + # Half-open ranges [start, end) of baremetal node numbers that are supposed to be builder nodes. + ranges = [ (interval 0 5) (interval 6 10) ]; }; lixInfraModules = commonModules ++ [ @@ -173,7 +215,25 @@ } ]; - builders = lib.listToAttrs (lib.genList makeBuilder 11); + checkForNonReuse = setOfNodes: + let + setOfNodesNames = lib.mapAttrs (name: nodes: builtins.attrNames nodes) setOfNodes; + in + lib.all (set: + lib.all (anotherSet: set != anotherSet -> lib.intersectLists setOfNodesNames.${set} setOfNodesNames.${anotherSet} == []) (builtins.attrNames setOfNodes) + ) (builtins.attrNames setOfNodes); + + baremetalNodes = + let + allNodes = lib.listToAttrs (lib.genList makeColoBaremetal 11); + perRoles = { + storageNodes = setStorage allNodes; + builderNodes = setBuilders allNodes; + }; + in + assert (lib.assertMsg (checkForNonReuse perRoles) "A baremetal node is simultaneously storage and builder, please review the ranges."); + lib.foldl (a: b: a // b) { } (builtins.attrValues perRoles); + in { meta.nixpkgs = systemBits.x86_64-linux.pkgs; # Add any non-x86_64 native systems here. 
@@ -199,7 +259,7 @@ build01-aarch64-lix.imports = lixInfraModules ++ [ ./hosts/build01-aarch64-lix ]; buildbot-lix.imports = lixInfraModules ++ [ ./hosts/buildbot-lix ]; - } // builders; + } // baremetalNodes; hydraJobs = builtins.mapAttrs (n: v: v.config.system.build.netbootDir or v.config.system.build.toplevel) self.nixosConfigurations; buildbotJobs = builtins.mapAttrs (_: v: v.config.system.build.toplevel) self.nixosConfigurations; diff --git a/hosts/build-coord/default.nix b/hosts/build-coord/default.nix index 8da91d1..2ad778a 100644 --- a/hosts/build-coord/default.nix +++ b/hosts/build-coord/default.nix @@ -9,8 +9,8 @@ bagel.services = { hydra.enable = true; - # Takes 10 builders (0 → 9). - hydra.builders = lib.genList (i: "builder-${builtins.toString i}") 10; + # Takes 9 builders (0 → 8). + hydra.builders = lib.genList (i: "builder-${builtins.toString i}") 9; }; bagel.monitoring.exporters.hydra.enable = true; diff --git a/services/baremetal-builder/assignments.nix b/services/baremetal/builders/assignments.nix similarity index 100% rename from services/baremetal-builder/assignments.nix rename to services/baremetal/builders/assignments.nix diff --git a/services/baremetal-builder/default.nix b/services/baremetal/builders/default.nix similarity index 54% rename from services/baremetal-builder/default.nix rename to services/baremetal/builders/default.nix index fd9075a..acd3e5e 100644 --- a/services/baremetal-builder/default.nix +++ b/services/baremetal/builders/default.nix @@ -1,15 +1,12 @@ { pkgs, lib, config, ... 
}: let + cfgParent = config.bagel.baremetal; cfg = config.bagel.baremetal.builders; in { - imports = [ ./netboot.nix ]; - options = { - bagel.baremetal.builders = { - enable = lib.mkEnableOption "baremetal bagel oven"; - netboot = lib.mkEnableOption "netboot"; + enable = lib.mkEnableOption "builder role"; num = lib.mkOption { type = lib.types.int; }; @@ -17,9 +14,6 @@ in }; config = lib.mkIf cfg.enable { - boot.initrd.availableKernelModules = [ "ahci" "ehci_pci" "usb_storage" "usbhid" "sd_mod" ]; - boot.initrd.kernelModules = [ "dm-snapshot" ]; - users.users.builder = { isSystemUser = true; group = "nogroup"; @@ -48,45 +42,21 @@ in inherit ((import ./assignments.nix).${config.networking.hostName}) max-jobs cores; }; - nixpkgs.hostPlatform = "x86_64-linux"; - hardware.cpu.intel.updateMicrocode = true; + fileSystems = { + "/mnt" = { + device = "/dev/disk/by-label/hydra"; + fsType = "xfs"; + options = ["logbsize=256k"]; + }; - boot.loader.systemd-boot.enable = true; - boot.loader.efi.canTouchEfiVariables = true; - boot.initrd.systemd.enable = true; + # We want the tmp filesystem on the same filesystem as the hydra store, so that builds can use reflinks + "/tmp" = { + device = "/mnt/tmp"; + options = [ "bind" ]; + }; + }; - boot.initrd.services.lvm.enable = true; - - boot.kernel.sysctl."fs.xfs.xfssyncd_centisecs" = "12000"; - fileSystems = lib.mkMerge [ - (lib.mkIf (!cfg.netboot) { - "/" = { - device = "/dev/disk/by-label/root"; - fsType = "xfs"; - }; - - "/boot" = { - device = "/dev/disk/by-label/BOOT"; - fsType = "vfat"; - options = [ "fmask=0022" "dmask=0022" ]; - }; - }) - { - "/mnt" = { - device = "/dev/disk/by-label/hydra"; - fsType = "xfs"; - options = ["logbsize=256k"]; - }; - - # We want the tmp filesystem on the same filesystem as the hydra store, so that builds can use reflinks - "/tmp" = { - device = "/mnt/tmp"; - options = [ "bind" ]; - }; - } - ]; - - swapDevices = lib.optionals (!cfg.netboot) [ + swapDevices = lib.optionals (!cfgParent.netboot) [ { 
device = "/swapfile"; size = 50 * 1024; # 50GiB @@ -103,39 +73,7 @@ in "console=ttyS0,115200" ]; - networking.useNetworkd = true; networking.hostName = "builder-${toString cfg.num}"; - networking.domain = "wob01.infra.forkos.org"; - - systemd.network = { - netdevs = { - "40-uplink" = { - netdevConfig = { - Kind = "bond"; - Name = "uplink"; - }; - bondConfig = { - Mode = "802.3ad"; - TransmitHashPolicy = "layer3+4"; - }; - }; - }; - networks = { - "40-eno1" = { - name = "eno1"; - bond = [ "uplink" ]; - }; - "40-eno2" = { - name = "eno2"; - bond = [ "uplink" ]; - }; - }; - }; - networking.interfaces.uplink.ipv6.addresses = [ - { address = "2a01:584:11::1:${toString cfg.num}"; prefixLength = 64; } - ]; - networking.defaultGateway6 = { interface = "uplink"; address = "2a01:584:11::1"; }; - deployment.targetHost = "2a01:584:11::1:${toString cfg.num}"; deployment.tags = [ "builders" ]; # Why can't we have nice things? https://bugs.openjdk.org/browse/JDK-8170568 @@ -179,11 +117,5 @@ in wantedBy = [ "timers.target" ]; }; systemd.timers.hydra-gc.timerConfig.Persistent = true; - - bagel.sysadmin.enable = true; - - environment.systemPackages = [ pkgs.ipmitool ]; - - system.stateVersion = "24.05"; }; } diff --git a/services/baremetal/default.nix b/services/baremetal/default.nix new file mode 100644 index 0000000..f8d1b30 --- /dev/null +++ b/services/baremetal/default.nix @@ -0,0 +1,12 @@ +{ + imports = [ + # Compute nodes + ./builders + # Storage nodes + ./storage + + # Bases + ./netboot.nix + ./hardware.nix + ]; +} diff --git a/services/baremetal/hardware.nix b/services/baremetal/hardware.nix new file mode 100644 index 0000000..8df6605 --- /dev/null +++ b/services/baremetal/hardware.nix @@ -0,0 +1,91 @@ +{ pkgs, lib, config, ... 
}: +let + cfg = config.bagel.baremetal; +in +{ + options = { + bagel.baremetal = { + enable = lib.mkEnableOption "baremetal bagel oven"; + netboot = lib.mkEnableOption "netboot"; + num = lib.mkOption { + type = lib.types.int; + }; + }; + }; + + config = lib.mkIf cfg.enable { + boot.initrd.availableKernelModules = [ "ahci" "ehci_pci" "usb_storage" "usbhid" "sd_mod" ]; + boot.initrd.kernelModules = [ "dm-snapshot" ]; + + nixpkgs.hostPlatform = "x86_64-linux"; + hardware.cpu.intel.updateMicrocode = true; + + boot.loader.systemd-boot.enable = true; + boot.loader.efi.canTouchEfiVariables = true; + boot.initrd.systemd.enable = true; + boot.initrd.services.lvm.enable = true; + + boot.kernel.sysctl."fs.xfs.xfssyncd_centisecs" = "12000"; + fileSystems = lib.mkIf (!cfg.netboot) { + "/" = { + device = "/dev/disk/by-label/root"; + fsType = "xfs"; + }; + + "/boot" = { + device = "/dev/disk/by-label/BOOT"; + fsType = "vfat"; + options = [ "fmask=0022" "dmask=0022" ]; + }; + }; + + zramSwap = { + enable = true; + memoryPercent = 25; + }; + + boot.kernelParams = [ + "console=tty1" + "console=ttyS0,115200" + ]; + + networking.useNetworkd = true; + networking.domain = "wob01.infra.forkos.org"; + + systemd.network = { + netdevs = { + "40-uplink" = { + netdevConfig = { + Kind = "bond"; + Name = "uplink"; + }; + bondConfig = { + Mode = "802.3ad"; + TransmitHashPolicy = "layer3+4"; + }; + }; + }; + networks = { + "40-eno1" = { + name = "eno1"; + bond = [ "uplink" ]; + }; + "40-eno2" = { + name = "eno2"; + bond = [ "uplink" ]; + }; + }; + }; + networking.interfaces.uplink.ipv6.addresses = [ + { address = "2a01:584:11::1:${toString cfg.num}"; prefixLength = 64; } + ]; + networking.defaultGateway6 = { interface = "uplink"; address = "2a01:584:11::1"; }; + deployment.targetHost = "2a01:584:11::1:${toString cfg.num}"; + + bagel.sysadmin.enable = true; + + environment.systemPackages = [ pkgs.ipmitool ]; + + system.stateVersion = "24.05"; + }; +} diff --git 
a/services/baremetal-builder/netboot.nix b/services/baremetal/netboot.nix similarity index 99% rename from services/baremetal-builder/netboot.nix rename to services/baremetal/netboot.nix index 4f6782d..3bfbfc8 100644 --- a/services/baremetal-builder/netboot.nix +++ b/services/baremetal/netboot.nix @@ -1,6 +1,6 @@ { modulesPath, pkgs, lib, config, extendModules, ... }@node: let - cfg = config.bagel.baremetal.builders; + cfg = config.bagel.baremetal; in { config = lib.mkIf (cfg.enable && cfg.netboot) { diff --git a/services/baremetal/storage/default.nix b/services/baremetal/storage/default.nix new file mode 100644 index 0000000..377a72c --- /dev/null +++ b/services/baremetal/storage/default.nix @@ -0,0 +1,19 @@ +{ lib, config, ... }: +let + cfg = config.bagel.baremetal.storage; +in +{ + options = { + bagel.baremetal.storage = { + enable = lib.mkEnableOption "storage role"; + num = lib.mkOption { + type = lib.types.int; + }; + }; + }; + + config = lib.mkIf cfg.enable { + networking.hostName = "storage-${toString cfg.num}"; + deployment.tags = [ "storage" ]; + }; +} diff --git a/services/default.nix b/services/default.nix index 08a2272..cf32088 100644 --- a/services/default.nix +++ b/services/default.nix @@ -11,7 +11,7 @@ ./ofborg ./postgres ./forgejo - ./baremetal-builder + ./baremetal ./buildbot ./newsletter ./s3-revproxy