use one scheduler and worker set per arch

And an additional set for generic tasks like error reporting. This
prevents head-of-line (HoL) blocking for underutilized arches when at
least one arch is blocked, as usually happens to us with aarch64-linux.
This commit is contained in:
eldritch horrors 2024-03-15 14:47:49 +01:00
parent 5e50a858d7
commit d394f35f55
3 changed files with 44 additions and 23 deletions

View file

@ -62,7 +62,7 @@ class GerritProject:
class BuildTrigger(steps.BuildStep):
def __init__(
self,
builds_scheduler: str,
builds_scheduler_group: str,
jobs: list[dict[str, Any]],
all_deps: dict[str, Any],
**kwargs: Any,
@ -70,7 +70,7 @@ class BuildTrigger(steps.BuildStep):
self.jobs = jobs
self.all_deps = all_deps
self.config = None
self.builds_scheduler = builds_scheduler
self.builds_scheduler_group = builds_scheduler_group
self._result_list = []
self.ended = False
self.waitForFinishDeferred = None
@ -116,7 +116,7 @@ class BuildTrigger(steps.BuildStep):
if error is not None:
props.setProperty("error", error, source)
return (self.builds_scheduler, props)
return (f"{self.builds_scheduler_group}-other", props)
drv_path = job.get("drvPath")
system = job.get("system")
@ -131,7 +131,7 @@ class BuildTrigger(steps.BuildStep):
props.setProperty("out_path", out_path, source)
props.setProperty("isCached", job.get("isCached"), source)
return (self.builds_scheduler, props)
return (f"{self.builds_scheduler_group}-{system}", props)
@defer.inlineCallbacks
def _add_results(self, brid):
@ -353,7 +353,7 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep):
self.build.addStepsAfterCurrentStep(
[
BuildTrigger(
builds_scheduler=f"lix-nix-build",
builds_scheduler_group=f"lix-nix-build",
name="build flake",
jobs=filtered_jobs,
all_deps=all_deps,
@ -499,6 +499,7 @@ def nix_eval_config(
def nix_build_config(
project: GerritProject,
worker_arch: str,
worker_names: list[str],
outputs_path: Path | None = None,
signing_keyfile: str | None = None,
@ -597,7 +598,7 @@ def nix_build_config(
),
)
return util.BuilderConfig(
name=f"{project.name}/nix-build",
name=f"{project.name}/nix-build/{worker_arch}",
project=project.name,
workernames=worker_names,
collapseRequests=False,
@ -640,9 +641,12 @@ def config_for_project(
builderNames=[f"{project.name}/nix-eval"],
),
# this is triggered from `nix-eval`
schedulers.Triggerable(
name=f"{project.name}-nix-build",
builderNames=[f"{project.name}/nix-build"],
*(
schedulers.Triggerable(
name=f"{project.name}-nix-build-{arch}",
builderNames=[f"{project.name}/nix-build/{arch}"],
)
for arch in nix_supported_systems + [ "other" ]
),
# allow to manually trigger a nix-build
schedulers.ForceScheduler(
@ -672,18 +676,22 @@ def config_for_project(
nix_eval_config(
project,
gerrit_private_key,
worker_names,
[ f"{w}-other" for w in worker_names ],
supported_systems=nix_supported_systems,
worker_count=nix_eval_worker_count,
max_memory_size=nix_eval_max_memory_size,
eval_lock=eval_lock,
),
nix_build_config(
project,
worker_names,
outputs_path=outputs_path,
signing_keyfile=signing_keyfile,
binary_cache_config=binary_cache_config
*(
nix_build_config(
project,
arch,
[ f"{w}-{arch}" for w in worker_names ],
outputs_path=outputs_path,
signing_keyfile=signing_keyfile,
binary_cache_config=binary_cache_config
)
for arch in nix_supported_systems + [ "other" ]
),
],
)
@ -819,9 +827,10 @@ class GerritNixConfigurator(ConfiguratorBase):
for item in worker_config:
cores = item.get("cores", 0)
for i in range(cores):
worker_name = f"{item['name']}-{i:03}"
config["workers"].append(worker.Worker(worker_name, item["pass"]))
worker_names.append(worker_name)
for arch in self.nix_supported_systems + ["other"]:
worker_name = f"{item['name']}-{i:03}"
config["workers"].append(worker.Worker(f"{worker_name}-{arch}", item["pass"]))
worker_names.append(worker_name)
eval_lock = util.MasterLock("nix-eval")

View file

@ -25,6 +25,9 @@ class WorkerConfig:
worker_count: int = int(
os.environ.get("WORKER_COUNT", str(multiprocessing.cpu_count())),
)
worker_arch_list: list[str] = field(
default_factory=lambda: os.environ.get("WORKER_ARCH_LIST", "").split(",") + ["other"],
)
buildbot_dir: Path = field(
default_factory=lambda: Path(require_env("BUILDBOT_DIR"))
)
@ -34,13 +37,14 @@ class WorkerConfig:
def setup_worker(
application: components.Componentized,
builder_id: int,
arch: str,
config: WorkerConfig,
) -> None:
basedir = config.buildbot_dir.parent / f"{config.buildbot_dir.name}-{builder_id:03}"
basedir = config.buildbot_dir.parent / f"{config.buildbot_dir.name}-{builder_id:03}/{arch}"
basedir.mkdir(parents=True, exist_ok=True, mode=0o700)
hostname = socket.gethostname()
workername = f"{hostname}-{builder_id:03}"
workername = f"{hostname}-{builder_id:03}-{arch}"
keepalive = 600
umask = None
maxdelay = 300
@ -67,7 +71,8 @@ def setup_worker(
def setup_workers(application: components.Componentized, config: WorkerConfig) -> None:
for i in range(config.worker_count):
setup_worker(application, i, config)
for arch in config.worker_arch_list:
setup_worker(application, i, arch, config)
# note: this line is matched against to check that this is a worker

View file

@ -28,6 +28,10 @@ in
type = lib.types.path;
description = "The buildbot worker password file.";
};
workerArchList = lib.mkOption {
type = lib.types.listOf lib.types.str;
description = "Nix `system`s the worker should feel responsible for.";
};
};
};
config = lib.mkIf cfg.enable {
@ -69,7 +73,10 @@ in
OOMPolicy = "continue";
LoadCredential = [ "worker-password-file:${cfg.workerPasswordFile}" ];
Environment = [ "WORKER_PASSWORD_FILE=%d/worker-password-file" ];
Environment = [
"WORKER_PASSWORD_FILE=%d/worker-password-file"
"WORKER_ARCH_LIST=${lib.concatStringsSep "," cfg.workerArchList}"
];
Type = "simple";
User = "buildbot-worker";
Group = "buildbot-worker";