remove retry logic

retries don't help us very much, in fact they mostly hurt by repeating
builds that failed for non-transient reasons. retries could help with
workers dropping while running a build, but those rare cases are better
to restart manually than to pend at least twice the ci time for commits
that simply do not build cleanly.
This commit is contained in:
eldritch horrors 2024-03-10 17:34:22 +01:00
parent e1dfa0e545
commit fdfeef8ad4

View file

@ -2,7 +2,6 @@ import json
import multiprocessing import multiprocessing
import os import os
import sys import sys
import uuid
from collections import defaultdict from collections import defaultdict
from collections.abc import Generator from collections.abc import Generator
from dataclasses import dataclass from dataclasses import dataclass
@ -118,8 +117,6 @@ class BuildTrigger(Trigger):
props.setProperty("system", system, source) props.setProperty("system", system, source)
props.setProperty("drv_path", drv_path, source) props.setProperty("drv_path", drv_path, source)
props.setProperty("out_path", out_path, source) props.setProperty("out_path", out_path, source)
# we use this to identify builds when running a retry
props.setProperty("build_uuid", str(uuid.uuid4()), source)
triggered_schedulers.append((self.builds_scheduler, props)) triggered_schedulers.append((self.builds_scheduler, props))
return triggered_schedulers return triggered_schedulers
@ -192,24 +189,6 @@ class NixEvalCommand(buildstep.ShellMixin, steps.BuildStep):
return result return result
# FIXME this leaks memory... but probably not enough that we care
class RetryCounter:
def __init__(self, retries: int) -> None:
self.builds: dict[uuid.UUID, int] = defaultdict(lambda: retries)
def retry_build(self, build_id: uuid.UUID) -> int:
retries = self.builds[build_id]
if retries > 1:
self.builds[build_id] = retries - 1
return retries
return 0
# For now we limit this to two. Often this allows us to make the error log
# shorter because we won't see the logs for all previous succeeded builds
RETRY_COUNTER = RetryCounter(retries=2)
class EvalErrorStep(steps.BuildStep): class EvalErrorStep(steps.BuildStep):
"""Shows the error message of a failed evaluation.""" """Shows the error message of a failed evaluation."""
@ -236,12 +215,7 @@ class NixBuildCommand(buildstep.ShellMixin, steps.BuildStep):
cmd: remotecommand.RemoteCommand = yield self.makeRemoteShellCommand() cmd: remotecommand.RemoteCommand = yield self.makeRemoteShellCommand()
yield self.runCommand(cmd) yield self.runCommand(cmd)
res = cmd.results() return cmd.results()
if res == util.FAILURE:
retries = RETRY_COUNTER.retry_build(self.getProperty("build_uuid"))
if retries > 0:
return util.RETRY
return res
class UpdateBuildOutput(steps.BuildStep): class UpdateBuildOutput(steps.BuildStep):