forked from raito/shared-public-infra
epyc: hack in better lix logging for debugging our CI problems
Sample broken thing: https://buildbot.lix.systems/#/builders/40/builds/3400/steps/1/logs/stdio I have a suspicion it's that the path vanished due to GC or something, and I want to catch the next occurrence. The error: error: build of '/nix/store/f02iygmil30yfy92xf8cwp224lcr2nay-nixos-test-driver-nix-copy-closure.drv' on 'ssh-ng://nix@epyc.infra.newtype.fr' failed: error: some dependencies of '/nix/store/f02iygmil30yfy92xf8cwp224lcr2nay-nixos-test-driver-nix-copy-closure.drv' are missing This is because nrFailed is not zero, which we don't have any god damned idea why that is because there is NO LOG for those. So we don't even know what goal type failed to begin with. ---- WARNING: this updates nixpkgs by two months!! You will potentially experience other breakage.
This commit is contained in:
parent
0a1d7fe2e1
commit
fdc980208d
|
@ -320,11 +320,11 @@
|
|||
},
|
||||
"nixpkgs_2": {
|
||||
"locked": {
|
||||
"lastModified": 1718983919,
|
||||
"narHash": "sha256-+1xgeIow4gJeiwo4ETvMRvWoircnvb0JOt7NS9kUhoM=",
|
||||
"lastModified": 1724932487,
|
||||
"narHash": "sha256-zzbqHmY1mt21omyk1+14QbAkII1B7OHlwKLcczVq22w=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "90338afd6177fc683a04d934199d693708c85a3b",
|
||||
"rev": "b4f7fb71438d00539b21f1b1e6968c0eac060127",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
|
|
@ -13,6 +13,7 @@ in
|
|||
../modules/garage.nix
|
||||
../modules/users/friends.nix
|
||||
../modules/bagel-container.nix
|
||||
../modules/lix-bug-details-pls
|
||||
];
|
||||
|
||||
networking.hostName = "epyc";
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
From 96937c58232ad6eaa11d1370220101c3ce2d00c3 Mon Sep 17 00:00:00 2001
|
||||
From: Jade Lovelace <lix@jade.fyi>
|
||||
Date: Thu, 29 Aug 2024 23:04:39 -0700
|
||||
Subject: [PATCH] wip: complain about failing goals at warn level
|
||||
|
||||
I want to fix the bug that appears here:
|
||||
|
||||
error: build of '/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-nixos-test-driver-nix-copy-closure.drv' on 'ssh-ng://nix@epyc.infra.newtype.fr' failed: error: some dependencies of '/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-nixos-test-driver-nix-copy-closure.drv' are missing
|
||||
error: builder for '/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-nixos-test-driver-nix-copy-closure.drv' failed with exit code 1
|
||||
error: 1 dependencies of derivation '/nix/store/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-vm-test-run-nix-copy-closure.drv' failed to build
|
||||
|
||||
However, this is conditional on nrFailed, and I cannot for the life of
|
||||
me figure out *who* is failing and *why*.
|
||||
|
||||
Hopefully with these data I can narrow down why this bug is happening
|
||||
|
||||
Change-Id: I7dca71b1c8ac92e7cc40c47ab37c952a7673cf42
|
||||
---
|
||||
src/libstore/build/worker.cc | 5 ++++-
|
||||
1 file changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/libstore/build/worker.cc b/src/libstore/build/worker.cc
|
||||
index 1b4633e64..a93be28a6 100644
|
||||
--- a/src/libstore/build/worker.cc
|
||||
+++ b/src/libstore/build/worker.cc
|
||||
@@ -160,7 +160,10 @@ void Worker::goalFinished(GoalPtr goal, Goal::Finished & f)
|
||||
|
||||
waiting->trace(fmt("waitee '%s' done; %d left", goal->name, waiting->waitees.size()));
|
||||
|
||||
- if (f.result != Goal::ecSuccess) ++waiting->nrFailed;
|
||||
+ if (f.result != Goal::ecSuccess) {
|
||||
+ ++waiting->nrFailed;
|
||||
+ warn("Waiter %s experienced non-success of waitee %s with result %d", waiting->getName(), goal->getName(), f.result);
|
||||
+ }
|
||||
if (f.result == Goal::ecNoSubstituters) ++waiting->nrNoSubstituters;
|
||||
if (f.result == Goal::ecIncompleteClosure) ++waiting->nrIncompleteClosure;
|
||||
|
||||
--
|
||||
2.44.1
|
||||
|
21
modules/lix-bug-details-pls/default.nix
Normal file
21
modules/lix-bug-details-pls/default.nix
Normal file
|
@ -0,0 +1,21 @@
|
|||
{ ... }:
|
||||
{
|
||||
# jade: this exists because of a Lix bug that has me losing my damn mind and we really cannot debug it without either:
|
||||
# * debug logs (infeasible. they are way too spammy)
|
||||
# * patching lix (well look where we are)
|
||||
#
|
||||
# I don't really think it's necessarily appropriate to log at info level when
|
||||
# a derivation fails on `main`, so here we have a yolopatch to get the damn
|
||||
# thing in the log.
|
||||
#
|
||||
# I suspect it is a race condition with the garbage collector.
|
||||
nixpkgs.overlays = [
|
||||
(final: prev: {
|
||||
lix = prev.lix.overrideAttrs (old: {
|
||||
patches = old.patches ++ [
|
||||
./0001-wip-complain-about-failing-goals-at-warn-level.patch
|
||||
];
|
||||
});
|
||||
})
|
||||
];
|
||||
}
|
Loading…
Reference in a new issue