From 34fb7a7e9d92698a721862fe0ef926d69b12a8e5 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sat, 9 Mar 2024 11:01:28 +0100 Subject: [PATCH] make the multi-node vm tests a bit more reliable without these changes the tests will very repeatably (although not very reliably) wedge in our runs. the ssh command starts, opens a session, does something, the session closes again, but the test does not move on. adding *just* the redirect and not the unit waits is not sufficient either, it needs both. this feels like a bug in the nixos testing framework somewhere, but digging that far is not in the cards right now. Change-Id: Idab577b83a36cc4899bb5ffbb3d9adc04e83e51c --- tests/nixos/nix-copy-closure.nix | 10 +++++++--- tests/nixos/nix-copy.nix | 10 +++++++--- tests/nixos/remote-builds-ssh-ng.nix | 10 +++++++--- tests/nixos/remote-builds.nix | 12 +++++++++--- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/tests/nixos/nix-copy-closure.nix b/tests/nixos/nix-copy-closure.nix index 66cbfb033..cd25fbbd0 100644 --- a/tests/nixos/nix-copy-closure.nix +++ b/tests/nixos/nix-copy-closure.nix @@ -40,6 +40,11 @@ in { "${pkgs.openssh}/bin/ssh-keygen", "-t", "ed25519", "-f", "key", "-N", "" ], capture_output=True, check=True) + server.succeed("systemctl start network-online.target") + client.succeed("systemctl start network-online.target") + server.wait_for_unit("network-online.target") + client.wait_for_unit("network-online.target") + client.succeed("mkdir -m 700 /root/.ssh") client.copy_from_host("key", "/root/.ssh/id_ed25519") client.succeed("chmod 600 /root/.ssh/id_ed25519") @@ -47,9 +52,8 @@ in { # Install the SSH key on the server. 
server.succeed("mkdir -m 700 /root/.ssh") server.copy_from_host("key.pub", "/root/.ssh/authorized_keys") - server.wait_for_unit("sshd") - client.wait_for_unit("network.target") - client.succeed(f"ssh -o StrictHostKeyChecking=no {server.name} 'echo hello world'") + server.wait_for_unit("sshd.service") + client.succeed(f"ssh -o StrictHostKeyChecking=no {server.name} 'echo hello world' >&2") # Copy the closure of package A from the client to the server. server.fail("nix-store --check-validity ${pkgA}") diff --git a/tests/nixos/nix-copy.nix b/tests/nixos/nix-copy.nix index 7db5197aa..3bcc7a988 100644 --- a/tests/nixos/nix-copy.nix +++ b/tests/nixos/nix-copy.nix @@ -54,8 +54,12 @@ in { start_all() - server.wait_for_unit("sshd") - client.wait_for_unit("network.target") + server.succeed("systemctl start network-online.target") + client.succeed("systemctl start network-online.target") + server.wait_for_unit("network-online.target") + client.wait_for_unit("network-online.target") + + server.wait_for_unit("sshd.service") client.wait_for_unit("getty@tty1.service") # Either the prompt: ]# # or an OCR misreading of it: 1# @@ -82,7 +86,7 @@ in { # Install the SSH key on the server. 
server.copy_from_host("key.pub", "/root/.ssh/authorized_keys") server.succeed("systemctl restart sshd") - client.succeed(f"ssh -o StrictHostKeyChecking=no {server.name} 'echo hello world'") + client.succeed(f"ssh -o StrictHostKeyChecking=no {server.name} 'echo hello world' >&2") client.succeed(f"ssh -O check {server.name}") client.succeed(f"ssh -O exit {server.name}") client.fail(f"ssh -O check {server.name}") diff --git a/tests/nixos/remote-builds-ssh-ng.nix b/tests/nixos/remote-builds-ssh-ng.nix index cca4066f3..5ff471607 100644 --- a/tests/nixos/remote-builds-ssh-ng.nix +++ b/tests/nixos/remote-builds-ssh-ng.nix @@ -78,6 +78,11 @@ in start_all() + builder.succeed("systemctl start network-online.target") + client.succeed("systemctl start network-online.target") + builder.wait_for_unit("network-online.target") + client.wait_for_unit("network-online.target") + # Create an SSH key on the client. subprocess.run([ "${hostPkgs.openssh}/bin/ssh-keygen", "-t", "ed25519", "-f", "key", "-N", "" @@ -87,11 +92,10 @@ in client.succeed("chmod 600 /root/.ssh/id_ed25519") # Install the SSH key on the builder. 
- client.wait_for_unit("network.target") builder.succeed("mkdir -p -m 700 /root/.ssh") builder.copy_from_host("key.pub", "/root/.ssh/authorized_keys") - builder.wait_for_unit("sshd") - client.succeed(f"ssh -o StrictHostKeyChecking=no {builder.name} 'echo hello world'") + builder.wait_for_unit("sshd.service") + client.succeed(f"ssh -o StrictHostKeyChecking=no {builder.name} 'echo hello world' >&2") # Perform a build out = client.succeed("nix-build ${expr nodes.client 1} 2> build-output") diff --git a/tests/nixos/remote-builds.nix b/tests/nixos/remote-builds.nix index 423b9d171..d2ed7853a 100644 --- a/tests/nixos/remote-builds.nix +++ b/tests/nixos/remote-builds.nix @@ -85,6 +85,13 @@ in start_all() + builder1.succeed("systemctl start network-online.target") + builder2.succeed("systemctl start network-online.target") + client.succeed("systemctl start network-online.target") + builder1.wait_for_unit("network-online.target") + builder2.wait_for_unit("network-online.target") + client.wait_for_unit("network-online.target") + # Create an SSH key on the client. subprocess.run([ "${hostPkgs.openssh}/bin/ssh-keygen", "-t", "ed25519", "-f", "key", "-N", "" @@ -94,12 +101,11 @@ in client.succeed("chmod 600 /root/.ssh/id_ed25519") # Install the SSH key on the builders. - client.wait_for_unit("network.target") for builder in [builder1, builder2]: builder.succeed("mkdir -p -m 700 /root/.ssh") builder.copy_from_host("key.pub", "/root/.ssh/authorized_keys") - builder.wait_for_unit("sshd") - client.succeed(f"ssh -o StrictHostKeyChecking=no {builder.name} 'echo hello world'") + builder.wait_for_unit("sshd.service") + client.succeed(f"ssh -o StrictHostKeyChecking=no {builder.name} 'echo hello world' >&2") # Perform a build and check that it was performed on the builder. out = client.succeed(