Rewrite REPL test parser

- Use a recursive descent parser so that it's easy to extend. - Add `@args` to enable customizing command-line arguments - Add `@should-start` to enable `nix repl` tests that error before entering the REPL - Make sure to read all stdout output before comparing. This catches some extra output we were tossing out before! Change-Id: I5522555df4c313024ab15cd10f9f04e7293bda3a
2024-03-22 16:45:05 -07:00 · 2024-03-22 16:45:05 -07:00 · ee423f391d
parent 83d720b730
commit ee423f391d
15 changed files with 856 additions and 419 deletions
--- a/tests/functional/repl_characterization/data/basic.ast
+++ b/tests/functional/repl_characterization/data/basic.ast
@ -1,16 +1,27 @@
-Commentary "meow meow meow"
+Commentary: "meow meow meow"
-Command "command"
+Indent: "  "
-Output "output output one"
+Prompt: "nix-repl> "
-Output ""
+Command: "command"
-Output ""
+Indent: "  "
-Output "output output two"
+Output: "output output one"
-Commentary "meow meow"
+Output: ""
-Command "command two"
+Commentary: ""
-Output "output output output"
+Indent: "  "
-Commentary "commentary"
+Output: "output output two"
-Output "output output output"
+Commentary: "meow meow"
-Output ""
+Indent: "  "
-Commentary "the blank below should be chomped"
+Prompt: "nix-repl> "
-Command "command three"
+Command: "command two"
-Commentary ""
+Indent: "  "
-Output "meow output"
+Output: "output output output"
 Commentary: "commentary"
 Indent: "  "
 Output: "output output output"
 Output: ""
 Commentary: "the blank below should be chomped"
 Indent: "  "
 Prompt: "nix-repl> "
 Command: "command three"
 Commentary: ""
 Indent: "  "
 Output: "meow output"
--- a/tests/functional/repl_characterization/data/basic_tidied.ast
+++ b/tests/functional/repl_characterization/data/basic_tidied.ast
@ -1,10 +1,9 @@
-Command "command"
+Command: "command"
-Output "output output one"
+Output: "output output one"
-Output ""
+Output: ""
-Output ""
+Output: "output output two"
-Output "output output two"
+Command: "command two"
-Command "command two"
+Output: "output output output"
-Output "output output output"
+Output: "output output output"
-Output "output output output"
+Command: "command three"
-Command "command three"
+Output: "meow output"
 Output "meow output"
--- a/tests/functional/repl_characterization/data/basic_tidied.test
+++ b/tests/functional/repl_characterization/data/basic_tidied.test
@ -0,0 +1,10 @@
 command
 output output one
 output output two
 command two
 output output output
 output output output
 command three
 meow output
--- a/tests/functional/repl_characterization/data/no_nested_debuggers.test
+++ b/tests/functional/repl_characterization/data/no_nested_debuggers.test
@ -1,3 +1,4 @@
@args --debugger
 we enter a debugger via builtins.break in the input file.
  info: breakpoint reached
@ -37,3 +38,4 @@ and once again, more breakpoints are ignored.
  nix-repl> builtins.break 3
  3
  error: bar
--- a/tests/functional/repl_characterization/data/regression_9917.test
+++ b/tests/functional/repl_characterization/data/regression_9917.test
@ -1,6 +1,7 @@
 https://github.com/NixOS/nix/pull/9917 (Enter debugger more reliably in let expressions and function calls)
 This test ensures that continues don't skip opportunities to enter the debugger.
@args --debugger
  trace: before outer break
  info: breakpoint reached
@ -13,7 +14,7 @@ This test ensures that continues don't skip opportunities to enter the debugger.
  0: error: breakpoint reached
  «none»:0
  1: while calling a function
-  TEST_DATA/regression_9917.nix:3:5
+  $TEST_DATA/regression_9917.nix:3:5
       2|   a = builtins.trace "before inner break" (
       3|     builtins.break { msg = "hello"; }
@ -21,7 +22,7 @@ This test ensures that continues don't skip opportunities to enter the debugger.
       4|   );
  2: while calling a function
-  TEST_DATA/regression_9917.nix:2:7
+  $TEST_DATA/regression_9917.nix:2:7
       1| let
       2|   a = builtins.trace "before inner break" (
--- a/tests/functional/repl_characterization/data/regression_9918.test
+++ b/tests/functional/repl_characterization/data/regression_9918.test
@ -1,3 +1,4 @@
@args --debugger
  error:
         … while evaluating the error message passed to builtin.throw
@ -14,3 +15,18 @@ We expect to be able to see locals like r in the debugger:
  Env level 1
  abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true
  nix-repl> :quit
  error:
         … while evaluating the file '$TEST_DATA/regression_9918.nix':
         … while calling the 'throw' builtin
           at $TEST_DATA/regression_9918.nix:3:7:
              2|   r = [];
              3|   x = builtins.throw r;
               |       ^
              4| in
         … while evaluating the error message passed to builtin.throw
         error: cannot coerce a list to a string: [ ]
--- a/tests/functional/repl_characterization/data/regression_l145.test
+++ b/tests/functional/repl_characterization/data/regression_l145.test
@ -1,3 +1,4 @@
@args --debugger
  info: breakpoint reached
 debugger should not crash now, but also not show any with variables
@ -12,3 +13,14 @@ debugger should not crash now, but also not show any with variables
  Env level 2
  abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true
  error:
         … while evaluating the file '$TEST_DATA/regression_l145.nix':
         … while calling the 'break' builtin
           at $TEST_DATA/regression_l145.nix:3:7:
              2| let
              3|   x = builtins.break 1;
               |       ^
              4| in
         error: breakpoint reached
--- a/tests/functional/repl_characterization/data/stack_vars.test
+++ b/tests/functional/repl_characterization/data/stack_vars.test
@ -1,3 +1,4 @@
@args --debugger
  trace: before outer break
  info: breakpoint reached
@ -24,7 +25,7 @@ If we :st past the frame in the backtrace with the meow in it, the meow should n
  nix-repl> :st 3
  3: while calling a function
-  TEST_DATA/stack_vars.nix:5:7
+  $TEST_DATA/stack_vars.nix:5:7
       4|   );
       5|   b = builtins.trace "before outer break" (
@ -58,9 +59,8 @@ If we :st past the frame in the backtrace with the meow in it, the meow should n
  3
  nix-repl> :st 3
  3: while calling a function
-  TEST_DATA/stack_vars.nix:2:7
+  $TEST_DATA/stack_vars.nix:2:7
       1| let
       2|   a = builtins.trace "before inner break" (
@ -72,3 +72,21 @@ If we :st past the frame in the backtrace with the meow in it, the meow should n
  Env level 1
  abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true
  nix-repl> :quit
  error:
         … while calling the 'trace' builtin
           at $TEST_DATA/stack_vars.nix:2:7:
              1| let
              2|   a = builtins.trace "before inner break" (
               |       ^
              3|     let meow' = 3; in builtins.break { msg = "hello"; }
         … while calling the 'break' builtin
           at $TEST_DATA/stack_vars.nix:3:23:
              2|   a = builtins.trace "before inner break" (
              3|     let meow' = 3; in builtins.break { msg = "hello"; }
               |                       ^
              4|   );
         error: breakpoint reached
--- a/tests/functional/repl_characterization/repl_characterization.cc
+++ b/tests/functional/repl_characterization/repl_characterization.cc
@ -1,16 +1,17 @@
 #include <gtest/gtest.h>
 #include <boost/algorithm/string/replace.hpp>
 #include <optional>
 #include <string>
 #include <string_view>
 #include <optional>
 #include <unistd.h>
 #include <boost/algorithm/string/replace.hpp>
 #include "escape-string.hh"
 #include "test-session.hh"
 #include "util.hh"
 #include "tests/characterization.hh"
 #include "tests/cli-literate-parser.hh"
 #include "tests/terminal-code-eater.hh"
 #include "util.hh"
 using namespace std::string_literals;
@ -40,92 +41,149 @@ public:
        return unitTestData + "/" + testStem;
    }
-    void runReplTest(std::string_view const & content, std::vector<std::string> extraArgs = {}) const
+    void runReplTest(const std::string content, std::vector<std::string> extraArgs = {}) const
    {
-        auto syntax = CLILiterateParser::parse(std::string(REPL_PROMPT), content);
+        auto parsed = cli_literate_parser::parse(
            content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT), .indent = 2}
        );
        parsed.interpolatePwd(unitTestData);
        // FIXME: why does this need two --quiets
-        // show-trace is on by default due to test configuration, but is not a standard
+        // show-trace is on by default due to test configuration, but is not a
-        Strings args{"--quiet", "repl", "--quiet", "--option", "show-trace", "false", "--offline", "--extra-experimental-features", "repl-automation"};
+        // standard
        Strings args{
            "--quiet",
            "repl",
            "--quiet",
            "--option",
            "show-trace",
            "false",
            "--offline",
            "--extra-experimental-features",
            "repl-automation",
        };
        args.insert(args.end(), extraArgs.begin(), extraArgs.end());
        args.insert(args.end(), parsed.args.begin(), parsed.args.end());
        auto nixBin = canonPath(getEnvNonEmpty("NIX_BIN_DIR").value_or(NIX_BIN_DIR));
        auto process = RunningProcess::start(nixBin + "/nix", args);
-        auto session = TestSession{std::string(AUTOMATION_PROMPT), std::move(process)};
+        auto session = TestSession(std::string(AUTOMATION_PROMPT), std::move(process));
-        for (auto & bit : syntax) {
+        for (auto & event : parsed.syntax) {
-            if (bit.kind != CLILiterateParser::NodeKind::COMMAND) {
+            std::visit(
-                continue;
+                overloaded{
                    [&](const cli_literate_parser::Command & e) {
                        ASSERT_TRUE(session.waitForPrompt());
                        if (e.text == ":quit") {
                            // If we quit the repl explicitly, we won't have a
                            // prompt when we're done.
                            parsed.shouldStart = false;
                        }
-
+                        session.runCommand(e.text);
-            if (!session.waitForPrompt()) {
+                    },
-                ASSERT_TRUE(false);
+                    [&](const auto & e) {},
                },
                event
            );
        }
-            session.runCommand(bit.text);
+        if (parsed.shouldStart) {
-        }
+            ASSERT_TRUE(session.waitForPrompt());
        if (!session.waitForPrompt()) {
            ASSERT_TRUE(false);
        }
        session.close();
-        auto replacedOutLog = boost::algorithm::replace_all_copy(session.outLog, unitTestData, "TEST_DATA");
+        auto replacedOutLog =
            boost::algorithm::replace_all_copy(session.outLog, unitTestData, "$TEST_DATA");
        auto cleanedOutLog = trimOutLog(replacedOutLog);
-        auto parsedOutLog = CLILiterateParser::parse(std::string(AUTOMATION_PROMPT), cleanedOutLog, 0);
+        auto parsedOutLog = cli_literate_parser::parse(
            std::string(cleanedOutLog),
            cli_literate_parser::Config{.prompt = std::string(AUTOMATION_PROMPT), .indent = 0}
        );
-        parsedOutLog = CLILiterateParser::tidyOutputForComparison(std::move(parsedOutLog));
+        auto expected = parsed.tidyOutputForComparison();
-        syntax = CLILiterateParser::tidyOutputForComparison(std::move(syntax));
+        auto actual = parsedOutLog.tidyOutputForComparison();
-        ASSERT_EQ(parsedOutLog, syntax);
+        ASSERT_EQ(expected, actual);
    }
    void runReplTestPath(const std::string_view & nameBase, std::vector<std::string> extraArgs)
    {
        auto nixPath = goldenMaster(nameBase + ".nix");
        if (pathExists(nixPath)) {
            extraArgs.push_back("-f");
            extraArgs.push_back(nixPath);
        }
        readTest(nameBase + ".test", [this, extraArgs](std::string input) {
            runReplTest(input, extraArgs);
        });
    }
    void runReplTestPath(const std::string_view & nameBase)
    {
        runReplTestPath(nameBase, {});
    }
    void runDebuggerTest(const std::string_view & nameBase)
    {
        runReplTestPath(nameBase, {"--debugger"});
    }
 };
-TEST_F(ReplSessionTest, parses)
+TEST_F(ReplSessionTest, round_trip)
 {
    writeTest("basic.test", [this]() {
        const std::string content = readFile(goldenMaster("basic.test"));
        auto parsed = cli_literate_parser::parse(
            content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT)}
        );
        std::ostringstream out{};
        for (auto & node : parsed.syntax) {
            cli_literate_parser::unparseNode(out, node, true);
        }
        return out.str();
    });
 }
 TEST_F(ReplSessionTest, tidy)
 {
    writeTest("basic.ast", [this]() {
        const std::string content = readFile(goldenMaster("basic.test"));
-        auto parser = CLILiterateParser{std::string(REPL_PROMPT)};
+        auto parsed = cli_literate_parser::parse(
-        parser.feed(content);
+            content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT)}
-
+        );
        std::ostringstream out{};
-        for (auto & bit : parser.syntax()) {
+        for (auto & node : parsed.syntax) {
-            out << bit.print() << "\n";
+            out << debugNode(node) << "\n";
        }
        return out.str();
    });
    writeTest("basic_tidied.ast", [this]() {
        const std::string content = readFile(goldenMaster("basic.test"));
-        auto syntax = CLILiterateParser::parse(std::string(REPL_PROMPT), content);
+        auto parsed = cli_literate_parser::parse(
-
+            content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT)}
-        syntax = CLILiterateParser::tidyOutputForComparison(std::move(syntax));
+        );
-
+        auto tidied = parsed.tidyOutputForComparison();
        std::ostringstream out{};
-        for (auto & bit : syntax) {
+        for (auto & node : tidied) {
-            out << bit.print() << "\n";
+            out << debugNode(node) << "\n";
        }
        return out.str();
    });
 }
-TEST_F(ReplSessionTest, repl_basic)
+#define REPL_TEST(name)           \
 {
    readTest("basic_repl.test", [this](std::string input) { runReplTest(input); });
 }
 #define DEBUGGER_TEST(name) \
    TEST_F(ReplSessionTest, name) \
    {                             \
-        readTest(#name ".test", [this](std::string input) { \
+        runReplTestPath(#name);   \
            runReplTest(input, {"--debugger", "-f", goldenMaster(#name ".nix")}); \
        }); \
    }
-DEBUGGER_TEST(regression_9918);
+REPL_TEST(basic_repl);
-DEBUGGER_TEST(regression_9917);
+REPL_TEST(no_nested_debuggers);
-DEBUGGER_TEST(regression_l145);
+REPL_TEST(regression_9917);
-DEBUGGER_TEST(stack_vars);
+REPL_TEST(regression_9918);
-DEBUGGER_TEST(no_nested_debuggers);
+REPL_TEST(regression_l145);
 REPL_TEST(stack_vars);
-};
+}; // namespace nix
--- a/tests/functional/repl_characterization/test-session.cc
+++ b/tests/functional/repl_characterization/test-session.cc
@ -1,4 +1,5 @@
 #include <iostream>
 #include <span>
 #include <unistd.h>
 #include "test-session.hh"
@ -21,14 +22,17 @@ RunningProcess RunningProcess::start(std::string executable, Strings args)
    // This is separate from runProgram2 because we have different IO requirements
    pid_t pid = startProcess([&]() {
-        if (dup2(procStdout.writeSide.get(), STDOUT_FILENO) == -1)
+        if (dup2(procStdout.writeSide.get(), STDOUT_FILENO) == -1) {
            throw SysError("dupping stdout");
-        if (dup2(procStdin.readSide.get(), STDIN_FILENO) == -1)
+        }
        if (dup2(procStdin.readSide.get(), STDIN_FILENO) == -1) {
            throw SysError("dupping stdin");
        }
        procStdin.writeSide.close();
        procStdout.readSide.close();
-        if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1)
+        if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1) {
            throw SysError("dupping stderr");
        }
        execv(executable.c_str(), stringsToCharPtrs(args).data());
        throw SysError("exec did not happen");
    });
@ -44,7 +48,8 @@ RunningProcess RunningProcess::start(std::string executable, Strings args)
 }
 [[gnu::unused]]
-std::ostream & operator<<(std::ostream & os, ReplOutputParser::State s)
+std::ostream &
 operator<<(std::ostream & os, ReplOutputParser::State s)
 {
    switch (s) {
    case ReplOutputParser::State::Prompt:
@ -91,8 +96,7 @@ bool ReplOutputParser::feed(char c)
    return false;
 }
-/** Waits for the prompt and then returns if a prompt was found */
+bool TestSession::readOutThen(ReadOutThenCallback cb)
 bool TestSession::waitForPrompt()
 {
    std::vector<char> buf(1024);
@ -106,38 +110,67 @@ bool TestSession::waitForPrompt()
            return false;
        }
        switch (cb(std::span(buf.data(), res))) {
        case ReadOutThenCallbackResult::Stop:
            return true;
        case ReadOutThenCallbackResult::Continue:
            continue;
        }
    }
 }
 bool TestSession::waitForPrompt()
 {
    bool notEof = readOutThen([&](std::span<char> s) -> ReadOutThenCallbackResult {
        bool foundPrompt = false;
-        for (ssize_t i = 0; i < res; ++i) {
+
        for (auto ch : s) {
            // foundPrompt = foundPrompt || outputParser.feed(buf[i]);
            bool wasEaten = true;
-            eater.feed(buf[i], [&](char c) {
+            eater.feed(ch, [&](char c) {
                wasEaten = false;
-                foundPrompt = outputParser.feed(buf[i]) || foundPrompt;
+                foundPrompt = outputParser.feed(ch) || foundPrompt;
                outLog.push_back(c);
            });
            if constexpr (DEBUG_REPL_PARSER) {
-                std::cerr << "raw " << MaybeHexEscapedChar{buf[i]} << (wasEaten ? " [eaten]" : "") << "\n";
+                std::cerr << "raw " << MaybeHexEscapedChar{ch} << (wasEaten ? " [eaten]" : "") << "\n";
            }
        }
-        if (foundPrompt) {
+        return foundPrompt ? ReadOutThenCallbackResult::Stop : ReadOutThenCallbackResult::Continue;
-            return true;
+    });
    return notEof;
 }
 void TestSession::wait()
 {
    readOutThen([&](std::span<char> s) {
        for (auto ch : s) {
            eater.feed(ch, [&](char c) {
                outputParser.feed(c);
                outLog.push_back(c);
            });
        }
        // just keep reading till we hit eof
        return ReadOutThenCallbackResult::Continue;
    });
 }
 void TestSession::close()
 {
    proc.procStdin.close();
    wait();
    proc.procStdout.close();
 }
 void TestSession::runCommand(std::string command)
 {
-    if constexpr (DEBUG_REPL_PARSER)
+    if constexpr (DEBUG_REPL_PARSER) {
        std::cerr << "runCommand " << command << "\n";
    }
    command += "\n";
    // We have to feed a newline into the output parser, since Nix might not
    // give us a newline before a prompt in all cases (it might clear line
--- a/tests/functional/repl_characterization/test-session.hh
+++ b/tests/functional/repl_characterization/test-session.hh
@ -1,7 +1,9 @@
 #pragma once
 ///@file
 #include <functional>
 #include <sched.h>
 #include <span>
 #include <string>
 #include "util.hh"
@ -22,8 +24,7 @@ struct RunningProcess
 class ReplOutputParser
 {
 public:
-    ReplOutputParser(std::string prompt)
+    ReplOutputParser(std::string prompt) : prompt(prompt)
        : prompt(prompt)
    {
        assert(!prompt.empty());
    }
@ -60,10 +61,27 @@ struct TestSession
    {
    }
    /** Waits for the prompt and then returns if a prompt was found */
    bool waitForPrompt();
    /** Feeds a line of input into the command */
    void runCommand(std::string command);
    /** Closes the session, closing standard input and waiting for standard
     * output to close, capturing any remaining output. */
    void close();
 private:
    /** Waits until the command closes its output */
    void wait();
    enum class ReadOutThenCallbackResult { Stop, Continue };
    using ReadOutThenCallback = std::function<ReadOutThenCallbackResult(std::span<char>)>;
    /** Reads some chunks of output, calling the callback provided for each
     * chunk and stopping if it returns Stop.
     *
     * @returns false if EOF, true if the callback requested we stop first.
     * */
    bool readOutThen(ReadOutThenCallback cb);
 };
 };
--- a/tests/unit/libutil-support/local.mk
+++ b/tests/unit/libutil-support/local.mk
@ -8,7 +8,7 @@ libutil-test-support_INSTALL_DIR :=
 libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc)
-libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES)
+libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES) -I src/libutil
 # libexpr so we can steal their string printer from print.cc
 libutil-test-support_LIBS = libutil libexpr
--- a/tests/unit/libutil-support/tests/characterization.hh
+++ b/tests/unit/libutil-support/tests/characterization.hh
@ -74,20 +74,20 @@ public:
    {
        auto file = goldenMaster(testStem);
-        auto got = test();
+        auto actual = test();
        if (testAccept())
        {
            createDirs(dirOf(file));
-            writeFile2(file, got);
+            writeFile2(file, actual);
            GTEST_SKIP()
                << "Updating golden master "
                << file;
        }
        else
        {
-            decltype(got) expected = readFile2(file);
+            decltype(actual) expected = readFile2(file);
-            ASSERT_EQ(got, expected);
+            ASSERT_EQ(expected, actual);
        }
    }
--- a/tests/unit/libutil-support/tests/cli-literate-parser.cc
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.cc
@ -1,248 +1,444 @@
 #include "cli-literate-parser.hh"
 #include "escape-string.hh"
 #include "libexpr/print.hh"
 #include "escape-char.hh"
 #include "libexpr/print.hh"
 #include "types.hh"
 #include "util.hh"
 #include <ranges>
 #include <boost/algorithm/string/replace.hpp>
 #include <boost/algorithm/string/trim.hpp>
 #include <iostream>
 #include <memory>
-#include <boost/algorithm/string/trim.hpp>
+#include <sstream>
 #include <variant>
-using namespace std::string_literals;
+#include "cli-literate-parser.hh"
-
+#include "escape-string.hh"
-namespace nix {
+#include "fmt.hh"
 #include "libexpr/print.hh"
 #include "shlex.hh"
 #include "types.hh"
 #include "util.hh"
 static constexpr const bool DEBUG_PARSER = false;
-constexpr auto CLILiterateParser::stateDebug(State const & s) -> const char *
+using namespace std::string_literals;
-{
+using namespace boost::algorithm;
    return std::visit(
        overloaded{// clang-format off
            [](Indent const&) -> const char * { return "indent"; },
            [](Commentary const&) -> const char * { return "indent"; },
            [](Prompt const&) -> const char * { return "prompt"; },
            [](Command const&) -> const char * { return "command"; },
            [](OutputLine const&) -> const char * { return "output_line"; }},
        // clang-format on
        s);
 }
-auto CLILiterateParser::Node::print() const -> std::string
+namespace nix {
 {
    std::ostringstream s{};
    switch (kind) {
    case NodeKind::COMMENTARY:
        s << "Commentary ";
        break;
    case NodeKind::COMMAND:
        s << "Command ";
        break;
    case NodeKind::OUTPUT:
        s << "Output ";
        break;
    }
    escapeString(s, this->text);
    return s.str();
 }
-void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os)
+namespace cli_literate_parser {
 {
    for (auto & node : nodes) {
        *os << node.print() << "\\n";
    }
 }
-auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) -> std::vector<Node>
+struct Parser
 {
-    CLILiterateParser p{std::move(prompt), indent};
+    Parser(const std::string input, Config config)
-    p.feed(input);
+        : input(input)
-    return std::move(p).intoSyntax();
+        , rest(this->input)
-}
+        , prompt(config.prompt)
-
+        , indentString(std::string(config.indent, ' '))
-auto CLILiterateParser::intoSyntax() && -> std::vector<Node>
+        , lastWasOutput(false)
-{
+        , syntax{}
    return std::move(this->syntax_);
 }
 CLILiterateParser::CLILiterateParser(std::string prompt, size_t indent)
    : state_(indent == 0 ? State(Prompt{}) : State(Indent{}))
    , prompt_(prompt)
    , indent_(indent)
    , lastWasOutput_(false)
    , syntax_{}
    {
        assert(!prompt.empty());
    }
-void CLILiterateParser::feed(char c)
+    const std::string input;
    std::string_view rest;
    const std::string prompt;
    const std::string indentString;
    /** Last line was output, so we consider a blank to be part of the output */
    bool lastWasOutput;
    /**
     * Nodes of syntax being built.
     */
    std::vector<Node> syntax;
    auto dbg(std::string_view state) -> void
    {
        std::cout << state << ": ";
        escapeString(
            std::cout,
            rest,
            {
                .maxLength = 40,
                .outputAnsiColors = true,
                .escapeNonPrinting = true,
            }
        );
        std::cout << std::endl;
    }
    template<typename T>
    auto pushNode(T node) -> void
    {
        if constexpr (DEBUG_PARSER) {
-        std::cout << stateDebug(state_) << " " << MaybeHexEscapedChar{c} << "\n";
+            std::cout << debugNode(node);
        }
        syntax.emplace_back(node);
    }
-    if (c == '\n') {
+    auto parseLiteral(const char c) -> bool
-        onNewline();
+    {
        if (rest.starts_with(c)) {
            rest.remove_prefix(1);
            return true;
        } else {
            return false;
        }
    }
    auto parseLiteral(const std::string_view & literal) -> bool
    {
        if (rest.starts_with(literal)) {
            rest.remove_prefix(literal.length());
            return true;
        } else {
            return false;
        }
    }
    auto parseBool() -> bool
    {
        auto result = false;
        if (parseLiteral("true")) {
            result = true;
        } else if (parseLiteral("false")) {
            result = false;
        } else {
            throw ParseError("true or false", std::string(rest));
        }
        auto untilNewline = parseUntilNewline();
        if (!untilNewline.empty()) {
            throw ParseError("nothing after true or false", untilNewline);
        }
        return result;
    }
    auto parseUntilNewline() -> std::string
    {
        auto pos = rest.find('\n');
        if (pos == std::string_view::npos) {
            throw ParseError("text and then newline", std::string(rest));
        } else {
            // `parseOutput()` sets this to true anyways.
            lastWasOutput = false;
            auto result = std::string(rest, 0, pos);
            rest.remove_prefix(pos + 1);
            return result;
        }
    }
    auto parseIndent() -> bool
    {
        if constexpr (DEBUG_PARSER) {
            dbg("indent");
        }
        if (indentString.empty()) {
            return true;
        }
        if (parseLiteral(indentString)) {
            pushNode(Indent(indentString));
            return true;
        } else {
            if constexpr (DEBUG_PARSER) {
                dbg("indent failed");
            }
            return false;
        }
    }
    auto parseCommand() -> void
    {
        if constexpr (DEBUG_PARSER) {
            dbg("command");
        }
        auto untilNewline = parseUntilNewline();
        pushNode(Command(untilNewline));
    }
    auto parsePrompt() -> void
    {
        if constexpr (DEBUG_PARSER) {
            dbg("prompt");
        }
        if (parseLiteral(prompt)) {
            pushNode(Prompt(prompt));
            if (rest.empty()) {
                return;
            }
            parseCommand();
        } else {
            parseOutput();
        }
    }
    auto parseOutput() -> void
    {
        if constexpr (DEBUG_PARSER) {
            dbg("output");
        }
        auto untilNewline = parseUntilNewline();
        pushNode(Output(untilNewline));
        lastWasOutput = true;
    }
    auto parseAtSign() -> void
    {
        if constexpr (DEBUG_PARSER) {
            dbg("@ symbol");
        }
        if (!parseLiteral('@')) {
            parseOutputOrCommentary();
        }
        if (parseLiteral("args ")) {
            parseArgs();
        } else if (parseLiteral("should-start ")) {
            if constexpr (DEBUG_PARSER) {
                dbg("@should-start");
            }
            auto shouldStart = parseBool();
            pushNode(ShouldStart{shouldStart});
        }
    }
    auto parseArgs() -> void
    {
        if constexpr (DEBUG_PARSER) {
            dbg("@args");
        }
        auto untilNewline = parseUntilNewline();
        pushNode(Args(untilNewline));
    }
    auto parseOutputOrCommentary() -> void
    {
        if constexpr (DEBUG_PARSER) {
            dbg("output/commentary");
        }
        auto oldLastWasOutput = lastWasOutput;
        auto untilNewline = parseUntilNewline();
        auto trimmed = trim_right_copy(untilNewline);
        if (oldLastWasOutput && trimmed.empty()) {
            pushNode(Output{trimmed});
        } else {
            pushNode(Commentary{untilNewline});
        }
    }
    auto parseStartOfLine() -> void
    {
        if constexpr (DEBUG_PARSER) {
            dbg("start of line");
        }
        if (parseIndent()) {
            parsePrompt();
        } else {
            parseAtSign();
        }
    }
    auto parse() && -> ParseResult
    {
        // Begin the recursive descent parser at the start of a new line.
        while (!rest.empty()) {
            parseStartOfLine();
        }
        return std::move(*this).intoParseResult();
    }
    auto intoParseResult() && -> ParseResult
    {
        // Do another pass over the nodes to produce auxiliary results like parsed
        // command line arguments.
        std::vector<std::string> args;
        std::vector<Node> newSyntax;
        auto shouldStart = true;
        for (auto it = syntax.begin(); it != syntax.end(); ++it) {
            Node node = std::move(*it);
            std::visit(
                overloaded{
-            [&](Indent & s) {
+                    [&](Args & e) {
-                if (c == ' ') {
+                        auto split = shell_split(std::string(e.text));
-                    if (++s.pos >= indent_) {
+                        args.insert(args.end(), split.begin(), split.end());
                        transition(Prompt{});
                    }
                } else {
                    transition(Commentary{AccumulatingState{.lineAccumulator = std::string{c}}});
                }
                    },
-            [&](Prompt & s) {
+                    [&](ShouldStart & e) { shouldStart = e.shouldStart; },
-                if (s.pos >= prompt_.length()) {
+                    [&](auto & e) {},
                    transition(Command{AccumulatingState{.lineAccumulator = std::string{c}}});
                    return;
                } else if (c == prompt_[s.pos]) {
                    // good prompt character
                    ++s.pos;
                } else {
                    // didn't match the prompt, so it must have actually been output.
                    s.lineAccumulator.push_back(c);
                    transition(OutputLine{AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}});
                    return;
                }
                s.lineAccumulator.push_back(c);
                },
-            [&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }},
+                node
-        state_);
+            );
            newSyntax.push_back(node);
        }
-void CLILiterateParser::onNewline()
+        return ParseResult{
            .syntax = std::move(newSyntax),
            .args = std::move(args),
            .shouldStart = shouldStart,
        };
    }
 };
 template<typename View>
 auto tidySyntax(View syntax) -> std::vector<Node>
 {
-    State lastState = std::move(state_);
+    // Note: Setting `lastWasCommand` lets us trim blank lines at the start and
-    bool newLastWasOutput = false;
+    // end of the output stream.
    auto lastWasCommand = true;
    std::vector<Node> newSyntax;
-    syntax_.push_back(std::visit(
+    for (auto it = syntax.begin(); it != syntax.end(); ++it) {
        Node node = *it;
        // Only compare `Command` and `Output` nodes.
        if (std::visit([&](auto && e) { return !e.shouldCompare(); }, node)) {
            continue;
        }
        // Remove blank lines before and after commands. This lets us keep nice
        // whitespace in the test files.
        auto shouldKeep = std::visit(
            overloaded{
-            [&](Indent & s) {
+                [&](Command & e) {
-                // XXX: technically this eats trailing spaces
+                    lastWasCommand = true;
-
+                    auto trimmed = trim_right_copy(e.text);
-                // a newline following output is considered part of that output
+                    if (trimmed.empty()) {
-                if (lastWasOutput_) {
+                        return false;
-                    newLastWasOutput = true;
+                    } else {
-                    return Node::mkOutput("");
+                        e.text = trimmed;
                        return true;
                    }
                return Node::mkCommentary("");
                },
-            [&](Commentary & s) { return Node::mkCommentary(std::move(s.lineAccumulator)); },
+                [&](Output & e) {
-            [&](Command & s) { return Node::mkCommand(std::move(s.lineAccumulator)); },
+                    std::string trimmed = trim_right_copy(e.text);
-            [&](OutputLine & s) {
+                    if (lastWasCommand && trimmed.empty()) {
-                newLastWasOutput = true;
+                        // NB: Keep `lastWasCommand` true in this branch so we
-                return Node::mkOutput(std::move(s.lineAccumulator));
+                        // can keep pruning empty output lines.
                        return false;
                    } else {
                        e.text = trimmed;
                        lastWasCommand = false;
                        return true;
                    }
                },
-            [&](Prompt & s) {
+                [&](auto & e) {
-                // INDENT followed by newline is also considered a blank output line
+                    lastWasCommand = false;
-                return Node::mkOutput(std::move(s.lineAccumulator));
+                    return false;
-            }},
+                },
-        lastState));
+            },
            node
        );
-    transition(Indent{});
+        if (shouldKeep) {
-    lastWasOutput_ = newLastWasOutput;
+            newSyntax.push_back(node);
 }
 void CLILiterateParser::feed(std::string_view s)
 {
    for (char ch : s) {
        feed(ch);
        }
    }
 void CLILiterateParser::transition(State new_state)
 {
    // When we expect INDENT and we are parsing without indents, commentary
    // cannot exist, so we want to transition directly into PROMPT before
    // resuming normal processing.
    if (Indent * i = std::get_if<Indent>(&new_state); i != nullptr && indent_ == 0) {
        new_state = Prompt{AccumulatingState{}, i->pos};
    }
    state_ = new_state;
 }
 auto CLILiterateParser::syntax() const -> std::vector<Node> const &
 {
    return syntax_;
 }
 auto CLILiterateParser::unparse(const std::string & prompt, const std::vector<Node> & syntax, size_t indent)
    -> std::string
 {
    std::string indent_str(indent, ' ');
    std::ostringstream out{};
    for (auto & node : syntax) {
        switch (node.kind) {
        case NodeKind::COMMENTARY:
            out << node.text << "\n";
            break;
        case NodeKind::COMMAND:
            out << indent_str << prompt << node.text << "\n";
            break;
        case NodeKind::OUTPUT:
            out << indent_str << node.text << "\n";
            break;
        }
    }
    return out.str();
 }
 auto CLILiterateParser::tidyOutputForComparison(std::vector<Node> && syntax) -> std::vector<Node>
 {
    std::vector<Node> newSyntax{};
    // Eat trailing newlines, so assume that the very end was actually a command
    bool lastWasCommand = true;
    bool newLastWasCommand = true;
    auto v = std::ranges::reverse_view(syntax);
    for (auto it = v.begin(); it != v.end(); ++it) {
        Node item = std::move(*it);
        lastWasCommand = newLastWasCommand;
        // chomp commentary
        if (item.kind == NodeKind::COMMENTARY) {
            continue;
        }
        if (item.kind == NodeKind::COMMAND) {
            newLastWasCommand = true;
            if (item.text == "") {
                // chomp empty commands
                continue;
            }
        }
        if (item.kind == NodeKind::OUTPUT) {
            // TODO: horrible
            bool nextIsCommand = (it + 1 == v.end()) ? false : (it + 1)->kind == NodeKind::COMMAND;
            std::string trimmedText = boost::algorithm::trim_right_copy(item.text);
            if ((lastWasCommand || nextIsCommand) && trimmedText == "") {
                // chomp empty text above or directly below commands
                continue;
            }
            // real output, stop chomping
            newLastWasCommand = false;
            item = Node::mkOutput(std::move(trimmedText));
        }
        newSyntax.push_back(std::move(item));
    }
    std::reverse(newSyntax.begin(), newSyntax.end());
    return newSyntax;
 }
-};
+auto ParseResult::tidyOutputForComparison() -> std::vector<Node>
 {
    auto reversed = tidySyntax(std::ranges::reverse_view(syntax));
    auto unreversed = tidySyntax(std::ranges::reverse_view(reversed));
    return unreversed;
 }
 void ParseResult::interpolatePwd(std::string_view pwd)
 {
    std::vector<std::string> newArgs;
    for (auto & arg : args) {
        newArgs.push_back(replaceStrings(arg, "${PWD}", pwd));
    }
    args = std::move(newArgs);
 }
 const char * ParseError::what() const noexcept
 {
    if (what_) {
        return what_->c_str();
    } else {
        auto escaped = escapeString(rest, {.maxLength = 256, .escapeNonPrinting = true});
        auto hint =
            new HintFmt("Parse error: Expected %1%, got:\n%2%", expected, Uncolored(escaped));
        what_ = hint->str();
        return what_->c_str();
    }
 }
 auto parse(const std::string input, Config config) -> ParseResult
 {
    return Parser(input, config).parse();
 }
 std::ostream & operator<<(std::ostream & output, const Args & node)
 {
    return output << "@args " << node.text;
 }
 std::ostream & operator<<(std::ostream & output, const ShouldStart & node)
 {
    return output << "@should-start " << (node.shouldStart ? "true" : "false");
 }
 std::ostream & operator<<(std::ostream & output, const TextNode & rhs)
 {
    return output << rhs.text;
 }
 void unparseNode(std::ostream & output, const Node & node, bool withNewline)
 {
    std::visit(
        [&](const auto & n) { output << n << (withNewline && n.emitNewlineAfter() ? "\n" : ""); },
        node
    );
 }
 template<typename T>
 std::string gtestFormat(T & value)
 {
    std::ostringstream formatted;
    unparseNode(formatted, value, true);
    auto str = formatted.str();
    // Needs to be the literal string `\n` and not a newline character to
    // trigger gtest diff printing. Yes seriously.
    boost::algorithm::replace_all(str, "\n", "\\n");
    return str;
 }
 void PrintTo(const std::vector<Node> & nodes, std::ostream * output)
 {
    for (auto & node : nodes) {
        *output << gtestFormat(node);
    }
 }
 std::string debugNode(const Node & node)
 {
    std::ostringstream output;
    output << std::visit([](const auto & n) { return n.kind(); }, node) << ": ";
    std::ostringstream contents;
    unparseNode(contents, node, false);
    escapeString(output, contents.str(), {.escapeNonPrinting = true});
    return output.str();
 }
 auto ParseResult::debugPrint(std::ostream & output) -> void
 {
    ::nix::cli_literate_parser::debugPrint(output, syntax);
 }
 void debugPrint(std::ostream & output, std::vector<Node> & nodes)
 {
    for (auto & node : nodes) {
        output << debugNode(node) << std::endl;
    }
 }
 } // namespace cli_literate_parser
 } // namespace nix
--- a/tests/unit/libutil-support/tests/cli-literate-parser.hh
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.hh
@ -3,132 +3,195 @@
 #include <compare>
 #include <memory>
 #include <optional>
 #include <sstream>
 #include <string>
 #include <variant>
 #include <vector>
 #include <string>
 namespace nix {
 namespace cli_literate_parser {
 // ------------------------- NODES -------------------------
 //
 // To update golden test files while preserving commentary output and other `@`
 // directives, we need to keep commentary output around after parsing.
 struct BaseNode {
  virtual ~BaseNode() = default;
  virtual auto shouldCompare() const -> bool { return false; }
  virtual auto kind() const -> std::string = 0;
  virtual auto emitNewlineAfter() const -> bool = 0;
  auto operator<=>(const BaseNode &rhs) const = default;
 };
 /**
 * A node containing text. The text should be identical to how the node was
 * written in the input file.
 */
 struct TextNode : BaseNode {
  std::string text;
  explicit TextNode(std::string text) : text(text) {}
 };
 std::ostream &operator<<(std::ostream &output, const TextNode &node);
 #define DECLARE_TEXT_NODE(NAME, NEEDS_NEWLINE, SHOULD_COMPARE)                 \
  struct NAME : TextNode {                                                     \
    using TextNode::TextNode;                                                  \
    ~NAME() override = default;                                                \
                                                                               \
    auto kind() const -> std::string override { return #NAME; }                \
    auto emitNewlineAfter() const -> bool override { return NEEDS_NEWLINE; }   \
    auto shouldCompare() const -> bool override { return SHOULD_COMPARE; }     \
  };
 /* name, needsNewline, shouldCompare */
 DECLARE_TEXT_NODE(Prompt, false, false)
 DECLARE_TEXT_NODE(Command, true, true)
 DECLARE_TEXT_NODE(Output, true, true)
 DECLARE_TEXT_NODE(Commentary, true, false)
 DECLARE_TEXT_NODE(Args, true, false)
 DECLARE_TEXT_NODE(Indent, false, false)
 #undef DECLARE_TEXT_NODE
 struct ShouldStart : BaseNode {
  bool shouldStart;
  ShouldStart(bool shouldStart) : shouldStart(shouldStart) {}
  ~ShouldStart() override = default;
  auto emitNewlineAfter() const -> bool override { return true; }
  auto kind() const -> std::string override { return "should-start"; }
  auto operator<=>(const ShouldStart &rhs) const = default;
 };
 std::ostream &operator<<(std::ostream &output, const ShouldStart &node);
 /**
 * Any syntax node, including those that are cosmetic.
 */
 using Node = std::variant<Prompt, Command, Output, Commentary, Args,
                          ShouldStart, Indent>;
 /** Unparses a node into the exact text that would have created it, including a
 * newline at the end if present, if withNewline is set */
 void unparseNode(std::ostream &output, const Node &node,
                 bool withNewline = true);
 std::string debugNode(const Node &node);
 void debugPrint(std::ostream &output, std::vector<Node> &nodes);
 /**
 * Override gtest printing for lists of nodes.
 */
 void PrintTo(std::vector<Node> const &nodes, std::ostream *output);
 /**
 * The result of parsing a test file.
 */
 struct ParseResult {
  /**
   * A set of nodes that can be used to reproduce the input file. This is used
   * to implement updating the test files.
   */
  std::vector<Node> syntax;
  /**
   * Extra CLI arguments.
   */
  std::vector<std::string> args;
  /**
   * Should the program start successfully?
   */
  bool shouldStart = false;
  /**
   * Replace `$PWD` with the given value in `args`.
   */
  void interpolatePwd(std::string_view pwd);
  /**
   * Tidy `syntax` to remove unnecessary nodes.
   */
  auto tidyOutputForComparison() -> std::vector<Node>;
  auto debugPrint(std::ostream &output) -> void;
 };
 /**
 * A parse error.
 */
 struct ParseError : std::exception {
  std::string expected;
  std::string rest;
  ParseError(std::string expected, std::string rest)
      : expected(expected), rest(rest) {}
  const char *what() const noexcept override;
 private:
  /**
   * Cached formatted contents of `what()`.
   */
  mutable std::optional<std::string> what_;
 };
 struct Config {
  /**
   * The prompt string to look for.
   */
  std::string prompt;
  /**
   * The number of spaces of indent for commands and output.
   */
  size_t indent = 2;
 };
 /*
- * A DFA parser for literate test cases for CLIs.
+ * A recursive descent parser for literate test cases for CLIs.
 *
 * FIXME: implement merging of these, so you can auto update cases that have
 * comments.
 *
- * Format:
+ * Syntax:
- * COMMENTARY
+ * ```
- * INDENT PROMPT COMMAND
+ * ( COMMENTARY
- * INDENT OUTPUT
+ * | INDENT PROMPT COMMAND
 * | INDENT OUTPUT
 * | @args ARGS
 * | @should-start ( true | false )) *
 * ```
 *
 * e.g.
 * ```
 * commentary commentary commentary
 * @args --foo
 * @should-start false
 *   nix-repl> :t 1
 *   an integer
 * ```
 *
- * Yields:
+ * Yields something like:
 * ```
 * Commentary "commentary commentary commentary"
 * Args "--foo"
 * ShouldStart false
 * Command ":t 1"
 * Output "an integer"
 * ```
 *
 * Note: one Output line is generated for each line of the sources, because
 * this is effectively necessary to be able to align them in the future to
 * auto-update tests.
 */
-class CLILiterateParser
+auto parse(std::string input, Config config) -> ParseResult;
 {
 public:
-    enum class NodeKind {
+}; // namespace cli_literate_parser
-        COMMENTARY,
+}; // namespace nix
        COMMAND,
        OUTPUT,
    };
    struct Node
    {
        NodeKind kind;
        std::string text;
        std::strong_ordering operator<=>(Node const &) const = default;
        static Node mkCommentary(std::string text)
        {
            return Node{.kind = NodeKind::COMMENTARY, .text = text};
        }
        static Node mkCommand(std::string text)
        {
            return Node{.kind = NodeKind::COMMAND, .text = text};
        }
        static Node mkOutput(std::string text)
        {
            return Node{.kind = NodeKind::OUTPUT, .text = text};
        }
        auto print() const -> std::string;
    };
    CLILiterateParser(std::string prompt, size_t indent = 2);
    auto syntax() const -> std::vector<Node> const &;
    /** Feeds a character into the parser */
    void feed(char c);
    /** Feeds a string into the parser */
    void feed(std::string_view s);
    /** Parses an input in a non-streaming fashion */
    static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
    /** Returns, losslessly, the string that would have generated a syntax tree */
    static auto unparse(std::string const & prompt, std::vector<Node> const & syntax, size_t indent = 2) -> std::string;
    /** Consumes a CLILiterateParser and gives you the syntax out of it */
    auto intoSyntax() && -> std::vector<Node>;
    /** Tidies syntax to remove trailing whitespace from outputs and remove any
     * empty prompts */
    static auto tidyOutputForComparison(std::vector<Node> && syntax) -> std::vector<Node>;
 private:
    struct AccumulatingState
    {
        std::string lineAccumulator;
    };
    struct Indent
    {
        size_t pos = 0;
    };
    struct Commentary : public AccumulatingState
    {};
    struct Prompt : AccumulatingState
    {
        size_t pos = 0;
    };
    struct Command : public AccumulatingState
    {};
    struct OutputLine : public AccumulatingState
    {};
    using State = std::variant<Indent, Commentary, Prompt, Command, OutputLine>;
    State state_;
    constexpr static auto stateDebug(State const&) -> const char *;
    const std::string prompt_;
    const size_t indent_;
    /** Last line was output, so we consider a blank to be part of the output */
    bool lastWasOutput_;
    std::vector<Node> syntax_;
    void transition(State newState);
    void onNewline();
 };
 // Override gtest printing for lists of nodes
 void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os);
 };