From 99b8a26e235ee9b700202cd21ec37ae3c2d05215 Mon Sep 17 00:00:00 2001 From: Rebecca Turner Date: Fri, 22 Mar 2024 16:45:05 -0700 Subject: [PATCH] Rewrite REPL test parser - Use a recursive descent parser so that it's easy to extend. - Add `@args` to enable customizing command-line arguments - Add `@should-start` to enable `nix repl` tests that error before entering the REPL - Make sure to read all stdout output before comparing. This catches some extra output we were tossing out before! Change-Id: I5522555df4c313024ab15cd10f9f04e7293bda3a --- .../repl_characterization/data/basic.ast | 43 +- .../data/basic_tidied.ast | 19 +- .../data/basic_tidied.test | 10 + .../data/no_nested_debuggers.test | 2 + .../data/regression_9917.test | 5 +- .../data/regression_9918.test | 16 + .../data/regression_l145.test | 12 + .../data/stack_vars.test | 24 +- .../repl_characterization.cc | 164 +++-- .../repl_characterization/test-session.cc | 61 +- .../repl_characterization/test-session.hh | 22 +- tests/unit/libutil-support/local.mk | 2 +- .../libutil-support/tests/characterization.hh | 8 +- .../tests/cli-literate-parser.cc | 616 ++++++++++++------ .../tests/cli-literate-parser.hh | 271 +++++--- 15 files changed, 856 insertions(+), 419 deletions(-) create mode 100644 tests/functional/repl_characterization/data/basic_tidied.test diff --git a/tests/functional/repl_characterization/data/basic.ast b/tests/functional/repl_characterization/data/basic.ast index bdb445c9a..e079af588 100644 --- a/tests/functional/repl_characterization/data/basic.ast +++ b/tests/functional/repl_characterization/data/basic.ast @@ -1,16 +1,27 @@ -Commentary "meow meow meow" -Command "command" -Output "output output one" -Output "" -Output "" -Output "output output two" -Commentary "meow meow" -Command "command two" -Output "output output output" -Commentary "commentary" -Output "output output output" -Output "" -Commentary "the blank below should be chomped" -Command "command three" -Commentary "" -Output "meow output" +Commentary: "meow meow meow" +Indent: " " +Prompt: "nix-repl> " +Command: "command" +Indent: " " +Output: "output output one" +Output: "" +Commentary: "" +Indent: " " +Output: "output output two" +Commentary: "meow meow" +Indent: " " +Prompt: "nix-repl> " +Command: "command two" +Indent: " " +Output: "output output output" +Commentary: "commentary" +Indent: " " +Output: "output output output" +Output: "" +Commentary: "the blank below should be chomped" +Indent: " " +Prompt: "nix-repl> " +Command: "command three" +Commentary: "" +Indent: " " +Output: "meow output" diff --git a/tests/functional/repl_characterization/data/basic_tidied.ast b/tests/functional/repl_characterization/data/basic_tidied.ast index 878065a5c..d41d21a96 100644 --- a/tests/functional/repl_characterization/data/basic_tidied.ast +++ b/tests/functional/repl_characterization/data/basic_tidied.ast @@ -1,10 +1,9 @@ -Command "command" -Output "output output one" -Output "" -Output "" -Output "output output two" -Command "command two" -Output "output output output" -Output "output output output" -Command "command three" -Output "meow output" +Command: "command" +Output: "output output one" +Output: "" +Output: "output output two" +Command: "command two" +Output: "output output output" +Output: "output output output" +Command: "command three" +Output: "meow output" diff --git a/tests/functional/repl_characterization/data/basic_tidied.test b/tests/functional/repl_characterization/data/basic_tidied.test new file mode 100644 index 000000000..2c56c489b --- /dev/null +++ b/tests/functional/repl_characterization/data/basic_tidied.test @@ -0,0 +1,10 @@ +command +output output one + +output output two +command two +output output output +output output output + +command three +meow output diff --git a/tests/functional/repl_characterization/data/no_nested_debuggers.test b/tests/functional/repl_characterization/data/no_nested_debuggers.test index 5e834a68a..0199407f6 100644 --- a/tests/functional/repl_characterization/data/no_nested_debuggers.test +++ b/tests/functional/repl_characterization/data/no_nested_debuggers.test @@ -1,3 +1,4 @@ +@args --debugger we enter a debugger via builtins.break in the input file. info: breakpoint reached @@ -37,3 +38,4 @@ and once again, more breakpoints are ignored. nix-repl> builtins.break 3 3 + error: bar diff --git a/tests/functional/repl_characterization/data/regression_9917.test b/tests/functional/repl_characterization/data/regression_9917.test index 44ca951b5..67ad1db6b 100644 --- a/tests/functional/repl_characterization/data/regression_9917.test +++ b/tests/functional/repl_characterization/data/regression_9917.test @@ -1,6 +1,7 @@ https://github.com/NixOS/nix/pull/9917 (Enter debugger more reliably in let expressions and function calls) This test ensures that continues don't skip opportunities to enter the debugger. +@args --debugger trace: before outer break info: breakpoint reached @@ -13,7 +14,7 @@ This test ensures that continues don't skip opportunities to enter the debugger. 0: error: breakpoint reached «none»:0 1: while calling a function - TEST_DATA/regression_9917.nix:3:5 + $TEST_DATA/regression_9917.nix:3:5 2| a = builtins.trace "before inner break" ( 3| builtins.break { msg = "hello"; } @@ -21,7 +22,7 @@ This test ensures that continues don't skip opportunities to enter the debugger. 4| ); 2: while calling a function - TEST_DATA/regression_9917.nix:2:7 + $TEST_DATA/regression_9917.nix:2:7 1| let 2| a = builtins.trace "before inner break" ( diff --git a/tests/functional/repl_characterization/data/regression_9918.test b/tests/functional/repl_characterization/data/regression_9918.test index c30c405b6..a85d6d33a 100644 --- a/tests/functional/repl_characterization/data/regression_9918.test +++ b/tests/functional/repl_characterization/data/regression_9918.test @@ -1,3 +1,4 @@ +@args --debugger error: … while evaluating the error message passed to builtin.throw @@ -14,3 +15,18 @@ We expect to be able to see locals like r in the debugger: Env level 1 abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true + + nix-repl> :quit + error: + … while evaluating the file '$TEST_DATA/regression_9918.nix': + + … while calling the 'throw' builtin + at $TEST_DATA/regression_9918.nix:3:7: + 2| r = []; + 3| x = builtins.throw r; + | ^ + 4| in + + … while evaluating the error message passed to builtin.throw + + error: cannot coerce a list to a string: [ ] diff --git a/tests/functional/repl_characterization/data/regression_l145.test b/tests/functional/repl_characterization/data/regression_l145.test index 2fe04d221..bab347d26 100644 --- a/tests/functional/repl_characterization/data/regression_l145.test +++ b/tests/functional/repl_characterization/data/regression_l145.test @@ -1,3 +1,4 @@ +@args --debugger info: breakpoint reached debugger should not crash now, but also not show any with variables @@ -12,3 +13,14 @@ debugger should not crash now, but also not show any with variables Env level 2 abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true + error: + … while evaluating the file '$TEST_DATA/regression_l145.nix': + + … while calling the 'break' builtin + at $TEST_DATA/regression_l145.nix:3:7: + 2| let + 3| x = builtins.break 1; + | ^ + 4| in + + error: breakpoint reached diff --git a/tests/functional/repl_characterization/data/stack_vars.test b/tests/functional/repl_characterization/data/stack_vars.test index 96ea5fe25..c9296eeaa 100644 --- a/tests/functional/repl_characterization/data/stack_vars.test +++ b/tests/functional/repl_characterization/data/stack_vars.test @@ -1,3 +1,4 @@ +@args --debugger trace: before outer break info: breakpoint reached @@ -24,7 +25,7 @@ If we :st past the frame in the backtrace with the meow in it, the meow should n nix-repl> :st 3 3: while calling a function - TEST_DATA/stack_vars.nix:5:7 + $TEST_DATA/stack_vars.nix:5:7 4| ); 5| b = builtins.trace "before outer break" ( @@ -58,9 +59,8 @@ If we :st past the frame in the backtrace with the meow in it, the meow should n 3 nix-repl> :st 3 - 3: while calling a function - TEST_DATA/stack_vars.nix:2:7 + $TEST_DATA/stack_vars.nix:2:7 1| let 2| a = builtins.trace "before inner break" ( @@ -72,3 +72,21 @@ If we :st past the frame in the backtrace with the meow in it, the meow should n Env level 1 abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true + + nix-repl> :quit + error: + … while calling the 'trace' builtin + at $TEST_DATA/stack_vars.nix:2:7: + 1| let + 2| a = builtins.trace "before inner break" ( + | ^ + 3| let meow' = 3; in builtins.break { msg = "hello"; } + + … while calling the 'break' builtin + at $TEST_DATA/stack_vars.nix:3:23: + 2| a = builtins.trace "before inner break" ( + 3| let meow' = 3; in builtins.break { msg = "hello"; } + | ^ + 4| ); + + error: breakpoint reached diff --git a/tests/functional/repl_characterization/repl_characterization.cc b/tests/functional/repl_characterization/repl_characterization.cc index 68a66b2f3..fa5a7ba74 100644 --- a/tests/functional/repl_characterization/repl_characterization.cc +++ b/tests/functional/repl_characterization/repl_characterization.cc @@ -1,16 +1,17 @@ #include +#include +#include #include #include -#include #include -#include +#include "escape-string.hh" #include "test-session.hh" -#include "util.hh" #include "tests/characterization.hh" #include "tests/cli-literate-parser.hh" #include "tests/terminal-code-eater.hh" +#include "util.hh" using namespace std::string_literals; @@ -40,92 +41,149 @@ public: return unitTestData + "/" + testStem; } - void runReplTest(std::string_view const & content, std::vector extraArgs = {}) const + void runReplTest(const std::string content, std::vector extraArgs = {}) const { - auto syntax = CLILiterateParser::parse(std::string(REPL_PROMPT), content); + auto parsed = cli_literate_parser::parse( + content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT), .indent = 2} + ); + parsed.interpolatePwd(unitTestData); // FIXME: why does this need two --quiets - // show-trace is on by default due to test configuration, but is not a standard - Strings args{"--quiet", "repl", "--quiet", "--option", "show-trace", "false", "--offline", "--extra-experimental-features", "repl-automation"}; + // show-trace is on by default due to test configuration, but is not a + // standard + Strings args{ + "--quiet", + "repl", + "--quiet", + "--option", + "show-trace", + "false", + "--offline", + "--extra-experimental-features", + "repl-automation", + }; args.insert(args.end(), extraArgs.begin(), extraArgs.end()); + args.insert(args.end(), parsed.args.begin(), parsed.args.end()); auto nixBin = canonPath(getEnvNonEmpty("NIX_BIN_DIR").value_or(NIX_BIN_DIR)); auto process = RunningProcess::start(nixBin + "/nix", args); - auto session = TestSession{std::string(AUTOMATION_PROMPT), std::move(process)}; + auto session = TestSession(std::string(AUTOMATION_PROMPT), std::move(process)); - for (auto & bit : syntax) { - if (bit.kind != CLILiterateParser::NodeKind::COMMAND) { - continue; - } - - if (!session.waitForPrompt()) { - ASSERT_TRUE(false); - } - session.runCommand(bit.text); + for (auto & event : parsed.syntax) { + std::visit( + overloaded{ + [&](const cli_literate_parser::Command & e) { + ASSERT_TRUE(session.waitForPrompt()); + if (e.text == ":quit") { + // If we quit the repl explicitly, we won't have a + // prompt when we're done. + parsed.shouldStart = false; + } + session.runCommand(e.text); + }, + [&](const auto & e) {}, + }, + event + ); } - if (!session.waitForPrompt()) { - ASSERT_TRUE(false); + if (parsed.shouldStart) { + ASSERT_TRUE(session.waitForPrompt()); } session.close(); - auto replacedOutLog = boost::algorithm::replace_all_copy(session.outLog, unitTestData, "TEST_DATA"); + auto replacedOutLog = + boost::algorithm::replace_all_copy(session.outLog, unitTestData, "$TEST_DATA"); auto cleanedOutLog = trimOutLog(replacedOutLog); - auto parsedOutLog = CLILiterateParser::parse(std::string(AUTOMATION_PROMPT), cleanedOutLog, 0); + auto parsedOutLog = cli_literate_parser::parse( + std::string(cleanedOutLog), + cli_literate_parser::Config{.prompt = std::string(AUTOMATION_PROMPT), .indent = 0} + ); - parsedOutLog = CLILiterateParser::tidyOutputForComparison(std::move(parsedOutLog)); - syntax = CLILiterateParser::tidyOutputForComparison(std::move(syntax)); + auto expected = parsed.tidyOutputForComparison(); + auto actual = parsedOutLog.tidyOutputForComparison(); - ASSERT_EQ(parsedOutLog, syntax); + ASSERT_EQ(expected, actual); + } + + void runReplTestPath(const std::string_view & nameBase, std::vector extraArgs) + { + auto nixPath = goldenMaster(nameBase + ".nix"); + if (pathExists(nixPath)) { + extraArgs.push_back("-f"); + extraArgs.push_back(nixPath); + } + readTest(nameBase + ".test", [this, extraArgs](std::string input) { + runReplTest(input, extraArgs); + }); + } + + void runReplTestPath(const std::string_view & nameBase) + { + runReplTestPath(nameBase, {}); + } + + void runDebuggerTest(const std::string_view & nameBase) + { + runReplTestPath(nameBase, {"--debugger"}); } }; -TEST_F(ReplSessionTest, parses) +TEST_F(ReplSessionTest, round_trip) +{ + writeTest("basic.test", [this]() { + const std::string content = readFile(goldenMaster("basic.test")); + auto parsed = cli_literate_parser::parse( + content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT)} + ); + + std::ostringstream out{}; + for (auto & node : parsed.syntax) { + cli_literate_parser::unparseNode(out, node, true); + } + return out.str(); + }); +} + +TEST_F(ReplSessionTest, tidy) { writeTest("basic.ast", [this]() { const std::string content = readFile(goldenMaster("basic.test")); - auto parser = CLILiterateParser{std::string(REPL_PROMPT)}; - parser.feed(content); - + auto parsed = cli_literate_parser::parse( + content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT)} + ); std::ostringstream out{}; - for (auto & bit : parser.syntax()) { - out << bit.print() << "\n"; + for (auto & node : parsed.syntax) { + out << debugNode(node) << "\n"; } return out.str(); }); - writeTest("basic_tidied.ast", [this]() { const std::string content = readFile(goldenMaster("basic.test")); - auto syntax = CLILiterateParser::parse(std::string(REPL_PROMPT), content); - - syntax = CLILiterateParser::tidyOutputForComparison(std::move(syntax)); - + auto parsed = cli_literate_parser::parse( + content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT)} + ); + auto tidied = parsed.tidyOutputForComparison(); std::ostringstream out{}; - for (auto & bit : syntax) { - out << bit.print() << "\n"; + for (auto & node : tidied) { + out << debugNode(node) << "\n"; } return out.str(); }); } -TEST_F(ReplSessionTest, repl_basic) -{ - readTest("basic_repl.test", [this](std::string input) { runReplTest(input); }); -} - -#define DEBUGGER_TEST(name) \ +#define REPL_TEST(name) \ TEST_F(ReplSessionTest, name) \ - { \ - readTest(#name ".test", [this](std::string input) { \ - runReplTest(input, {"--debugger", "-f", goldenMaster(#name ".nix")}); \ - }); \ + { \ + runReplTestPath(#name); \ } -DEBUGGER_TEST(regression_9918); -DEBUGGER_TEST(regression_9917); -DEBUGGER_TEST(regression_l145); -DEBUGGER_TEST(stack_vars); -DEBUGGER_TEST(no_nested_debuggers); +REPL_TEST(basic_repl); +REPL_TEST(no_nested_debuggers); +REPL_TEST(regression_9917); +REPL_TEST(regression_9918); +REPL_TEST(regression_l145); +REPL_TEST(stack_vars); -}; +}; // namespace nix diff --git a/tests/functional/repl_characterization/test-session.cc b/tests/functional/repl_characterization/test-session.cc index 50e27e58c..52179a372 100644 --- a/tests/functional/repl_characterization/test-session.cc +++ b/tests/functional/repl_characterization/test-session.cc @@ -1,4 +1,5 @@ #include +#include #include #include "test-session.hh" @@ -21,14 +22,17 @@ RunningProcess RunningProcess::start(std::string executable, Strings args) // This is separate from runProgram2 because we have different IO requirements pid_t pid = startProcess([&]() { - if (dup2(procStdout.writeSide.get(), STDOUT_FILENO) == -1) + if (dup2(procStdout.writeSide.get(), STDOUT_FILENO) == -1) { throw SysError("dupping stdout"); - if (dup2(procStdin.readSide.get(), STDIN_FILENO) == -1) + } + if (dup2(procStdin.readSide.get(), STDIN_FILENO) == -1) { throw SysError("dupping stdin"); + } procStdin.writeSide.close(); procStdout.readSide.close(); - if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1) + if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1) { throw SysError("dupping stderr"); + } execv(executable.c_str(), stringsToCharPtrs(args).data()); throw SysError("exec did not happen"); }); @@ -44,7 +48,8 @@ RunningProcess RunningProcess::start(std::string executable, Strings args) } [[gnu::unused]] -std::ostream & operator<<(std::ostream & os, ReplOutputParser::State s) +std::ostream & +operator<<(std::ostream & os, ReplOutputParser::State s) { switch (s) { case ReplOutputParser::State::Prompt: @@ -91,8 +96,7 @@ bool ReplOutputParser::feed(char c) return false; } -/** Waits for the prompt and then returns if a prompt was found */ -bool TestSession::waitForPrompt() +bool TestSession::readOutThen(ReadOutThenCallback cb) { std::vector buf(1024); @@ -106,38 +110,67 @@ bool TestSession::waitForPrompt() return false; } + switch (cb(std::span(buf.data(), res))) { + case ReadOutThenCallbackResult::Stop: + return true; + case ReadOutThenCallbackResult::Continue: + continue; + } + } +} + +bool TestSession::waitForPrompt() +{ + bool notEof = readOutThen([&](std::span s) -> ReadOutThenCallbackResult { bool foundPrompt = false; - for (ssize_t i = 0; i < res; ++i) { + + for (auto ch : s) { // foundPrompt = foundPrompt || outputParser.feed(buf[i]); bool wasEaten = true; - eater.feed(buf[i], [&](char c) { + eater.feed(ch, [&](char c) { wasEaten = false; - foundPrompt = outputParser.feed(buf[i]) || foundPrompt; + foundPrompt = outputParser.feed(ch) || foundPrompt; outLog.push_back(c); }); if constexpr (DEBUG_REPL_PARSER) { - std::cerr << "raw " << MaybeHexEscapedChar{buf[i]} << (wasEaten ? " [eaten]" : "") << "\n"; + std::cerr << "raw " << MaybeHexEscapedChar{ch} << (wasEaten ? " [eaten]" : "") << "\n"; } } - if (foundPrompt) { - return true; + return foundPrompt ? ReadOutThenCallbackResult::Stop : ReadOutThenCallbackResult::Continue; + }); + + return notEof; +} + +void TestSession::wait() +{ + readOutThen([&](std::span s) { + for (auto ch : s) { + eater.feed(ch, [&](char c) { + outputParser.feed(c); + outLog.push_back(c); + }); } - } + // just keep reading till we hit eof + return ReadOutThenCallbackResult::Continue; + }); } void TestSession::close() { proc.procStdin.close(); + wait(); proc.procStdout.close(); } void TestSession::runCommand(std::string command) { - if constexpr (DEBUG_REPL_PARSER) + if constexpr (DEBUG_REPL_PARSER) { std::cerr << "runCommand " << command << "\n"; + } command += "\n"; // We have to feed a newline into the output parser, since Nix might not // give us a newline before a prompt in all cases (it might clear line diff --git a/tests/functional/repl_characterization/test-session.hh b/tests/functional/repl_characterization/test-session.hh index 19636640b..2552542fb 100644 --- a/tests/functional/repl_characterization/test-session.hh +++ b/tests/functional/repl_characterization/test-session.hh @@ -1,7 +1,9 @@ #pragma once ///@file +#include #include +#include #include #include "util.hh" @@ -22,8 +24,7 @@ struct RunningProcess class ReplOutputParser { public: - ReplOutputParser(std::string prompt) - : prompt(prompt) + ReplOutputParser(std::string prompt) : prompt(prompt) { assert(!prompt.empty()); } @@ -60,10 +61,27 @@ struct TestSession { } + /** Waits for the prompt and then returns if a prompt was found */ bool waitForPrompt(); + /** Feeds a line of input into the command */ void runCommand(std::string command); + /** Closes the session, closing standard input and waiting for standard + * output to close, capturing any remaining output. */ void close(); + +private: + /** Waits until the command closes its output */ + void wait(); + + enum class ReadOutThenCallbackResult { Stop, Continue }; + using ReadOutThenCallback = std::function)>; + /** Reads some chunks of output, calling the callback provided for each + * chunk and stopping if it returns Stop. + * + * @returns false if EOF, true if the callback requested we stop first. + * */ + bool readOutThen(ReadOutThenCallback cb); }; }; diff --git a/tests/unit/libutil-support/local.mk b/tests/unit/libutil-support/local.mk index cfd88be99..67fd92d77 100644 --- a/tests/unit/libutil-support/local.mk +++ b/tests/unit/libutil-support/local.mk @@ -8,7 +8,7 @@ libutil-test-support_INSTALL_DIR := libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc) -libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES) +libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES) -I src/libutil # libexpr so we can steal their string printer from print.cc libutil-test-support_LIBS = libutil libexpr diff --git a/tests/unit/libutil-support/tests/characterization.hh b/tests/unit/libutil-support/tests/characterization.hh index 6ee994d71..7f570f619 100644 --- a/tests/unit/libutil-support/tests/characterization.hh +++ b/tests/unit/libutil-support/tests/characterization.hh @@ -74,20 +74,20 @@ public: { auto file = goldenMaster(testStem); - auto got = test(); + auto actual = test(); if (testAccept()) { createDirs(dirOf(file)); - writeFile2(file, got); + writeFile2(file, actual); GTEST_SKIP() << "Updating golden master " << file; } else { - decltype(got) expected = readFile2(file); - ASSERT_EQ(got, expected); + decltype(actual) expected = readFile2(file); + ASSERT_EQ(expected, actual); } } diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.cc b/tests/unit/libutil-support/tests/cli-literate-parser.cc index 4edf434be..023f86cd7 100644 --- a/tests/unit/libutil-support/tests/cli-literate-parser.cc +++ b/tests/unit/libutil-support/tests/cli-literate-parser.cc @@ -1,248 +1,444 @@ #include "cli-literate-parser.hh" #include "escape-string.hh" -#include "libexpr/print.hh" #include "escape-char.hh" +#include "libexpr/print.hh" #include "types.hh" #include "util.hh" #include +#include +#include #include #include -#include +#include +#include -using namespace std::string_literals; - -namespace nix { +#include "cli-literate-parser.hh" +#include "escape-string.hh" +#include "fmt.hh" +#include "libexpr/print.hh" +#include "shlex.hh" +#include "types.hh" +#include "util.hh" static constexpr const bool DEBUG_PARSER = false; -constexpr auto CLILiterateParser::stateDebug(State const & s) -> const char * -{ - return std::visit( - overloaded{// clang-format off - [](Indent const&) -> const char * { return "indent"; }, - [](Commentary const&) -> const char * { return "indent"; }, - [](Prompt const&) -> const char * { return "prompt"; }, - [](Command const&) -> const char * { return "command"; }, - [](OutputLine const&) -> const char * { return "output_line"; }}, - // clang-format on - s); -} +using namespace std::string_literals; +using namespace boost::algorithm; -auto CLILiterateParser::Node::print() const -> std::string -{ - std::ostringstream s{}; - switch (kind) { - case NodeKind::COMMENTARY: - s << "Commentary "; - break; - case NodeKind::COMMAND: - s << "Command "; - break; - case NodeKind::OUTPUT: - s << "Output "; - break; - } - escapeString(s, this->text); - return s.str(); -} +namespace nix { -void PrintTo(std::vector const & nodes, std::ostream * os) -{ - for (auto & node : nodes) { - *os << node.print() << "\\n"; - } -} +namespace cli_literate_parser { -auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) -> std::vector +struct Parser { - CLILiterateParser p{std::move(prompt), indent}; - p.feed(input); - return std::move(p).intoSyntax(); -} - -auto CLILiterateParser::intoSyntax() && -> std::vector -{ - return std::move(this->syntax_); -} - -CLILiterateParser::CLILiterateParser(std::string prompt, size_t indent) - : state_(indent == 0 ? State(Prompt{}) : State(Indent{})) - , prompt_(prompt) - , indent_(indent) - , lastWasOutput_(false) - , syntax_{} -{ - assert(!prompt.empty()); -} - -void CLILiterateParser::feed(char c) -{ - if constexpr (DEBUG_PARSER) { - std::cout << stateDebug(state_) << " " << MaybeHexEscapedChar{c} << "\n"; + Parser(const std::string input, Config config) + : input(input) + , rest(this->input) + , prompt(config.prompt) + , indentString(std::string(config.indent, ' ')) + , lastWasOutput(false) + , syntax{} + { + assert(!prompt.empty()); } - if (c == '\n') { - onNewline(); - return; + const std::string input; + std::string_view rest; + const std::string prompt; + const std::string indentString; + + /** Last line was output, so we consider a blank to be part of the output */ + bool lastWasOutput; + + /** + * Nodes of syntax being built. + */ + std::vector syntax; + + auto dbg(std::string_view state) -> void + { + std::cout << state << ": "; + escapeString( + std::cout, + rest, + { + .maxLength = 40, + .outputAnsiColors = true, + .escapeNonPrinting = true, + } + ); + std::cout << std::endl; } - std::visit( - overloaded{ - [&](Indent & s) { - if (c == ' ') { - if (++s.pos >= indent_) { - transition(Prompt{}); - } - } else { - transition(Commentary{AccumulatingState{.lineAccumulator = std::string{c}}}); - } - }, - [&](Prompt & s) { - if (s.pos >= prompt_.length()) { - transition(Command{AccumulatingState{.lineAccumulator = std::string{c}}}); - return; - } else if (c == prompt_[s.pos]) { - // good prompt character - ++s.pos; - } else { - // didn't match the prompt, so it must have actually been output. - s.lineAccumulator.push_back(c); - transition(OutputLine{AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}}); - return; - } - s.lineAccumulator.push_back(c); - }, - [&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }}, - state_); -} - -void CLILiterateParser::onNewline() -{ - State lastState = std::move(state_); - bool newLastWasOutput = false; - - syntax_.push_back(std::visit( - overloaded{ - [&](Indent & s) { - // XXX: technically this eats trailing spaces - - // a newline following output is considered part of that output - if (lastWasOutput_) { - newLastWasOutput = true; - return Node::mkOutput(""); - } - return Node::mkCommentary(""); - }, - [&](Commentary & s) { return Node::mkCommentary(std::move(s.lineAccumulator)); }, - [&](Command & s) { return Node::mkCommand(std::move(s.lineAccumulator)); }, - [&](OutputLine & s) { - newLastWasOutput = true; - return Node::mkOutput(std::move(s.lineAccumulator)); - }, - [&](Prompt & s) { - // INDENT followed by newline is also considered a blank output line - return Node::mkOutput(std::move(s.lineAccumulator)); - }}, - lastState)); - - transition(Indent{}); - lastWasOutput_ = newLastWasOutput; -} - -void CLILiterateParser::feed(std::string_view s) -{ - for (char ch : s) { - feed(ch); - } -} - -void CLILiterateParser::transition(State new_state) -{ - // When we expect INDENT and we are parsing without indents, commentary - // cannot exist, so we want to transition directly into PROMPT before - // resuming normal processing. - if (Indent * i = std::get_if(&new_state); i != nullptr && indent_ == 0) { - new_state = Prompt{AccumulatingState{}, i->pos}; + template + auto pushNode(T node) -> void + { + if constexpr (DEBUG_PARSER) { + std::cout << debugNode(node); + } + syntax.emplace_back(node); } - state_ = new_state; -} - -auto CLILiterateParser::syntax() const -> std::vector const & -{ - return syntax_; -} - -auto CLILiterateParser::unparse(const std::string & prompt, const std::vector & syntax, size_t indent) - -> std::string -{ - std::string indent_str(indent, ' '); - std::ostringstream out{}; - - for (auto & node : syntax) { - switch (node.kind) { - case NodeKind::COMMENTARY: - out << node.text << "\n"; - break; - case NodeKind::COMMAND: - out << indent_str << prompt << node.text << "\n"; - break; - case NodeKind::OUTPUT: - out << indent_str << node.text << "\n"; - break; + auto parseLiteral(const char c) -> bool + { + if (rest.starts_with(c)) { + rest.remove_prefix(1); + return true; + } else { + return false; } } - return out.str(); -} + auto parseLiteral(const std::string_view & literal) -> bool + { + if (rest.starts_with(literal)) { + rest.remove_prefix(literal.length()); + return true; + } else { + return false; + } + } -auto CLILiterateParser::tidyOutputForComparison(std::vector && syntax) -> std::vector + auto parseBool() -> bool + { + auto result = false; + if (parseLiteral("true")) { + result = true; + } else if (parseLiteral("false")) { + result = false; + } else { + throw ParseError("true or false", std::string(rest)); + } + auto untilNewline = parseUntilNewline(); + if (!untilNewline.empty()) { + throw ParseError("nothing after true or false", untilNewline); + } + return result; + } + + auto parseUntilNewline() -> std::string + { + auto pos = rest.find('\n'); + if (pos == std::string_view::npos) { + throw ParseError("text and then newline", std::string(rest)); + } else { + // `parseOutput()` sets this to true anyways. + lastWasOutput = false; + auto result = std::string(rest, 0, pos); + rest.remove_prefix(pos + 1); + return result; + } + } + + auto parseIndent() -> bool + { + if constexpr (DEBUG_PARSER) { + dbg("indent"); + } + if (indentString.empty()) { + return true; + } + + if (parseLiteral(indentString)) { + pushNode(Indent(indentString)); + return true; + } else { + if constexpr (DEBUG_PARSER) { + dbg("indent failed"); + } + return false; + } + } + + auto parseCommand() -> void + { + if constexpr (DEBUG_PARSER) { + dbg("command"); + } + auto untilNewline = parseUntilNewline(); + pushNode(Command(untilNewline)); + } + + auto parsePrompt() -> void + { + if constexpr (DEBUG_PARSER) { + dbg("prompt"); + } + if (parseLiteral(prompt)) { + pushNode(Prompt(prompt)); + if (rest.empty()) { + return; + } + parseCommand(); + } else { + parseOutput(); + } + } + + auto parseOutput() -> void + { + if constexpr (DEBUG_PARSER) { + dbg("output"); + } + auto untilNewline = parseUntilNewline(); + pushNode(Output(untilNewline)); + lastWasOutput = true; + } + + auto parseAtSign() -> void + { + if constexpr (DEBUG_PARSER) { + dbg("@ symbol"); + } + if (!parseLiteral('@')) { + parseOutputOrCommentary(); + } + + if (parseLiteral("args ")) { + parseArgs(); + } else if (parseLiteral("should-start ")) { + if constexpr (DEBUG_PARSER) { + dbg("@should-start"); + } + auto shouldStart = parseBool(); + pushNode(ShouldStart{shouldStart}); + } + } + + auto parseArgs() -> void + { + if constexpr (DEBUG_PARSER) { + dbg("@args"); + } + auto untilNewline = parseUntilNewline(); + pushNode(Args(untilNewline)); + } + + auto parseOutputOrCommentary() -> void + { + if constexpr (DEBUG_PARSER) { + dbg("output/commentary"); + } + auto oldLastWasOutput = lastWasOutput; + auto untilNewline = parseUntilNewline(); + + auto trimmed = trim_right_copy(untilNewline); + + if (oldLastWasOutput && trimmed.empty()) { + pushNode(Output{trimmed}); + } else { + pushNode(Commentary{untilNewline}); + } + } + + auto parseStartOfLine() -> void + { + if constexpr (DEBUG_PARSER) { + dbg("start of line"); + } + if (parseIndent()) { + parsePrompt(); + } else { + parseAtSign(); + } + } + + auto parse() && -> ParseResult + { + // Begin the recursive descent parser at the start of a new line. + while (!rest.empty()) { + parseStartOfLine(); + } + return std::move(*this).intoParseResult(); + } + + auto intoParseResult() && -> ParseResult + { + // Do another pass over the nodes to produce auxiliary results like parsed + // command line arguments. + std::vector args; + std::vector newSyntax; + auto shouldStart = true; + + for (auto it = syntax.begin(); it != syntax.end(); ++it) { + Node node = std::move(*it); + std::visit( + overloaded{ + [&](Args & e) { + auto split = shell_split(std::string(e.text)); + args.insert(args.end(), split.begin(), split.end()); + }, + [&](ShouldStart & e) { shouldStart = e.shouldStart; }, + [&](auto & e) {}, + }, + node + ); + + newSyntax.push_back(node); + } + + return ParseResult{ + .syntax = std::move(newSyntax), + .args = std::move(args), + .shouldStart = shouldStart, + }; + } +}; + +template +auto tidySyntax(View syntax) -> std::vector { - std::vector newSyntax{}; + // Note: Setting `lastWasCommand` lets us trim blank lines at the start and + // end of the output stream. + auto lastWasCommand = true; + std::vector newSyntax; - // Eat trailing newlines, so assume that the very end was actually a command - bool lastWasCommand = true; - bool newLastWasCommand = true; - - auto v = std::ranges::reverse_view(syntax); - - for (auto it = v.begin(); it != v.end(); ++it) { - Node item = std::move(*it); - - lastWasCommand = newLastWasCommand; - // chomp commentary - if (item.kind == NodeKind::COMMENTARY) { + for (auto it = syntax.begin(); it != syntax.end(); ++it) { + Node node = *it; + // Only compare `Command` and `Output` nodes. + if (std::visit([&](auto && e) { return !e.shouldCompare(); }, node)) { continue; } - if (item.kind == NodeKind::COMMAND) { - newLastWasCommand = true; + // Remove blank lines before and after commands. This lets us keep nice + // whitespace in the test files. + auto shouldKeep = std::visit( + overloaded{ + [&](Command & e) { + lastWasCommand = true; + auto trimmed = trim_right_copy(e.text); + if (trimmed.empty()) { + return false; + } else { + e.text = trimmed; + return true; + } + }, + [&](Output & e) { + std::string trimmed = trim_right_copy(e.text); + if (lastWasCommand && trimmed.empty()) { + // NB: Keep `lastWasCommand` true in this branch so we + // can keep pruning empty output lines. + return false; + } else { + e.text = trimmed; + lastWasCommand = false; + return true; + } + }, + [&](auto & e) { + lastWasCommand = false; + return false; + }, + }, + node + ); - if (item.text == "") { - // chomp empty commands - continue; - } + if (shouldKeep) { + newSyntax.push_back(node); } - - if (item.kind == NodeKind::OUTPUT) { - // TODO: horrible - bool nextIsCommand = (it + 1 == v.end()) ? false : (it + 1)->kind == NodeKind::COMMAND; - std::string trimmedText = boost::algorithm::trim_right_copy(item.text); - if ((lastWasCommand || nextIsCommand) && trimmedText == "") { - // chomp empty text above or directly below commands - continue; - } - - // real output, stop chomping - newLastWasCommand = false; - - item = Node::mkOutput(std::move(trimmedText)); - } - newSyntax.push_back(std::move(item)); } - std::reverse(newSyntax.begin(), newSyntax.end()); return newSyntax; } -}; +auto ParseResult::tidyOutputForComparison() -> std::vector +{ + auto reversed = tidySyntax(std::ranges::reverse_view(syntax)); + auto unreversed = tidySyntax(std::ranges::reverse_view(reversed)); + return unreversed; +} + +void ParseResult::interpolatePwd(std::string_view pwd) +{ + std::vector newArgs; + for (auto & arg : args) { + newArgs.push_back(replaceStrings(arg, "${PWD}", pwd)); + } + args = std::move(newArgs); +} + +const char * ParseError::what() const noexcept +{ + if (what_) { + return what_->c_str(); + } else { + auto escaped = escapeString(rest, {.maxLength = 256, .escapeNonPrinting = true}); + auto hint = + new HintFmt("Parse error: Expected %1%, got:\n%2%", expected, Uncolored(escaped)); + what_ = hint->str(); + return what_->c_str(); + } +} + +auto parse(const std::string input, Config config) -> ParseResult +{ + return Parser(input, config).parse(); +} + +std::ostream & operator<<(std::ostream & output, const Args & node) +{ + return output << "@args " << node.text; +} + +std::ostream & operator<<(std::ostream & output, const ShouldStart & node) +{ + return output << "@should-start " << (node.shouldStart ? "true" : "false"); +} + +std::ostream & operator<<(std::ostream & output, const TextNode & rhs) +{ + return output << rhs.text; +} + +void unparseNode(std::ostream & output, const Node & node, bool withNewline) +{ + std::visit( + [&](const auto & n) { output << n << (withNewline && n.emitNewlineAfter() ? "\n" : ""); }, + node + ); +} + +template +std::string gtestFormat(T & value) +{ + std::ostringstream formatted; + unparseNode(formatted, value, true); + auto str = formatted.str(); + // Needs to be the literal string `\n` and not a newline character to + // trigger gtest diff printing. Yes seriously. + boost::algorithm::replace_all(str, "\n", "\\n"); + return str; +} + +void PrintTo(const std::vector & nodes, std::ostream * output) +{ + for (auto & node : nodes) { + *output << gtestFormat(node); + } +} + +std::string debugNode(const Node & node) +{ + std::ostringstream output; + output << std::visit([](const auto & n) { return n.kind(); }, node) << ": "; + std::ostringstream contents; + unparseNode(contents, node, false); + escapeString(output, contents.str(), {.escapeNonPrinting = true}); + return output.str(); +} + +auto ParseResult::debugPrint(std::ostream & output) -> void +{ + ::nix::cli_literate_parser::debugPrint(output, syntax); +} + +void debugPrint(std::ostream & output, std::vector & nodes) +{ + for (auto & node : nodes) { + output << debugNode(node) << std::endl; + } +} + +} // namespace cli_literate_parser +} // namespace nix diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.hh b/tests/unit/libutil-support/tests/cli-literate-parser.hh index 4cffd2ba9..2ff9348ef 100644 --- a/tests/unit/libutil-support/tests/cli-literate-parser.hh +++ b/tests/unit/libutil-support/tests/cli-literate-parser.hh @@ -3,132 +3,195 @@ #include #include +#include #include +#include #include #include -#include namespace nix { +namespace cli_literate_parser { + +// ------------------------- NODES ------------------------- +// +// To update golden test files while preserving commentary output and other `@` +// directives, we need to keep commentary output around after parsing. + +struct BaseNode { + virtual ~BaseNode() = default; + + virtual auto shouldCompare() const -> bool { return false; } + + virtual auto kind() const -> std::string = 0; + virtual auto emitNewlineAfter() const -> bool = 0; + + auto operator<=>(const BaseNode &rhs) const = default; +}; + +/** + * A node containing text. The text should be identical to how the node was + * written in the input file. + */ +struct TextNode : BaseNode { + std::string text; + + explicit TextNode(std::string text) : text(text) {} +}; + +std::ostream &operator<<(std::ostream &output, const TextNode &node); + +#define DECLARE_TEXT_NODE(NAME, NEEDS_NEWLINE, SHOULD_COMPARE) \ + struct NAME : TextNode { \ + using TextNode::TextNode; \ + ~NAME() override = default; \ + \ + auto kind() const -> std::string override { return #NAME; } \ + auto emitNewlineAfter() const -> bool override { return NEEDS_NEWLINE; } \ + auto shouldCompare() const -> bool override { return SHOULD_COMPARE; } \ + }; + +/* name, needsNewline, shouldCompare */ +DECLARE_TEXT_NODE(Prompt, false, false) +DECLARE_TEXT_NODE(Command, true, true) +DECLARE_TEXT_NODE(Output, true, true) +DECLARE_TEXT_NODE(Commentary, true, false) +DECLARE_TEXT_NODE(Args, true, false) +DECLARE_TEXT_NODE(Indent, false, false) + +#undef DECLARE_TEXT_NODE + +struct ShouldStart : BaseNode { + bool shouldStart; + + ShouldStart(bool shouldStart) : shouldStart(shouldStart) {} + ~ShouldStart() override = default; + auto emitNewlineAfter() const -> bool override { return true; } + auto kind() const -> std::string override { return "should-start"; } + + auto operator<=>(const ShouldStart &rhs) const = default; +}; +std::ostream &operator<<(std::ostream &output, const ShouldStart &node); + +/** + * Any syntax node, including those that are cosmetic. + */ +using Node = std::variant; + +/** Unparses a node into the exact text that would have created it, including a + * newline at the end if present, if withNewline is set */ +void unparseNode(std::ostream &output, const Node &node, + bool withNewline = true); + +std::string debugNode(const Node &node); +void debugPrint(std::ostream &output, std::vector &nodes); + +/** + * Override gtest printing for lists of nodes. + */ +void PrintTo(std::vector const &nodes, std::ostream *output); + +/** + * The result of parsing a test file. + */ +struct ParseResult { + /** + * A set of nodes that can be used to reproduce the input file. This is used + * to implement updating the test files. + */ + std::vector syntax; + + /** + * Extra CLI arguments. + */ + std::vector args; + + /** + * Should the program start successfully? + */ + bool shouldStart = false; + + /** + * Replace `$PWD` with the given value in `args`. + */ + void interpolatePwd(std::string_view pwd); + + /** + * Tidy `syntax` to remove unnecessary nodes. + */ + auto tidyOutputForComparison() -> std::vector; + + auto debugPrint(std::ostream &output) -> void; +}; + +/** + * A parse error. + */ +struct ParseError : std::exception { + std::string expected; + std::string rest; + + ParseError(std::string expected, std::string rest) + : expected(expected), rest(rest) {} + + const char *what() const noexcept override; + +private: + /** + * Cached formatted contents of `what()`. + */ + mutable std::optional what_; +}; + +struct Config { + /** + * The prompt string to look for. + */ + std::string prompt; + /** + * The number of spaces of indent for commands and output. + */ + size_t indent = 2; +}; + /* - * A DFA parser for literate test cases for CLIs. + * A recursive descent parser for literate test cases for CLIs. * * FIXME: implement merging of these, so you can auto update cases that have * comments. * - * Format: - * COMMENTARY - * INDENT PROMPT COMMAND - * INDENT OUTPUT + * Syntax: + * ``` + * ( COMMENTARY + * | INDENT PROMPT COMMAND + * | INDENT OUTPUT + * | @args ARGS + * | @should-start ( true | false )) * + * ``` * * e.g. + * ``` * commentary commentary commentary + * @args --foo + * @should-start false * nix-repl> :t 1 * an integer + * ``` * - * Yields: + * Yields something like: + * ``` * Commentary "commentary commentary commentary" + * Args "--foo" + * ShouldStart false * Command ":t 1" * Output "an integer" + * ``` * * Note: one Output line is generated for each line of the sources, because * this is effectively necessary to be able to align them in the future to * auto-update tests. */ -class CLILiterateParser -{ -public: +auto parse(std::string input, Config config) -> ParseResult; - enum class NodeKind { - COMMENTARY, - COMMAND, - OUTPUT, - }; - - struct Node - { - NodeKind kind; - std::string text; - std::strong_ordering operator<=>(Node const &) const = default; - - static Node mkCommentary(std::string text) - { - return Node{.kind = NodeKind::COMMENTARY, .text = text}; - } - - static Node mkCommand(std::string text) - { - return Node{.kind = NodeKind::COMMAND, .text = text}; - } - - static Node mkOutput(std::string text) - { - return Node{.kind = NodeKind::OUTPUT, .text = text}; - } - - auto print() const -> std::string; - }; - - CLILiterateParser(std::string prompt, size_t indent = 2); - - auto syntax() const -> std::vector const &; - - /** Feeds a character into the parser */ - void feed(char c); - - /** Feeds a string into the parser */ - void feed(std::string_view s); - - /** Parses an input in a non-streaming fashion */ - static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector; - - /** Returns, losslessly, the string that would have generated a syntax tree */ - static auto unparse(std::string const & prompt, std::vector const & syntax, size_t indent = 2) -> std::string; - - /** Consumes a CLILiterateParser and gives you the syntax out of it */ - auto intoSyntax() && -> std::vector; - - /** Tidies syntax to remove trailing whitespace from outputs and remove any - * empty prompts */ - static auto tidyOutputForComparison(std::vector && syntax) -> std::vector; - -private: - - struct AccumulatingState - { - std::string lineAccumulator; - }; - struct Indent - { - size_t pos = 0; - }; - struct Commentary : public AccumulatingState - {}; - struct Prompt : AccumulatingState - { - size_t pos = 0; - }; - struct Command : public AccumulatingState - {}; - struct OutputLine : public AccumulatingState - {}; - - using State = std::variant; - State state_; - - constexpr static auto stateDebug(State const&) -> const char *; - - const std::string prompt_; - const size_t indent_; - - /** Last line was output, so we consider a blank to be part of the output */ - bool lastWasOutput_; - - std::vector syntax_; - - void transition(State newState); - void onNewline(); -}; - -// Override gtest printing for lists of nodes -void PrintTo(std::vector const & nodes, std::ostream * os); -}; +}; // namespace cli_literate_parser +}; // namespace nix