Rewrite REPL test parser

- Use a recursive descent parser so that it's easy to extend.
- Add `@args` to enable customizing command-line arguments
- Add `@should-start` to enable `nix repl` tests that error before
  entering the REPL
- Make sure to read all remaining output from stdout before comparing. This
  catches some extra output we were tossing out before!

Change-Id: I5522555df4c313024ab15cd10f9f04e7293bda3a
This commit is contained in:
Rebecca Turner 2024-03-22 16:45:05 -07:00
parent f3f7d81988
commit 83729b2075
15 changed files with 857 additions and 422 deletions

View file

@ -1,17 +1,28 @@
Commentary "meow meow meow" Commentary: "meow meow meow"
Command "command" Indent: " "
Output "output output one" Prompt: "nix-repl> "
Output "" Command: "command"
Output "" Indent: " "
Output "output output two" Output: "output output one"
Commentary "meow meow" Output: ""
Command "command two" Commentary: ""
Output "output output output" Indent: " "
Commentary "commentary" Output: "output output two"
Output "output output output" Commentary: "meow meow"
Output "" Indent: " "
Commentary "the blank below should be chomped" Prompt: "nix-repl> "
Command "command three" Command: "command two"
Commentary "" Indent: " "
Output "meow output" Output: "output output output"
Output "" Commentary: "commentary"
Indent: " "
Output: "output output output"
Output: ""
Commentary: "the blank below should be chomped"
Indent: " "
Prompt: "nix-repl> "
Command: "command three"
Commentary: ""
Indent: " "
Output: "meow output"
Output: ""

View file

@ -1,10 +1,9 @@
Command "command" Command: "command"
Output "output output one" Output: "output output one"
Output "" Output: ""
Output "" Output: "output output two"
Output "output output two" Command: "command two"
Command "command two" Output: "output output output"
Output "output output output" Output: "output output output"
Output "output output output" Command: "command three"
Command "command three" Output: "meow output"
Output "meow output"

View file

@ -0,0 +1,11 @@
command
output output one
output output two
command two
output output output
output output output
command three
meow output

View file

@ -1,6 +1,7 @@
https://github.com/NixOS/nix/pull/9917 (Enter debugger more reliably in let expressions and function calls) https://github.com/NixOS/nix/pull/9917 (Enter debugger more reliably in let expressions and function calls)
This test ensures that continues don't skip opportunities to enter the debugger. This test ensures that continues don't skip opportunities to enter the debugger.
@args --debugger
trace: before outer break trace: before outer break
info: breakpoint reached info: breakpoint reached
@ -13,7 +14,7 @@ This test ensures that continues don't skip opportunities to enter the debugger.
0: error: breakpoint reached 0: error: breakpoint reached
«none»:0 «none»:0
1: while calling a function 1: while calling a function
TEST_DATA/regression_9917.nix:3:5 $TEST_DATA/regression_9917.nix:3:5
2| a = builtins.trace "before inner break" ( 2| a = builtins.trace "before inner break" (
3| builtins.break { msg = "hello"; } 3| builtins.break { msg = "hello"; }
@ -21,7 +22,7 @@ This test ensures that continues don't skip opportunities to enter the debugger.
4| ); 4| );
2: while calling a function 2: while calling a function
TEST_DATA/regression_9917.nix:2:7 $TEST_DATA/regression_9917.nix:2:7
1| let 1| let
2| a = builtins.trace "before inner break" ( 2| a = builtins.trace "before inner break" (

View file

@ -1,3 +1,4 @@
@args --debugger
error: error:
… while evaluating the error message passed to builtin.throw … while evaluating the error message passed to builtin.throw
@ -14,3 +15,18 @@ We expect to be able to see locals like r in the debugger:
Env level 1 Env level 1
abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true
nix-repl> :quit
error:
… while evaluating the file '$TEST_DATA/regression_9918.nix':
… while calling the 'throw' builtin
at $TEST_DATA/regression_9918.nix:3:7:
2| r = [];
3| x = builtins.throw r;
| ^
4| in
… while evaluating the error message passed to builtin.throw
error: cannot coerce a list to a string: [ ]

View file

@ -1,5 +1,5 @@
with { inherit ({}) invalid; }; with { inherit ({}) invalid; };
let let
x = builtins.break 1; puppy = "doggy";
in in
x builtins.break { }

View file

@ -1,3 +1,4 @@
@args --debugger
info: breakpoint reached info: breakpoint reached
debugger should not crash now, but also not show any with variables debugger should not crash now, but also not show any with variables
@ -5,10 +6,21 @@ debugger should not crash now, but also not show any with variables
0: error: breakpoint reached 0: error: breakpoint reached
«none»:0 «none»:0
Env level 0 Env level 0
static: x static: puppy
Env level 1 Env level 1
static: static:
Env level 2 Env level 2
abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true
error:
… while evaluating the file '$TEST_DATA/regression_l145.nix':
… while calling the 'break' builtin
at $TEST_DATA/regression_l145.nix:5:3:
4| in
5| builtins.break { }
| ^
6|
error: breakpoint reached\n

View file

@ -1,3 +1,4 @@
@args --debugger
trace: before outer break trace: before outer break
info: breakpoint reached info: breakpoint reached
@ -24,7 +25,7 @@ If we :st past the frame in the backtrace with the meow in it, the meow should n
nix-repl> :st 3 nix-repl> :st 3
3: while calling a function 3: while calling a function
TEST_DATA/stack_vars.nix:5:7 $TEST_DATA/stack_vars.nix:5:7
4| ); 4| );
5| b = builtins.trace "before outer break" ( 5| b = builtins.trace "before outer break" (
@ -58,9 +59,8 @@ If we :st past the frame in the backtrace with the meow in it, the meow should n
3 3
nix-repl> :st 3 nix-repl> :st 3
3: while calling a function 3: while calling a function
TEST_DATA/stack_vars.nix:2:7 $TEST_DATA/stack_vars.nix:2:7
1| let 1| let
2| a = builtins.trace "before inner break" ( 2| a = builtins.trace "before inner break" (
@ -72,3 +72,21 @@ If we :st past the frame in the backtrace with the meow in it, the meow should n
Env level 1 Env level 1
abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true
nix-repl> :quit
error:
… while calling the 'trace' builtin
at $TEST_DATA/stack_vars.nix:2:7:
1| let
2| a = builtins.trace "before inner break" (
| ^
3| let meow' = 3; in builtins.break { msg = "hello"; }
… while calling the 'break' builtin
at $TEST_DATA/stack_vars.nix:3:23:
2| a = builtins.trace "before inner break" (
3| let meow' = 3; in builtins.break { msg = "hello"; }
| ^
4| );
error: breakpoint reached

View file

@ -1,16 +1,17 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <boost/algorithm/string/replace.hpp>
#include <optional>
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <optional>
#include <unistd.h> #include <unistd.h>
#include <boost/algorithm/string/replace.hpp>
#include "escape-string.hh"
#include "test-session.hh" #include "test-session.hh"
#include "util.hh"
#include "tests/characterization.hh" #include "tests/characterization.hh"
#include "tests/cli-literate-parser.hh" #include "tests/cli-literate-parser.hh"
#include "tests/terminal-code-eater.hh" #include "tests/terminal-code-eater.hh"
#include "util.hh"
using namespace std::string_literals; using namespace std::string_literals;
@ -40,91 +41,147 @@ public:
return unitTestData + "/" + testStem; return unitTestData + "/" + testStem;
} }
void runReplTest(std::string_view const & content, std::vector<std::string> extraArgs = {}) const void runReplTest(const std::string content, std::vector<std::string> extraArgs = {}) const
{ {
auto syntax = CLILiterateParser::parse(std::string(REPL_PROMPT), content); auto parsed = cli_literate_parser::parse(
content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT), .indent = 2}
);
parsed.interpolatePwd(unitTestData);
// FIXME: why does this need two --quiets // FIXME: why does this need two --quiets
// show-trace is on by default due to test configuration, but is not a standard // show-trace is on by default due to test configuration, but is not a
Strings args{"--quiet", "repl", "--quiet", "--option", "show-trace", "false", "--offline", "--extra-experimental-features", "repl-automation"}; // standard
Strings args{
"--quiet",
"repl",
"--quiet",
"--option",
"show-trace",
"false",
"--offline",
"--extra-experimental-features",
"repl-automation",
};
args.insert(args.end(), extraArgs.begin(), extraArgs.end()); args.insert(args.end(), extraArgs.begin(), extraArgs.end());
args.insert(args.end(), parsed.args.begin(), parsed.args.end());
auto nixBin = canonPath(getEnvNonEmpty("NIX_BIN_DIR").value_or(NIX_BIN_DIR)); auto nixBin = canonPath(getEnvNonEmpty("NIX_BIN_DIR").value_or(NIX_BIN_DIR));
auto process = RunningProcess::start(nixBin + "/nix", args); auto process = RunningProcess::start(nixBin + "/nix", args);
auto session = TestSession{std::string(AUTOMATION_PROMPT), std::move(process)}; auto session = TestSession(std::string(AUTOMATION_PROMPT), std::move(process));
for (auto & bit : syntax) { for (auto & event : parsed.syntax) {
if (bit.kind != CLILiterateParser::NodeKind::COMMAND) { std::visit(
continue; overloaded{
} [&](const cli_literate_parser::Command & e) {
ASSERT_TRUE(session.waitForPrompt());
if (!session.waitForPrompt()) { if (e.text == ":quit") {
ASSERT_TRUE(false); // If we quit the repl explicitly, we won't have a
} // prompt when we're done.
session.runCommand(bit.text); parsed.shouldStart = false;
}
session.runCommand(e.text);
},
[&](const auto & e) {},
},
event
);
} }
if (!session.waitForPrompt()) { if (parsed.shouldStart) {
ASSERT_TRUE(false); ASSERT_TRUE(session.waitForPrompt());
} }
session.close(); session.close();
auto replacedOutLog = boost::algorithm::replace_all_copy(session.outLog, unitTestData, "TEST_DATA"); auto replacedOutLog =
boost::algorithm::replace_all_copy(session.outLog, unitTestData, "$TEST_DATA");
auto cleanedOutLog = trimOutLog(replacedOutLog); auto cleanedOutLog = trimOutLog(replacedOutLog);
auto parsedOutLog = CLILiterateParser::parse(std::string(AUTOMATION_PROMPT), cleanedOutLog, 0); auto parsedOutLog = cli_literate_parser::parse(
std::string(cleanedOutLog),
cli_literate_parser::Config{.prompt = std::string(AUTOMATION_PROMPT), .indent = 0}
);
parsedOutLog = CLILiterateParser::tidyOutputForComparison(std::move(parsedOutLog)); auto expected = parsed.tidyOutputForComparison();
syntax = CLILiterateParser::tidyOutputForComparison(std::move(syntax)); auto actual = parsedOutLog.tidyOutputForComparison();
ASSERT_EQ(parsedOutLog, syntax); ASSERT_EQ(expected, actual);
}
/**
 * Runs the REPL test stored in `<nameBase>.test`.
 *
 * When a golden-master file `<nameBase>.nix` exists, `-f <that path>` is
 * appended to `extraArgs` before the test is run.
 */
void runReplTestPath(const std::string_view & nameBase, std::vector<std::string> extraArgs)
{
    // Only pass a file to the REPL when the test actually ships one.
    if (const auto nixFile = goldenMaster(nameBase + ".nix"); pathExists(nixFile)) {
        extraArgs.emplace_back("-f");
        extraArgs.emplace_back(nixFile);
    }

    readTest(nameBase + ".test", [this, extraArgs](std::string testInput) {
        runReplTest(testInput, extraArgs);
    });
}
/** Runs the REPL test `<nameBase>.test` without any extra command-line arguments. */
void runReplTestPath(const std::string_view & nameBase)
{
    runReplTestPath(nameBase, std::vector<std::string>{});
}
/** Runs the REPL test `<nameBase>.test` with `--debugger` as the only extra argument. */
void runDebuggerTest(const std::string_view & nameBase)
{
    runReplTestPath(nameBase, std::vector<std::string>{"--debugger"});
}
}; };
TEST_F(ReplSessionTest, parses) TEST_F(ReplSessionTest, round_trip)
{
writeTest("basic.test", [this]() {
const std::string content = readFile(goldenMaster("basic.test"));
auto parsed = cli_literate_parser::parse(
content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT)}
);
std::ostringstream out{};
for (auto & node : parsed.syntax) {
cli_literate_parser::unparseNode(out, node, true);
}
return out.str();
});
}
TEST_F(ReplSessionTest, tidy)
{ {
writeTest("basic.ast", [this]() { writeTest("basic.ast", [this]() {
const std::string content = readFile(goldenMaster("basic.test")); const std::string content = readFile(goldenMaster("basic.test"));
auto parser = CLILiterateParser{std::string(REPL_PROMPT)}; auto parsed = cli_literate_parser::parse(
parser.feed(content); content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT)}
);
std::ostringstream out{}; std::ostringstream out{};
for (auto & bit : parser.syntax()) { for (auto & node : parsed.syntax) {
out << bit.print() << "\n"; out << debugNode(node) << "\n";
} }
return out.str(); return out.str();
}); });
writeTest("basic_tidied.ast", [this]() { writeTest("basic_tidied.ast", [this]() {
const std::string content = readFile(goldenMaster("basic.test")); const std::string content = readFile(goldenMaster("basic.test"));
auto syntax = CLILiterateParser::parse(std::string(REPL_PROMPT), content); auto parsed = cli_literate_parser::parse(
content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT)}
syntax = CLILiterateParser::tidyOutputForComparison(std::move(syntax)); );
auto tidied = parsed.tidyOutputForComparison();
std::ostringstream out{}; std::ostringstream out{};
for (auto & bit : syntax) { for (auto & node : tidied) {
out << bit.print() << "\n"; out << debugNode(node) << "\n";
} }
return out.str(); return out.str();
}); });
} }
TEST_F(ReplSessionTest, repl_basic) #define REPL_TEST(name) \
{
readTest("basic_repl.test", [this](std::string input) { runReplTest(input); });
}
#define DEBUGGER_TEST(name) \
TEST_F(ReplSessionTest, name) \ TEST_F(ReplSessionTest, name) \
{ \ { \
readTest(#name ".test", [this](std::string input) { \ runReplTestPath(#name); \
runReplTest(input, {"--debugger", "-f", goldenMaster(#name ".nix")}); \
}); \
} }
DEBUGGER_TEST(regression_9918); REPL_TEST(regression_9918);
DEBUGGER_TEST(regression_9917); REPL_TEST(regression_9917);
DEBUGGER_TEST(regression_l145); REPL_TEST(stack_vars);
DEBUGGER_TEST(stack_vars); REPL_TEST(basic_repl);
}; }; // namespace nix

View file

@ -1,4 +1,5 @@
#include <iostream> #include <iostream>
#include <span>
#include <unistd.h> #include <unistd.h>
#include "test-session.hh" #include "test-session.hh"
@ -21,14 +22,17 @@ RunningProcess RunningProcess::start(std::string executable, Strings args)
// This is separate from runProgram2 because we have different IO requirements // This is separate from runProgram2 because we have different IO requirements
pid_t pid = startProcess([&]() { pid_t pid = startProcess([&]() {
if (dup2(procStdout.writeSide.get(), STDOUT_FILENO) == -1) if (dup2(procStdout.writeSide.get(), STDOUT_FILENO) == -1) {
throw SysError("dupping stdout"); throw SysError("dupping stdout");
if (dup2(procStdin.readSide.get(), STDIN_FILENO) == -1) }
if (dup2(procStdin.readSide.get(), STDIN_FILENO) == -1) {
throw SysError("dupping stdin"); throw SysError("dupping stdin");
}
procStdin.writeSide.close(); procStdin.writeSide.close();
procStdout.readSide.close(); procStdout.readSide.close();
if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1) if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1) {
throw SysError("dupping stderr"); throw SysError("dupping stderr");
}
execv(executable.c_str(), stringsToCharPtrs(args).data()); execv(executable.c_str(), stringsToCharPtrs(args).data());
throw SysError("exec did not happen"); throw SysError("exec did not happen");
}); });
@ -44,7 +48,8 @@ RunningProcess RunningProcess::start(std::string executable, Strings args)
} }
[[gnu::unused]] [[gnu::unused]]
std::ostream & operator<<(std::ostream & os, ReplOutputParser::State s) std::ostream &
operator<<(std::ostream & os, ReplOutputParser::State s)
{ {
switch (s) { switch (s) {
case ReplOutputParser::State::Prompt: case ReplOutputParser::State::Prompt:
@ -91,8 +96,7 @@ bool ReplOutputParser::feed(char c)
return false; return false;
} }
/** Waits for the prompt and then returns if a prompt was found */ bool TestSession::readOutThen(ReadOutThenCallback cb)
bool TestSession::waitForPrompt()
{ {
std::vector<char> buf(1024); std::vector<char> buf(1024);
@ -106,38 +110,67 @@ bool TestSession::waitForPrompt()
return false; return false;
} }
switch (cb(std::span(buf.data(), res))) {
case ReadOutThenCallbackResult::Stop:
return true;
case ReadOutThenCallbackResult::Continue:
continue;
}
}
}
bool TestSession::waitForPrompt()
{
bool notEof = readOutThen([&](std::span<char> s) -> ReadOutThenCallbackResult {
bool foundPrompt = false; bool foundPrompt = false;
for (ssize_t i = 0; i < res; ++i) {
for (auto ch : s) {
// foundPrompt = foundPrompt || outputParser.feed(buf[i]); // foundPrompt = foundPrompt || outputParser.feed(buf[i]);
bool wasEaten = true; bool wasEaten = true;
eater.feed(buf[i], [&](char c) { eater.feed(ch, [&](char c) {
wasEaten = false; wasEaten = false;
foundPrompt = outputParser.feed(buf[i]) || foundPrompt; foundPrompt = outputParser.feed(ch) || foundPrompt;
outLog.push_back(c); outLog.push_back(c);
}); });
if constexpr (DEBUG_REPL_PARSER) { if constexpr (DEBUG_REPL_PARSER) {
std::cerr << "raw " << MaybeHexEscapedChar{buf[i]} << (wasEaten ? " [eaten]" : "") << "\n"; std::cerr << "raw " << MaybeHexEscapedChar{ch} << (wasEaten ? " [eaten]" : "") << "\n";
} }
} }
if (foundPrompt) { return foundPrompt ? ReadOutThenCallbackResult::Stop : ReadOutThenCallbackResult::Continue;
return true; });
return notEof;
}
void TestSession::wait()
{
readOutThen([&](std::span<char> s) {
for (auto ch : s) {
eater.feed(ch, [&](char c) {
outputParser.feed(c);
outLog.push_back(c);
});
} }
} // just keep reading till we hit eof
return ReadOutThenCallbackResult::Continue;
});
} }
void TestSession::close() void TestSession::close()
{ {
proc.procStdin.close(); proc.procStdin.close();
wait();
proc.procStdout.close(); proc.procStdout.close();
} }
void TestSession::runCommand(std::string command) void TestSession::runCommand(std::string command)
{ {
if constexpr (DEBUG_REPL_PARSER) if constexpr (DEBUG_REPL_PARSER) {
std::cerr << "runCommand " << command << "\n"; std::cerr << "runCommand " << command << "\n";
}
command += "\n"; command += "\n";
// We have to feed a newline into the output parser, since Nix might not // We have to feed a newline into the output parser, since Nix might not
// give us a newline before a prompt in all cases (it might clear line // give us a newline before a prompt in all cases (it might clear line

View file

@ -1,7 +1,9 @@
#pragma once #pragma once
///@file ///@file
#include <functional>
#include <sched.h> #include <sched.h>
#include <span>
#include <string> #include <string>
#include "util.hh" #include "util.hh"
@ -22,8 +24,7 @@ struct RunningProcess
class ReplOutputParser class ReplOutputParser
{ {
public: public:
ReplOutputParser(std::string prompt) ReplOutputParser(std::string prompt) : prompt(prompt)
: prompt(prompt)
{ {
assert(!prompt.empty()); assert(!prompt.empty());
} }
@ -60,10 +61,27 @@ struct TestSession
{ {
} }
/** Waits for the prompt and then returns if a prompt was found */
bool waitForPrompt(); bool waitForPrompt();
/** Feeds a line of input into the command */
void runCommand(std::string command); void runCommand(std::string command);
/** Closes the session, closing standard input and waiting for standard
* output to close, capturing any remaining output. */
void close(); void close();
private:
/** Waits until the command closes its output */
void wait();
enum class ReadOutThenCallbackResult { Stop, Continue };
using ReadOutThenCallback = std::function<ReadOutThenCallbackResult(std::span<char>)>;
/** Reads some chunks of output, calling the callback provided for each
* chunk and stopping if it returns Stop.
*
* @returns false if EOF, true if the callback requested we stop first.
* */
bool readOutThen(ReadOutThenCallback cb);
}; };
}; };

View file

@ -8,7 +8,7 @@ libutil-test-support_INSTALL_DIR :=
libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc) libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc)
libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES) libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES) -I src/libutil
# libexpr so we can steal their string printer from print.cc # libexpr so we can steal their string printer from print.cc
libutil-test-support_LIBS = libutil libexpr libutil-test-support_LIBS = libutil libexpr

View file

@ -74,20 +74,20 @@ public:
{ {
auto file = goldenMaster(testStem); auto file = goldenMaster(testStem);
auto got = test(); auto actual = test();
if (testAccept()) if (testAccept())
{ {
createDirs(dirOf(file)); createDirs(dirOf(file));
writeFile2(file, got); writeFile2(file, actual);
GTEST_SKIP() GTEST_SKIP()
<< "Updating golden master " << "Updating golden master "
<< file; << file;
} }
else else
{ {
decltype(got) expected = readFile2(file); decltype(actual) expected = readFile2(file);
ASSERT_EQ(got, expected); ASSERT_EQ(expected, actual);
} }
} }

View file

@ -1,248 +1,444 @@
#include "cli-literate-parser.hh" #include "cli-literate-parser.hh"
#include "escape-string.hh" #include "escape-string.hh"
#include "libexpr/print.hh"
#include "escape-char.hh" #include "escape-char.hh"
#include "libexpr/print.hh"
#include "types.hh" #include "types.hh"
#include "util.hh" #include "util.hh"
#include <ranges> #include <ranges>
#include <boost/algorithm/string/replace.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <iostream> #include <iostream>
#include <memory> #include <memory>
#include <boost/algorithm/string/trim.hpp> #include <sstream>
#include <variant>
using namespace std::string_literals; #include "cli-literate-parser.hh"
#include "escape-string.hh"
namespace nix { #include "fmt.hh"
#include "libexpr/print.hh"
#include "shlex.hh"
#include "types.hh"
#include "util.hh"
static constexpr const bool DEBUG_PARSER = false; static constexpr const bool DEBUG_PARSER = false;
constexpr auto CLILiterateParser::stateDebug(State const & s) -> const char * using namespace std::string_literals;
{ using namespace boost::algorithm;
return std::visit(
overloaded{// clang-format off
[](Indent const&) -> const char * { return "indent"; },
[](Commentary const&) -> const char * { return "indent"; },
[](Prompt const&) -> const char * { return "prompt"; },
[](Command const&) -> const char * { return "command"; },
[](OutputLine const&) -> const char * { return "output_line"; }},
// clang-format on
s);
}
auto CLILiterateParser::Node::print() const -> std::string namespace nix {
{
std::ostringstream s{};
switch (kind) {
case NodeKind::COMMENTARY:
s << "Commentary ";
break;
case NodeKind::COMMAND:
s << "Command ";
break;
case NodeKind::OUTPUT:
s << "Output ";
break;
}
escapeString(s, this->text);
return s.str();
}
void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os) namespace cli_literate_parser {
{
for (auto & node : nodes) {
*os << node.print() << "\\n";
}
}
auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) -> std::vector<Node> struct Parser
{ {
CLILiterateParser p{std::move(prompt), indent}; Parser(const std::string input, Config config)
p.feed(input); : input(input)
return std::move(p).intoSyntax(); , rest(this->input)
} , prompt(config.prompt)
, indentString(std::string(config.indent, ' '))
auto CLILiterateParser::intoSyntax() && -> std::vector<Node> , lastWasOutput(false)
{ , syntax{}
return std::move(this->syntax_); {
} assert(!prompt.empty());
CLILiterateParser::CLILiterateParser(std::string prompt, size_t indent)
: state_(indent == 0 ? State(Prompt{}) : State(Indent{}))
, prompt_(prompt)
, indent_(indent)
, lastWasOutput_(false)
, syntax_{}
{
assert(!prompt.empty());
}
void CLILiterateParser::feed(char c)
{
if constexpr (DEBUG_PARSER) {
std::cout << stateDebug(state_) << " " << MaybeHexEscapedChar{c} << "\n";
} }
if (c == '\n') { const std::string input;
onNewline(); std::string_view rest;
return; const std::string prompt;
const std::string indentString;
/** Last line was output, so we consider a blank to be part of the output */
bool lastWasOutput;
/**
* Nodes of syntax being built.
*/
std::vector<Node> syntax;
/**
 * Debugging aid: prints `state`, a colon, and an escaped rendering of the
 * not-yet-consumed input (`rest`) to standard output, then flushes.
 */
auto dbg(std::string_view state) -> void
{
    auto & out = std::cout;
    out << state << ": ";
    escapeString(out, rest, {.maxLength = 40, .ansiColors = true, .escapeNonPrinting = true});
    // Newline followed by an explicit flush, so traces show up immediately.
    out << '\n' << std::flush;
}
std::visit( template<typename T>
overloaded{ auto pushNode(T node) -> void
[&](Indent & s) { {
if (c == ' ') { if constexpr (DEBUG_PARSER) {
if (++s.pos >= indent_) { std::cout << debugNode(node);
transition(Prompt{}); }
} syntax.emplace_back(node);
} else {
transition(Commentary{AccumulatingState{.lineAccumulator = std::string{c}}});
}
},
[&](Prompt & s) {
if (s.pos >= prompt_.length()) {
transition(Command{AccumulatingState{.lineAccumulator = std::string{c}}});
return;
} else if (c == prompt_[s.pos]) {
// good prompt character
++s.pos;
} else {
// didn't match the prompt, so it must have actually been output.
s.lineAccumulator.push_back(c);
transition(OutputLine{AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}});
return;
}
s.lineAccumulator.push_back(c);
},
[&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }},
state_);
}
void CLILiterateParser::onNewline()
{
State lastState = std::move(state_);
bool newLastWasOutput = false;
syntax_.push_back(std::visit(
overloaded{
[&](Indent & s) {
// XXX: technically this eats trailing spaces
// a newline following output is considered part of that output
if (lastWasOutput_) {
newLastWasOutput = true;
return Node::mkOutput("");
}
return Node::mkCommentary("");
},
[&](Commentary & s) { return Node::mkCommentary(std::move(s.lineAccumulator)); },
[&](Command & s) { return Node::mkCommand(std::move(s.lineAccumulator)); },
[&](OutputLine & s) {
newLastWasOutput = true;
return Node::mkOutput(std::move(s.lineAccumulator));
},
[&](Prompt & s) {
// INDENT followed by newline is also considered a blank output line
return Node::mkOutput(std::move(s.lineAccumulator));
}},
lastState));
transition(Indent{});
lastWasOutput_ = newLastWasOutput;
}
void CLILiterateParser::feed(std::string_view s)
{
for (char ch : s) {
feed(ch);
}
}
void CLILiterateParser::transition(State new_state)
{
// When we expect INDENT and we are parsing without indents, commentary
// cannot exist, so we want to transition directly into PROMPT before
// resuming normal processing.
if (Indent * i = std::get_if<Indent>(&new_state); i != nullptr && indent_ == 0) {
new_state = Prompt{AccumulatingState{}, i->pos};
} }
state_ = new_state; auto parseLiteral(const char c) -> bool
} {
if (rest.starts_with(c)) {
auto CLILiterateParser::syntax() const -> std::vector<Node> const & rest.remove_prefix(1);
{ return true;
return syntax_; } else {
} return false;
auto CLILiterateParser::unparse(const std::string & prompt, const std::vector<Node> & syntax, size_t indent)
-> std::string
{
std::string indent_str(indent, ' ');
std::ostringstream out{};
for (auto & node : syntax) {
switch (node.kind) {
case NodeKind::COMMENTARY:
out << node.text << "\n";
break;
case NodeKind::COMMAND:
out << indent_str << prompt << node.text << "\n";
break;
case NodeKind::OUTPUT:
out << indent_str << node.text << "\n";
break;
} }
} }
return out.str(); auto parseLiteral(const std::string_view & literal) -> bool
} {
if (rest.starts_with(literal)) {
rest.remove_prefix(literal.length());
return true;
} else {
return false;
}
}
auto CLILiterateParser::tidyOutputForComparison(std::vector<Node> && syntax) -> std::vector<Node> auto parseBool() -> bool
{
auto result = false;
if (parseLiteral("true")) {
result = true;
} else if (parseLiteral("false")) {
result = false;
} else {
throw ParseError("true or false", std::string(rest));
}
auto untilNewline = parseUntilNewline();
if (!untilNewline.empty()) {
throw ParseError("nothing after true or false", untilNewline);
}
return result;
}
/**
 * Consumes the remainder of the current line, including its terminating
 * newline, and returns the text that preceded the newline.
 *
 * Side effect: resets `lastWasOutput` to false.
 *
 * @throws ParseError if the remaining input contains no newline.
 */
auto parseUntilNewline() -> std::string
{
    const auto newlineAt = rest.find('\n');
    if (newlineAt == std::string_view::npos) {
        throw ParseError("text and then newline", std::string(rest));
    }

    // `parseOutput()` sets this to true anyways.
    lastWasOutput = false;

    std::string line(rest.substr(0, newlineAt));
    rest.remove_prefix(newlineAt + 1);
    return line;
}
/**
 * Tries to consume the configured indentation at the current position.
 *
 * With an empty `indentString` this succeeds immediately and pushes no node.
 * Otherwise an `Indent` node is pushed when the indentation is present.
 *
 * @returns true if the indentation matched (or none is configured).
 */
auto parseIndent() -> bool
{
    if constexpr (DEBUG_PARSER) {
        dbg("indent");
    }

    if (indentString.empty()) {
        return true;
    }

    const bool matched = parseLiteral(indentString);
    if (matched) {
        pushNode(Indent(indentString));
    } else if constexpr (DEBUG_PARSER) {
        dbg("indent failed");
    }
    return matched;
}
auto parseCommand() -> void
{
if constexpr (DEBUG_PARSER) {
dbg("command");
}
auto untilNewline = parseUntilNewline();
pushNode(Command(untilNewline));
}
/**
 * Parses what follows the indentation: a prompt plus command if the prompt
 * literal is present, otherwise a line of output.
 *
 * A prompt that is the very last thing in the input pushes only the `Prompt`
 * node, with no command after it.
 */
auto parsePrompt() -> void
{
    if constexpr (DEBUG_PARSER) {
        dbg("prompt");
    }

    if (!parseLiteral(prompt)) {
        parseOutput();
        return;
    }

    pushNode(Prompt(prompt));
    if (!rest.empty()) {
        parseCommand();
    }
}
auto parseOutput() -> void
{
if constexpr (DEBUG_PARSER) {
dbg("output");
}
auto untilNewline = parseUntilNewline();
pushNode(Output(untilNewline));
lastWasOutput = true;
}
auto parseAtSign() -> void
{
if constexpr (DEBUG_PARSER) {
dbg("@ symbol");
}
if (!parseLiteral('@')) {
parseOutputOrCommentary();
}
if (parseLiteral("args ")) {
parseArgs();
} else if (parseLiteral("should-start ")) {
if constexpr (DEBUG_PARSER) {
dbg("@should-start");
}
auto shouldStart = parseBool();
pushNode(ShouldStart{shouldStart});
}
}
auto parseArgs() -> void
{
if constexpr (DEBUG_PARSER) {
dbg("@args");
}
auto untilNewline = parseUntilNewline();
pushNode(Args(untilNewline));
}
auto parseOutputOrCommentary() -> void
{
if constexpr (DEBUG_PARSER) {
dbg("output/commentary");
}
auto oldLastWasOutput = lastWasOutput;
auto untilNewline = parseUntilNewline();
auto trimmed = trim_right_copy(untilNewline);
if (oldLastWasOutput && trimmed.empty()) {
pushNode(Output{trimmed});
} else {
pushNode(Commentary{untilNewline});
}
}
/**
 * Entry point for each line: indented lines go on to prompt/output parsing,
 * all other lines to directive/commentary parsing.
 */
auto parseStartOfLine() -> void
{
    if constexpr (DEBUG_PARSER) {
        dbg("start of line");
    }

    if (!parseIndent()) {
        parseAtSign();
        return;
    }
    parsePrompt();
}
/**
 * Runs the parser over the whole input and converts the collected nodes
 * into a `ParseResult`. Consumes the parser (rvalue-qualified).
 */
auto parse() && -> ParseResult
{
    // Each iteration of the recursive descent starts at the beginning of a line.
    for (;;) {
        if (rest.empty()) {
            break;
        }
        parseStartOfLine();
    }
    return std::move(*this).intoParseResult();
}
/**
 * Builds the final `ParseResult` from the parsed nodes.
 *
 * Makes one pass over `syntax` to derive the auxiliary results:
 * - every `Args` node's text is shell-split and appended to `args`;
 * - the last `ShouldStart` node seen determines `shouldStart` (default true).
 *
 * All nodes, including `Args` and `ShouldStart`, are kept in the returned
 * syntax in their original order. The node vector is moved into the result
 * as a whole, so this consumes the parser (rvalue-qualified).
 */
auto intoParseResult() && -> ParseResult
{
    std::vector<std::string> args;
    auto shouldStart = true;

    // The nodes are only inspected here, never changed or dropped, so there
    // is no need to rebuild the vector node by node.
    for (auto & node : syntax) {
        std::visit(
            overloaded{
                [&](Args & e) {
                    auto split = shell_split(std::string(e.text));
                    args.insert(args.end(), split.begin(), split.end());
                },
                [&](ShouldStart & e) { shouldStart = e.shouldStart; },
                [&](auto & e) {},
            },
            node
        );
    }

    return ParseResult{
        .syntax = std::move(syntax),
        .args = std::move(args),
        .shouldStart = shouldStart,
    };
}
};
/**
 * Reduce a sequence of nodes to the ones that take part in comparison,
 * trimming trailing whitespace and dropping blank lines after commands.
 *
 * In a single forward pass over `syntax`:
 * - nodes whose `shouldCompare()` is false are dropped;
 * - `Command` nodes are right-trimmed and dropped if they end up empty;
 * - `Output` nodes are right-trimmed; blank output lines are dropped while
 *   no non-blank output has been seen since the last command (or since the
 *   start of the sequence).
 *
 * Only blank lines *following* a command are removed by one pass. Running
 * the function over a reversed view removes the ones preceding a command.
 *
 * @param syntax Any range of `Node`s; elements are copied, not modified.
 * @return The retained nodes, in iteration order of `syntax`.
 */
template<typename View>
auto tidySyntax(View syntax) -> std::vector<Node>
{
    // Note: Setting `lastWasCommand` lets us trim blank lines at the start and
    // end of the output stream.
    auto lastWasCommand = true;
    std::vector<Node> newSyntax;

    for (Node node : syntax) {
        // Only compare `Command` and `Output` nodes.
        if (std::visit([&](auto && e) { return !e.shouldCompare(); }, node)) {
            continue;
        }

        // Remove blank lines before and after commands. This lets us keep nice
        // whitespace in the test files.
        auto shouldKeep = std::visit(
            overloaded{
                [&](Command & e) {
                    lastWasCommand = true;
                    auto trimmed = trim_right_copy(e.text);
                    if (trimmed.empty()) {
                        return false;
                    } else {
                        e.text = trimmed;
                        return true;
                    }
                },
                [&](Output & e) {
                    std::string trimmed = trim_right_copy(e.text);
                    if (lastWasCommand && trimmed.empty()) {
                        // NB: Keep `lastWasCommand` true in this branch so we
                        // can keep pruning empty output lines.
                        return false;
                    } else {
                        e.text = trimmed;
                        lastWasCommand = false;
                        return true;
                    }
                },
                // Any other comparable node kind is not kept.
                [&](auto &) {
                    lastWasCommand = false;
                    return false;
                },
            },
            node
        );

        if (shouldKeep) {
            // `node` is a local copy, so it can be moved into the result.
            newSyntax.push_back(std::move(node));
        }
    }

    return newSyntax;
}
/**
 * Produce the list of nodes to use when comparing expected and actual
 * output.
 *
 * `tidySyntax` only prunes blank output lines that follow a command, so it
 * is applied twice: first over the nodes in reverse order (pruning blank
 * lines that precede a command), then over the reversed result (restoring
 * the original order and pruning blank lines that follow a command).
 */
auto ParseResult::tidyOutputForComparison() -> std::vector<Node>
{
    auto reversed = tidySyntax(std::ranges::reverse_view(syntax));
    return tidySyntax(std::ranges::reverse_view(reversed));
}
/**
 * Substitute every occurrence of the literal text `${PWD}` in each entry of
 * `args` with `pwd`.
 */
void ParseResult::interpolatePwd(std::string_view pwd)
{
    // Build the replacement list on the side and install it at the end.
    std::vector<std::string> interpolated;
    interpolated.reserve(args.size());

    for (auto it = args.begin(); it != args.end(); ++it) {
        interpolated.push_back(replaceStrings(*it, "${PWD}", pwd));
    }

    args = std::move(interpolated);
}
const char * ParseError::what() const noexcept
{
if (what_) {
return what_->c_str();
} else {
auto escaped = escapeString(rest, {.maxLength = 256, .escapeNonPrinting = true});
auto hint =
new HintFmt("Parse error: Expected %1%, got:\n%2%", expected, Uncolored(escaped));
what_ = hint->str();
return what_->c_str();
}
}
/**
 * Parse a literate test file.
 *
 * @param input  Full text of the test file.
 * @param config Parser settings.
 * @return The parsed nodes plus the auxiliary results collected from them.
 */
auto parse(const std::string input, Config config) -> ParseResult
{
    Parser parser(input, config);
    // `Parser::parse` consumes the parser, so hand it over as an rvalue.
    return std::move(parser).parse();
}
/** Write an `Args` node as the `@args` directive followed by its text. */
std::ostream & operator<<(std::ostream & output, const Args & node)
{
    output << "@args ";
    output << node.text;
    return output;
}
/** Write a `ShouldStart` node as `@should-start true` or `@should-start false`. */
std::ostream & operator<<(std::ostream & output, const ShouldStart & node)
{
    output << "@should-start ";
    if (node.shouldStart) {
        output << "true";
    } else {
        output << "false";
    }
    return output;
}
/** Write a text node as its stored text, unchanged. */
std::ostream & operator<<(std::ostream & output, const TextNode & rhs)
{
    output << rhs.text;
    return output;
}
void unparseNode(std::ostream & output, const Node & node, bool withNewline)
{
std::visit(
[&](const auto & n) { output << n << (withNewline && n.emitNewlineAfter() ? "\n" : ""); },
node
);
}
/**
 * Render a node for gtest failure messages.
 *
 * The node is unparsed (with its trailing newline, if any) and every newline
 * character is then replaced by the two-character sequence `\n`.
 */
template<typename T>
std::string gtestFormat(T & value)
{
    std::ostringstream rendered;
    unparseNode(rendered, value, true);

    std::string escaped = rendered.str();
    // gtest's diff printing is only triggered by the literal text `\n`, not by
    // an actual newline character.
    boost::algorithm::replace_all(escaped, "\n", "\\n");
    return escaped;
}
void PrintTo(const std::vector<Node> & nodes, std::ostream * output)
{
for (auto & node : nodes) {
*output << gtestFormat(node);
}
}
/**
 * Describe a node on one line as `<kind>: <escaped text>`.
 *
 * The text is the node unparsed without its trailing newline, passed through
 * `escapeString` with non-printing characters escaped.
 */
std::string debugNode(const Node & node)
{
    // Label: the node's kind name.
    std::ostringstream result;
    const std::string kindName = std::visit([](const auto & n) { return n.kind(); }, node);
    result << kindName << ": ";

    // Payload: the node's own text, without a trailing newline.
    std::ostringstream raw;
    unparseNode(raw, node, false);

    escapeString(result, raw.str(), {.escapeNonPrinting = true});
    return result.str();
}
auto ParseResult::debugPrint(std::ostream & output) -> void
{
::nix::cli_literate_parser::debugPrint(output, syntax);
}
void debugPrint(std::ostream & output, std::vector<Node> & nodes)
{
for (auto & node : nodes) {
output << debugNode(node) << std::endl;
}
}
} // namespace cli_literate_parser
} // namespace nix

View file

@ -3,132 +3,195 @@
#include <compare> #include <compare>
#include <memory> #include <memory>
#include <optional>
#include <sstream> #include <sstream>
#include <string>
#include <variant> #include <variant>
#include <vector> #include <vector>
#include <string>
namespace nix { namespace nix {
namespace cli_literate_parser {
// ------------------------- NODES -------------------------
//
// To update golden test files while preserving commentary output and other `@`
// directives, we need to keep commentary output around after parsing.
/**
 * Common polymorphic base of every syntax node.
 */
struct BaseNode {
virtual ~BaseNode() = default;
/** Whether this node takes part in output comparison. `false` unless overridden. */
virtual auto shouldCompare() const -> bool { return false; }
/** Name of the node type; used as the label in debug output. */
virtual auto kind() const -> std::string = 0;
/** Whether a newline is written after this node when it is unparsed with newlines enabled. */
virtual auto emitNewlineAfter() const -> bool = 0;
auto operator<=>(const BaseNode &rhs) const = default;
};
/**
 * A node containing text. The text should be identical to how the node was
 * written in the input file.
 */
struct TextNode : BaseNode {
/** The node's text. */
std::string text;
/** Explicit, so a plain string is never silently converted into a node. */
explicit TextNode(std::string text) : text(text) {}
};
/** Writes the node's `text` to `output` unchanged. */
std::ostream &operator<<(std::ostream &output, const TextNode &node);
/*
 * Declares a `TextNode` subclass called NAME.
 *
 * - `kind()` returns the stringified NAME.
 * - `emitNewlineAfter()` returns NEEDS_NEWLINE.
 * - `shouldCompare()` returns SHOULD_COMPARE.
 *
 * The macro is undefined again right after the node types are declared.
 */
#define DECLARE_TEXT_NODE(NAME, NEEDS_NEWLINE, SHOULD_COMPARE) \
struct NAME : TextNode { \
using TextNode::TextNode; \
~NAME() override = default; \
\
auto kind() const -> std::string override { return #NAME; } \
auto emitNewlineAfter() const -> bool override { return NEEDS_NEWLINE; } \
auto shouldCompare() const -> bool override { return SHOULD_COMPARE; } \
};
/* Columns: name, needsNewline, shouldCompare.
 * Only `Command` and `Output` take part in comparison. */
DECLARE_TEXT_NODE(Prompt, false, false)
DECLARE_TEXT_NODE(Command, true, true)
DECLARE_TEXT_NODE(Output, true, true)
DECLARE_TEXT_NODE(Commentary, true, false)
DECLARE_TEXT_NODE(Args, true, false)
DECLARE_TEXT_NODE(Indent, false, false)
#undef DECLARE_TEXT_NODE
/**
 * The `@should-start` directive: records whether the program under test is
 * expected to start successfully.
 *
 * Does not override `shouldCompare()`, so it is not part of output
 * comparison.
 */
struct ShouldStart : BaseNode {
/** The value given to the directive. */
bool shouldStart;
ShouldStart(bool shouldStart) : shouldStart(shouldStart) {}
~ShouldStart() override = default;
auto emitNewlineAfter() const -> bool override { return true; }
auto kind() const -> std::string override { return "should-start"; }
auto operator<=>(const ShouldStart &rhs) const = default;
};
/** Writes the node as `@should-start true` or `@should-start false`. */
std::ostream &operator<<(std::ostream &output, const ShouldStart &node);
/**
 * Any syntax node, including those that are cosmetic.
 */
using Node = std::variant<Prompt, Command, Output, Commentary, Args,
ShouldStart, Indent>;
/** Unparses a node into the exact text that would have created it, including a
 * newline at the end if present, if withNewline is set */
void unparseNode(std::ostream &output, const Node &node,
bool withNewline = true);
/**
 * Formats a node on one line as `<kind>: <escaped text>`, where the text is
 * the node unparsed without its trailing newline.
 */
std::string debugNode(const Node &node);
/**
 * Writes `debugNode` of every node to `output`, one per line.
 */
void debugPrint(std::ostream &output, std::vector<Node> &nodes);
/**
 * Override gtest printing for lists of nodes.
 */
void PrintTo(std::vector<Node> const &nodes, std::ostream *output);
/**
 * The result of parsing a test file.
 */
struct ParseResult {
/**
 * A set of nodes that can be used to reproduce the input file. This is used
 * to implement updating the test files.
 */
std::vector<Node> syntax;
/**
 * Extra CLI arguments, collected from the `@args` directives.
 */
std::vector<std::string> args;
/**
 * Should the program start successfully?
 *
 * NOTE: the member default here is `false`, but the parser sets this to
 * `true` when the input has no `@should-start` directive.
 */
bool shouldStart = false;
/**
 * Replace the literal text `${PWD}` with the given value in `args`.
 */
void interpolatePwd(std::string_view pwd);
/**
 * Tidy `syntax` to remove unnecessary nodes.
 *
 * Returns a new list holding only the nodes that take part in comparison,
 * with trailing whitespace trimmed and blank lines around commands removed.
 */
auto tidyOutputForComparison() -> std::vector<Node>;
/**
 * Write a one-line debug description of every node in `syntax` to `output`.
 */
auto debugPrint(std::ostream &output) -> void;
};
/**
 * A parse error.
 */
struct ParseError : std::exception {
/** Description of what the parser expected to find. */
std::string expected;
/** The input text reported in the message as what was found instead. */
std::string rest;
ParseError(std::string expected, std::string rest)
: expected(expected), rest(rest) {}
/** Returns the formatted message; it is built on first use and then cached. */
const char *what() const noexcept override;
private:
/**
 * Cached formatted contents of `what()`.
 *
 * `mutable` because `what()` is const but fills the cache lazily.
 */
mutable std::optional<std::string> what_;
};
/**
 * Settings passed to `parse`.
 */
struct Config {
/**
 * The prompt string to look for.
 */
std::string prompt;
/**
 * The number of spaces of indent for commands and output.
 */
size_t indent = 2;
};
/* /*
* A DFA parser for literate test cases for CLIs. * A recursive descent parser for literate test cases for CLIs.
* *
* FIXME: implement merging of these, so you can auto update cases that have * FIXME: implement merging of these, so you can auto update cases that have
* comments. * comments.
* *
* Format: * Syntax:
* COMMENTARY * ```
* INDENT PROMPT COMMAND * ( COMMENTARY
* INDENT OUTPUT * | INDENT PROMPT COMMAND
* | INDENT OUTPUT
* | @args ARGS
* | @should-start ( true | false )) *
* ```
* *
* e.g. * e.g.
* ```
* commentary commentary commentary * commentary commentary commentary
* @args --foo
* @should-start false
* nix-repl> :t 1 * nix-repl> :t 1
* an integer * an integer
* ```
* *
* Yields: * Yields something like:
* ```
* Commentary "commentary commentary commentary" * Commentary "commentary commentary commentary"
* Args "--foo"
* ShouldStart false
* Command ":t 1" * Command ":t 1"
* Output "an integer" * Output "an integer"
* ```
* *
* Note: one Output line is generated for each line of the sources, because * Note: one Output line is generated for each line of the sources, because
* this is effectively necessary to be able to align them in the future to * this is effectively necessary to be able to align them in the future to
* auto-update tests. * auto-update tests.
*/ */
class CLILiterateParser auto parse(std::string input, Config config) -> ParseResult;
{
public:
enum class NodeKind { }; // namespace cli_literate_parser
COMMENTARY, }; // namespace nix
COMMAND,
OUTPUT,
};
struct Node
{
NodeKind kind;
std::string text;
std::strong_ordering operator<=>(Node const &) const = default;
static Node mkCommentary(std::string text)
{
return Node{.kind = NodeKind::COMMENTARY, .text = text};
}
static Node mkCommand(std::string text)
{
return Node{.kind = NodeKind::COMMAND, .text = text};
}
static Node mkOutput(std::string text)
{
return Node{.kind = NodeKind::OUTPUT, .text = text};
}
auto print() const -> std::string;
};
CLILiterateParser(std::string prompt, size_t indent = 2);
auto syntax() const -> std::vector<Node> const &;
/** Feeds a character into the parser */
void feed(char c);
/** Feeds a string into the parser */
void feed(std::string_view s);
/** Parses an input in a non-streaming fashion */
static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
/** Returns, losslessly, the string that would have generated a syntax tree */
static auto unparse(std::string const & prompt, std::vector<Node> const & syntax, size_t indent = 2) -> std::string;
/** Consumes a CLILiterateParser and gives you the syntax out of it */
auto intoSyntax() && -> std::vector<Node>;
/** Tidies syntax to remove trailing whitespace from outputs and remove any
* empty prompts */
static auto tidyOutputForComparison(std::vector<Node> && syntax) -> std::vector<Node>;
private:
struct AccumulatingState
{
std::string lineAccumulator;
};
struct Indent
{
size_t pos = 0;
};
struct Commentary : public AccumulatingState
{};
struct Prompt : AccumulatingState
{
size_t pos = 0;
};
struct Command : public AccumulatingState
{};
struct OutputLine : public AccumulatingState
{};
using State = std::variant<Indent, Commentary, Prompt, Command, OutputLine>;
State state_;
constexpr static auto stateDebug(State const&) -> const char *;
const std::string prompt_;
const size_t indent_;
/** Last line was output, so we consider a blank to be part of the output */
bool lastWasOutput_;
std::vector<Node> syntax_;
void transition(State newState);
void onNewline();
};
// Override gtest printing for lists of nodes
void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os);
};