Rewrite REPL test parser

- Use a recursive descent parser so that it's easy to extend.
- Add `@args` to enable customizing command-line arguments
- Add `@should-start` to enable `nix repl` tests that error before
  entering the REPL
- Make sure to read all stdout output before comparing. This catches
  some extra output we were tossing out before!

Change-Id: I5522555df4c313024ab15cd10f9f04e7293bda3a
This commit is contained in:
Rebecca Turner 2024-03-22 16:45:05 -07:00
parent f3f7d81988
commit 83729b2075
15 changed files with 857 additions and 422 deletions

View file

@ -1,17 +1,28 @@
Commentary "meow meow meow"
Command "command"
Output "output output one"
Output ""
Output ""
Output "output output two"
Commentary "meow meow"
Command "command two"
Output "output output output"
Commentary "commentary"
Output "output output output"
Output ""
Commentary "the blank below should be chomped"
Command "command three"
Commentary ""
Output "meow output"
Output ""
Commentary: "meow meow meow"
Indent: " "
Prompt: "nix-repl> "
Command: "command"
Indent: " "
Output: "output output one"
Output: ""
Commentary: ""
Indent: " "
Output: "output output two"
Commentary: "meow meow"
Indent: " "
Prompt: "nix-repl> "
Command: "command two"
Indent: " "
Output: "output output output"
Commentary: "commentary"
Indent: " "
Output: "output output output"
Output: ""
Commentary: "the blank below should be chomped"
Indent: " "
Prompt: "nix-repl> "
Command: "command three"
Commentary: ""
Indent: " "
Output: "meow output"
Output: ""

View file

@ -1,10 +1,9 @@
Command "command"
Output "output output one"
Output ""
Output ""
Output "output output two"
Command "command two"
Output "output output output"
Output "output output output"
Command "command three"
Output "meow output"
Command: "command"
Output: "output output one"
Output: ""
Output: "output output two"
Command: "command two"
Output: "output output output"
Output: "output output output"
Command: "command three"
Output: "meow output"

View file

@ -0,0 +1,11 @@
command
output output one
output output two
command two
output output output
output output output
command three
meow output

View file

@ -1,6 +1,7 @@
https://github.com/NixOS/nix/pull/9917 (Enter debugger more reliably in let expressions and function calls)
This test ensures that continues don't skip opportunities to enter the debugger.
@args --debugger
trace: before outer break
info: breakpoint reached
@ -13,7 +14,7 @@ This test ensures that continues don't skip opportunities to enter the debugger.
0: error: breakpoint reached
«none»:0
1: while calling a function
TEST_DATA/regression_9917.nix:3:5
$TEST_DATA/regression_9917.nix:3:5
2| a = builtins.trace "before inner break" (
3| builtins.break { msg = "hello"; }
@ -21,7 +22,7 @@ This test ensures that continues don't skip opportunities to enter the debugger.
4| );
2: while calling a function
TEST_DATA/regression_9917.nix:2:7
$TEST_DATA/regression_9917.nix:2:7
1| let
2| a = builtins.trace "before inner break" (

View file

@ -1,3 +1,4 @@
@args --debugger
error:
… while evaluating the error message passed to builtin.throw
@ -14,3 +15,18 @@ We expect to be able to see locals like r in the debugger:
Env level 1
abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true
nix-repl> :quit
error:
… while evaluating the file '$TEST_DATA/regression_9918.nix':
… while calling the 'throw' builtin
at $TEST_DATA/regression_9918.nix:3:7:
2| r = [];
3| x = builtins.throw r;
| ^
4| in
… while evaluating the error message passed to builtin.throw
error: cannot coerce a list to a string: [ ]

View file

@ -1,5 +1,5 @@
with { inherit ({}) invalid; };
let
x = builtins.break 1;
puppy = "doggy";
in
x
builtins.break { }

View file

@ -1,3 +1,4 @@
@args --debugger
info: breakpoint reached
debugger should not crash now, but also not show any with variables
@ -5,10 +6,21 @@ debugger should not crash now, but also not show any with variables
0: error: breakpoint reached
«none»:0
Env level 0
static: x
static: puppy
Env level 1
static:
Env level 2
abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true
error:
… while evaluating the file '$TEST_DATA/regression_l145.nix':
… while calling the 'break' builtin
at $TEST_DATA/regression_l145.nix:5:3:
4| in
5| builtins.break { }
| ^
6|
error: breakpoint reached\n

View file

@ -1,3 +1,4 @@
@args --debugger
trace: before outer break
info: breakpoint reached
@ -24,7 +25,7 @@ If we :st past the frame in the backtrace with the meow in it, the meow should n
nix-repl> :st 3
3: while calling a function
TEST_DATA/stack_vars.nix:5:7
$TEST_DATA/stack_vars.nix:5:7
4| );
5| b = builtins.trace "before outer break" (
@ -58,9 +59,8 @@ If we :st past the frame in the backtrace with the meow in it, the meow should n
3
nix-repl> :st 3
3: while calling a function
TEST_DATA/stack_vars.nix:2:7
$TEST_DATA/stack_vars.nix:2:7
1| let
2| a = builtins.trace "before inner break" (
@ -72,3 +72,21 @@ If we :st past the frame in the backtrace with the meow in it, the meow should n
Env level 1
abort baseNameOf break builtins derivation derivationStrict dirOf false fetchGit fetchMercurial fetchTarball fetchTree fromTOML import isNull map null placeholder removeAttrs scopedImport throw toString true
nix-repl> :quit
error:
… while calling the 'trace' builtin
at $TEST_DATA/stack_vars.nix:2:7:
1| let
2| a = builtins.trace "before inner break" (
| ^
3| let meow' = 3; in builtins.break { msg = "hello"; }
… while calling the 'break' builtin
at $TEST_DATA/stack_vars.nix:3:23:
2| a = builtins.trace "before inner break" (
3| let meow' = 3; in builtins.break { msg = "hello"; }
| ^
4| );
error: breakpoint reached

View file

@ -1,16 +1,17 @@
#include <gtest/gtest.h>
#include <boost/algorithm/string/replace.hpp>
#include <optional>
#include <string>
#include <string_view>
#include <optional>
#include <unistd.h>
#include <boost/algorithm/string/replace.hpp>
#include "escape-string.hh"
#include "test-session.hh"
#include "util.hh"
#include "tests/characterization.hh"
#include "tests/cli-literate-parser.hh"
#include "tests/terminal-code-eater.hh"
#include "util.hh"
using namespace std::string_literals;
@ -40,91 +41,147 @@ public:
return unitTestData + "/" + testStem;
}
void runReplTest(std::string_view const & content, std::vector<std::string> extraArgs = {}) const
void runReplTest(const std::string content, std::vector<std::string> extraArgs = {}) const
{
auto syntax = CLILiterateParser::parse(std::string(REPL_PROMPT), content);
auto parsed = cli_literate_parser::parse(
content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT), .indent = 2}
);
parsed.interpolatePwd(unitTestData);
// FIXME: why does this need two --quiets
// show-trace is on by default due to test configuration, but is not a standard
Strings args{"--quiet", "repl", "--quiet", "--option", "show-trace", "false", "--offline", "--extra-experimental-features", "repl-automation"};
// show-trace is on by default due to test configuration, but is not a
// standard
Strings args{
"--quiet",
"repl",
"--quiet",
"--option",
"show-trace",
"false",
"--offline",
"--extra-experimental-features",
"repl-automation",
};
args.insert(args.end(), extraArgs.begin(), extraArgs.end());
args.insert(args.end(), parsed.args.begin(), parsed.args.end());
auto nixBin = canonPath(getEnvNonEmpty("NIX_BIN_DIR").value_or(NIX_BIN_DIR));
auto process = RunningProcess::start(nixBin + "/nix", args);
auto session = TestSession{std::string(AUTOMATION_PROMPT), std::move(process)};
auto session = TestSession(std::string(AUTOMATION_PROMPT), std::move(process));
for (auto & bit : syntax) {
if (bit.kind != CLILiterateParser::NodeKind::COMMAND) {
continue;
}
if (!session.waitForPrompt()) {
ASSERT_TRUE(false);
}
session.runCommand(bit.text);
for (auto & event : parsed.syntax) {
std::visit(
overloaded{
[&](const cli_literate_parser::Command & e) {
ASSERT_TRUE(session.waitForPrompt());
if (e.text == ":quit") {
// If we quit the repl explicitly, we won't have a
// prompt when we're done.
parsed.shouldStart = false;
}
session.runCommand(e.text);
},
[&](const auto & e) {},
},
event
);
}
if (!session.waitForPrompt()) {
ASSERT_TRUE(false);
if (parsed.shouldStart) {
ASSERT_TRUE(session.waitForPrompt());
}
session.close();
auto replacedOutLog = boost::algorithm::replace_all_copy(session.outLog, unitTestData, "TEST_DATA");
auto replacedOutLog =
boost::algorithm::replace_all_copy(session.outLog, unitTestData, "$TEST_DATA");
auto cleanedOutLog = trimOutLog(replacedOutLog);
auto parsedOutLog = CLILiterateParser::parse(std::string(AUTOMATION_PROMPT), cleanedOutLog, 0);
auto parsedOutLog = cli_literate_parser::parse(
std::string(cleanedOutLog),
cli_literate_parser::Config{.prompt = std::string(AUTOMATION_PROMPT), .indent = 0}
);
parsedOutLog = CLILiterateParser::tidyOutputForComparison(std::move(parsedOutLog));
syntax = CLILiterateParser::tidyOutputForComparison(std::move(syntax));
auto expected = parsed.tidyOutputForComparison();
auto actual = parsedOutLog.tidyOutputForComparison();
ASSERT_EQ(parsedOutLog, syntax);
ASSERT_EQ(expected, actual);
}
void runReplTestPath(const std::string_view & nameBase, std::vector<std::string> extraArgs)
{
auto nixPath = goldenMaster(nameBase + ".nix");
if (pathExists(nixPath)) {
extraArgs.push_back("-f");
extraArgs.push_back(nixPath);
}
readTest(nameBase + ".test", [this, extraArgs](std::string input) {
runReplTest(input, extraArgs);
});
}
void runReplTestPath(const std::string_view & nameBase)
{
runReplTestPath(nameBase, {});
}
void runDebuggerTest(const std::string_view & nameBase)
{
runReplTestPath(nameBase, {"--debugger"});
}
};
TEST_F(ReplSessionTest, parses)
TEST_F(ReplSessionTest, round_trip)
{
writeTest("basic.test", [this]() {
const std::string content = readFile(goldenMaster("basic.test"));
auto parsed = cli_literate_parser::parse(
content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT)}
);
std::ostringstream out{};
for (auto & node : parsed.syntax) {
cli_literate_parser::unparseNode(out, node, true);
}
return out.str();
});
}
TEST_F(ReplSessionTest, tidy)
{
writeTest("basic.ast", [this]() {
const std::string content = readFile(goldenMaster("basic.test"));
auto parser = CLILiterateParser{std::string(REPL_PROMPT)};
parser.feed(content);
auto parsed = cli_literate_parser::parse(
content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT)}
);
std::ostringstream out{};
for (auto & bit : parser.syntax()) {
out << bit.print() << "\n";
for (auto & node : parsed.syntax) {
out << debugNode(node) << "\n";
}
return out.str();
});
writeTest("basic_tidied.ast", [this]() {
const std::string content = readFile(goldenMaster("basic.test"));
auto syntax = CLILiterateParser::parse(std::string(REPL_PROMPT), content);
syntax = CLILiterateParser::tidyOutputForComparison(std::move(syntax));
auto parsed = cli_literate_parser::parse(
content, cli_literate_parser::Config{.prompt = std::string(REPL_PROMPT)}
);
auto tidied = parsed.tidyOutputForComparison();
std::ostringstream out{};
for (auto & bit : syntax) {
out << bit.print() << "\n";
for (auto & node : tidied) {
out << debugNode(node) << "\n";
}
return out.str();
});
}
TEST_F(ReplSessionTest, repl_basic)
{
readTest("basic_repl.test", [this](std::string input) { runReplTest(input); });
}
#define DEBUGGER_TEST(name) \
#define REPL_TEST(name) \
TEST_F(ReplSessionTest, name) \
{ \
readTest(#name ".test", [this](std::string input) { \
runReplTest(input, {"--debugger", "-f", goldenMaster(#name ".nix")}); \
}); \
{ \
runReplTestPath(#name); \
}
DEBUGGER_TEST(regression_9918);
DEBUGGER_TEST(regression_9917);
DEBUGGER_TEST(regression_l145);
DEBUGGER_TEST(stack_vars);
REPL_TEST(regression_9918);
REPL_TEST(regression_9917);
REPL_TEST(stack_vars);
REPL_TEST(basic_repl);
};
}; // namespace nix

View file

@ -1,4 +1,5 @@
#include <iostream>
#include <span>
#include <unistd.h>
#include "test-session.hh"
@ -21,14 +22,17 @@ RunningProcess RunningProcess::start(std::string executable, Strings args)
// This is separate from runProgram2 because we have different IO requirements
pid_t pid = startProcess([&]() {
if (dup2(procStdout.writeSide.get(), STDOUT_FILENO) == -1)
if (dup2(procStdout.writeSide.get(), STDOUT_FILENO) == -1) {
throw SysError("dupping stdout");
if (dup2(procStdin.readSide.get(), STDIN_FILENO) == -1)
}
if (dup2(procStdin.readSide.get(), STDIN_FILENO) == -1) {
throw SysError("dupping stdin");
}
procStdin.writeSide.close();
procStdout.readSide.close();
if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1)
if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1) {
throw SysError("dupping stderr");
}
execv(executable.c_str(), stringsToCharPtrs(args).data());
throw SysError("exec did not happen");
});
@ -44,7 +48,8 @@ RunningProcess RunningProcess::start(std::string executable, Strings args)
}
[[gnu::unused]]
std::ostream & operator<<(std::ostream & os, ReplOutputParser::State s)
std::ostream &
operator<<(std::ostream & os, ReplOutputParser::State s)
{
switch (s) {
case ReplOutputParser::State::Prompt:
@ -91,8 +96,7 @@ bool ReplOutputParser::feed(char c)
return false;
}
/** Waits for the prompt and then returns if a prompt was found */
bool TestSession::waitForPrompt()
bool TestSession::readOutThen(ReadOutThenCallback cb)
{
std::vector<char> buf(1024);
@ -106,38 +110,67 @@ bool TestSession::waitForPrompt()
return false;
}
switch (cb(std::span(buf.data(), res))) {
case ReadOutThenCallbackResult::Stop:
return true;
case ReadOutThenCallbackResult::Continue:
continue;
}
}
}
bool TestSession::waitForPrompt()
{
bool notEof = readOutThen([&](std::span<char> s) -> ReadOutThenCallbackResult {
bool foundPrompt = false;
for (ssize_t i = 0; i < res; ++i) {
for (auto ch : s) {
// foundPrompt = foundPrompt || outputParser.feed(buf[i]);
bool wasEaten = true;
eater.feed(buf[i], [&](char c) {
eater.feed(ch, [&](char c) {
wasEaten = false;
foundPrompt = outputParser.feed(buf[i]) || foundPrompt;
foundPrompt = outputParser.feed(ch) || foundPrompt;
outLog.push_back(c);
});
if constexpr (DEBUG_REPL_PARSER) {
std::cerr << "raw " << MaybeHexEscapedChar{buf[i]} << (wasEaten ? " [eaten]" : "") << "\n";
std::cerr << "raw " << MaybeHexEscapedChar{ch} << (wasEaten ? " [eaten]" : "") << "\n";
}
}
if (foundPrompt) {
return true;
return foundPrompt ? ReadOutThenCallbackResult::Stop : ReadOutThenCallbackResult::Continue;
});
return notEof;
}
void TestSession::wait()
{
readOutThen([&](std::span<char> s) {
for (auto ch : s) {
eater.feed(ch, [&](char c) {
outputParser.feed(c);
outLog.push_back(c);
});
}
}
// just keep reading till we hit eof
return ReadOutThenCallbackResult::Continue;
});
}
void TestSession::close()
{
proc.procStdin.close();
wait();
proc.procStdout.close();
}
void TestSession::runCommand(std::string command)
{
if constexpr (DEBUG_REPL_PARSER)
if constexpr (DEBUG_REPL_PARSER) {
std::cerr << "runCommand " << command << "\n";
}
command += "\n";
// We have to feed a newline into the output parser, since Nix might not
// give us a newline before a prompt in all cases (it might clear line

View file

@ -1,7 +1,9 @@
#pragma once
///@file
#include <functional>
#include <sched.h>
#include <span>
#include <string>
#include "util.hh"
@ -22,8 +24,7 @@ struct RunningProcess
class ReplOutputParser
{
public:
ReplOutputParser(std::string prompt)
: prompt(prompt)
ReplOutputParser(std::string prompt) : prompt(prompt)
{
assert(!prompt.empty());
}
@ -60,10 +61,27 @@ struct TestSession
{
}
/** Waits for the prompt and then returns if a prompt was found */
bool waitForPrompt();
/** Feeds a line of input into the command */
void runCommand(std::string command);
/** Closes the session, closing standard input and waiting for standard
* output to close, capturing any remaining output. */
void close();
private:
/** Waits until the command closes its output */
void wait();
enum class ReadOutThenCallbackResult { Stop, Continue };
using ReadOutThenCallback = std::function<ReadOutThenCallbackResult(std::span<char>)>;
/** Reads some chunks of output, calling the callback provided for each
* chunk and stopping if it returns Stop.
*
* @returns false if EOF, true if the callback requested we stop first.
* */
bool readOutThen(ReadOutThenCallback cb);
};
};

View file

@ -8,7 +8,7 @@ libutil-test-support_INSTALL_DIR :=
libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc)
libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES)
libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES) -I src/libutil
# libexpr so we can steal their string printer from print.cc
libutil-test-support_LIBS = libutil libexpr

View file

@ -74,20 +74,20 @@ public:
{
auto file = goldenMaster(testStem);
auto got = test();
auto actual = test();
if (testAccept())
{
createDirs(dirOf(file));
writeFile2(file, got);
writeFile2(file, actual);
GTEST_SKIP()
<< "Updating golden master "
<< file;
}
else
{
decltype(got) expected = readFile2(file);
ASSERT_EQ(got, expected);
decltype(actual) expected = readFile2(file);
ASSERT_EQ(expected, actual);
}
}

View file

@ -1,248 +1,444 @@
#include "cli-literate-parser.hh"
#include "escape-string.hh"
#include "libexpr/print.hh"
#include "escape-char.hh"
#include "libexpr/print.hh"
#include "types.hh"
#include "util.hh"
#include <ranges>
#include <boost/algorithm/string/replace.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <iostream>
#include <memory>
#include <boost/algorithm/string/trim.hpp>
#include <sstream>
#include <variant>
using namespace std::string_literals;
namespace nix {
#include "cli-literate-parser.hh"
#include "escape-string.hh"
#include "fmt.hh"
#include "libexpr/print.hh"
#include "shlex.hh"
#include "types.hh"
#include "util.hh"
static constexpr const bool DEBUG_PARSER = false;
constexpr auto CLILiterateParser::stateDebug(State const & s) -> const char *
{
return std::visit(
overloaded{// clang-format off
[](Indent const&) -> const char * { return "indent"; },
[](Commentary const&) -> const char * { return "indent"; },
[](Prompt const&) -> const char * { return "prompt"; },
[](Command const&) -> const char * { return "command"; },
[](OutputLine const&) -> const char * { return "output_line"; }},
// clang-format on
s);
}
using namespace std::string_literals;
using namespace boost::algorithm;
auto CLILiterateParser::Node::print() const -> std::string
{
std::ostringstream s{};
switch (kind) {
case NodeKind::COMMENTARY:
s << "Commentary ";
break;
case NodeKind::COMMAND:
s << "Command ";
break;
case NodeKind::OUTPUT:
s << "Output ";
break;
}
escapeString(s, this->text);
return s.str();
}
namespace nix {
void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os)
{
for (auto & node : nodes) {
*os << node.print() << "\\n";
}
}
namespace cli_literate_parser {
auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) -> std::vector<Node>
struct Parser
{
CLILiterateParser p{std::move(prompt), indent};
p.feed(input);
return std::move(p).intoSyntax();
}
auto CLILiterateParser::intoSyntax() && -> std::vector<Node>
{
return std::move(this->syntax_);
}
CLILiterateParser::CLILiterateParser(std::string prompt, size_t indent)
: state_(indent == 0 ? State(Prompt{}) : State(Indent{}))
, prompt_(prompt)
, indent_(indent)
, lastWasOutput_(false)
, syntax_{}
{
assert(!prompt.empty());
}
void CLILiterateParser::feed(char c)
{
if constexpr (DEBUG_PARSER) {
std::cout << stateDebug(state_) << " " << MaybeHexEscapedChar{c} << "\n";
Parser(const std::string input, Config config)
: input(input)
, rest(this->input)
, prompt(config.prompt)
, indentString(std::string(config.indent, ' '))
, lastWasOutput(false)
, syntax{}
{
assert(!prompt.empty());
}
if (c == '\n') {
onNewline();
return;
const std::string input;
std::string_view rest;
const std::string prompt;
const std::string indentString;
/** Last line was output, so we consider a blank to be part of the output */
bool lastWasOutput;
/**
* Nodes of syntax being built.
*/
std::vector<Node> syntax;
auto dbg(std::string_view state) -> void
{
std::cout << state << ": ";
escapeString(
std::cout,
rest,
{
.maxLength = 40,
.ansiColors = true,
.escapeNonPrinting = true,
}
);
std::cout << std::endl;
}
std::visit(
overloaded{
[&](Indent & s) {
if (c == ' ') {
if (++s.pos >= indent_) {
transition(Prompt{});
}
} else {
transition(Commentary{AccumulatingState{.lineAccumulator = std::string{c}}});
}
},
[&](Prompt & s) {
if (s.pos >= prompt_.length()) {
transition(Command{AccumulatingState{.lineAccumulator = std::string{c}}});
return;
} else if (c == prompt_[s.pos]) {
// good prompt character
++s.pos;
} else {
// didn't match the prompt, so it must have actually been output.
s.lineAccumulator.push_back(c);
transition(OutputLine{AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}});
return;
}
s.lineAccumulator.push_back(c);
},
[&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }},
state_);
}
void CLILiterateParser::onNewline()
{
State lastState = std::move(state_);
bool newLastWasOutput = false;
syntax_.push_back(std::visit(
overloaded{
[&](Indent & s) {
// XXX: technically this eats trailing spaces
// a newline following output is considered part of that output
if (lastWasOutput_) {
newLastWasOutput = true;
return Node::mkOutput("");
}
return Node::mkCommentary("");
},
[&](Commentary & s) { return Node::mkCommentary(std::move(s.lineAccumulator)); },
[&](Command & s) { return Node::mkCommand(std::move(s.lineAccumulator)); },
[&](OutputLine & s) {
newLastWasOutput = true;
return Node::mkOutput(std::move(s.lineAccumulator));
},
[&](Prompt & s) {
// INDENT followed by newline is also considered a blank output line
return Node::mkOutput(std::move(s.lineAccumulator));
}},
lastState));
transition(Indent{});
lastWasOutput_ = newLastWasOutput;
}
void CLILiterateParser::feed(std::string_view s)
{
for (char ch : s) {
feed(ch);
}
}
void CLILiterateParser::transition(State new_state)
{
// When we expect INDENT and we are parsing without indents, commentary
// cannot exist, so we want to transition directly into PROMPT before
// resuming normal processing.
if (Indent * i = std::get_if<Indent>(&new_state); i != nullptr && indent_ == 0) {
new_state = Prompt{AccumulatingState{}, i->pos};
template<typename T>
auto pushNode(T node) -> void
{
if constexpr (DEBUG_PARSER) {
std::cout << debugNode(node);
}
syntax.emplace_back(node);
}
state_ = new_state;
}
auto CLILiterateParser::syntax() const -> std::vector<Node> const &
{
return syntax_;
}
auto CLILiterateParser::unparse(const std::string & prompt, const std::vector<Node> & syntax, size_t indent)
-> std::string
{
std::string indent_str(indent, ' ');
std::ostringstream out{};
for (auto & node : syntax) {
switch (node.kind) {
case NodeKind::COMMENTARY:
out << node.text << "\n";
break;
case NodeKind::COMMAND:
out << indent_str << prompt << node.text << "\n";
break;
case NodeKind::OUTPUT:
out << indent_str << node.text << "\n";
break;
auto parseLiteral(const char c) -> bool
{
if (rest.starts_with(c)) {
rest.remove_prefix(1);
return true;
} else {
return false;
}
}
return out.str();
}
auto parseLiteral(const std::string_view & literal) -> bool
{
if (rest.starts_with(literal)) {
rest.remove_prefix(literal.length());
return true;
} else {
return false;
}
}
auto CLILiterateParser::tidyOutputForComparison(std::vector<Node> && syntax) -> std::vector<Node>
auto parseBool() -> bool
{
auto result = false;
if (parseLiteral("true")) {
result = true;
} else if (parseLiteral("false")) {
result = false;
} else {
throw ParseError("true or false", std::string(rest));
}
auto untilNewline = parseUntilNewline();
if (!untilNewline.empty()) {
throw ParseError("nothing after true or false", untilNewline);
}
return result;
}
auto parseUntilNewline() -> std::string
{
auto pos = rest.find('\n');
if (pos == std::string_view::npos) {
throw ParseError("text and then newline", std::string(rest));
} else {
// `parseOutput()` sets this to true anyways.
lastWasOutput = false;
auto result = std::string(rest, 0, pos);
rest.remove_prefix(pos + 1);
return result;
}
}
auto parseIndent() -> bool
{
if constexpr (DEBUG_PARSER) {
dbg("indent");
}
if (indentString.empty()) {
return true;
}
if (parseLiteral(indentString)) {
pushNode(Indent(indentString));
return true;
} else {
if constexpr (DEBUG_PARSER) {
dbg("indent failed");
}
return false;
}
}
auto parseCommand() -> void
{
if constexpr (DEBUG_PARSER) {
dbg("command");
}
auto untilNewline = parseUntilNewline();
pushNode(Command(untilNewline));
}
auto parsePrompt() -> void
{
if constexpr (DEBUG_PARSER) {
dbg("prompt");
}
if (parseLiteral(prompt)) {
pushNode(Prompt(prompt));
if (rest.empty()) {
return;
}
parseCommand();
} else {
parseOutput();
}
}
auto parseOutput() -> void
{
if constexpr (DEBUG_PARSER) {
dbg("output");
}
auto untilNewline = parseUntilNewline();
pushNode(Output(untilNewline));
lastWasOutput = true;
}
auto parseAtSign() -> void
{
if constexpr (DEBUG_PARSER) {
dbg("@ symbol");
}
if (!parseLiteral('@')) {
parseOutputOrCommentary();
}
if (parseLiteral("args ")) {
parseArgs();
} else if (parseLiteral("should-start ")) {
if constexpr (DEBUG_PARSER) {
dbg("@should-start");
}
auto shouldStart = parseBool();
pushNode(ShouldStart{shouldStart});
}
}
auto parseArgs() -> void
{
if constexpr (DEBUG_PARSER) {
dbg("@args");
}
auto untilNewline = parseUntilNewline();
pushNode(Args(untilNewline));
}
auto parseOutputOrCommentary() -> void
{
if constexpr (DEBUG_PARSER) {
dbg("output/commentary");
}
auto oldLastWasOutput = lastWasOutput;
auto untilNewline = parseUntilNewline();
auto trimmed = trim_right_copy(untilNewline);
if (oldLastWasOutput && trimmed.empty()) {
pushNode(Output{trimmed});
} else {
pushNode(Commentary{untilNewline});
}
}
auto parseStartOfLine() -> void
{
if constexpr (DEBUG_PARSER) {
dbg("start of line");
}
if (parseIndent()) {
parsePrompt();
} else {
parseAtSign();
}
}
auto parse() && -> ParseResult
{
// Begin the recursive descent parser at the start of a new line.
while (!rest.empty()) {
parseStartOfLine();
}
return std::move(*this).intoParseResult();
}
auto intoParseResult() && -> ParseResult
{
// Do another pass over the nodes to produce auxiliary results like parsed
// command line arguments.
std::vector<std::string> args;
std::vector<Node> newSyntax;
auto shouldStart = true;
for (auto it = syntax.begin(); it != syntax.end(); ++it) {
Node node = std::move(*it);
std::visit(
overloaded{
[&](Args & e) {
auto split = shell_split(std::string(e.text));
args.insert(args.end(), split.begin(), split.end());
},
[&](ShouldStart & e) { shouldStart = e.shouldStart; },
[&](auto & e) {},
},
node
);
newSyntax.push_back(node);
}
return ParseResult{
.syntax = std::move(newSyntax),
.args = std::move(args),
.shouldStart = shouldStart,
};
}
};
template<typename View>
auto tidySyntax(View syntax) -> std::vector<Node>
{
std::vector<Node> newSyntax{};
// Note: Setting `lastWasCommand` lets us trim blank lines at the start and
// end of the output stream.
auto lastWasCommand = true;
std::vector<Node> newSyntax;
// Eat trailing newlines, so assume that the very end was actually a command
bool lastWasCommand = true;
bool newLastWasCommand = true;
auto v = std::ranges::reverse_view(syntax);
for (auto it = v.begin(); it != v.end(); ++it) {
Node item = std::move(*it);
lastWasCommand = newLastWasCommand;
// chomp commentary
if (item.kind == NodeKind::COMMENTARY) {
for (auto it = syntax.begin(); it != syntax.end(); ++it) {
Node node = *it;
// Only compare `Command` and `Output` nodes.
if (std::visit([&](auto && e) { return !e.shouldCompare(); }, node)) {
continue;
}
if (item.kind == NodeKind::COMMAND) {
newLastWasCommand = true;
// Remove blank lines before and after commands. This lets us keep nice
// whitespace in the test files.
auto shouldKeep = std::visit(
overloaded{
[&](Command & e) {
lastWasCommand = true;
auto trimmed = trim_right_copy(e.text);
if (trimmed.empty()) {
return false;
} else {
e.text = trimmed;
return true;
}
},
[&](Output & e) {
std::string trimmed = trim_right_copy(e.text);
if (lastWasCommand && trimmed.empty()) {
// NB: Keep `lastWasCommand` true in this branch so we
// can keep pruning empty output lines.
return false;
} else {
e.text = trimmed;
lastWasCommand = false;
return true;
}
},
[&](auto & e) {
lastWasCommand = false;
return false;
},
},
node
);
if (item.text == "") {
// chomp empty commands
continue;
}
if (shouldKeep) {
newSyntax.push_back(node);
}
if (item.kind == NodeKind::OUTPUT) {
// TODO: horrible
bool nextIsCommand = (it + 1 == v.end()) ? false : (it + 1)->kind == NodeKind::COMMAND;
std::string trimmedText = boost::algorithm::trim_right_copy(item.text);
if ((lastWasCommand || nextIsCommand) && trimmedText == "") {
// chomp empty text above or directly below commands
continue;
}
// real output, stop chomping
newLastWasCommand = false;
item = Node::mkOutput(std::move(trimmedText));
}
newSyntax.push_back(std::move(item));
}
std::reverse(newSyntax.begin(), newSyntax.end());
return newSyntax;
}
};
auto ParseResult::tidyOutputForComparison() -> std::vector<Node>
{
auto reversed = tidySyntax(std::ranges::reverse_view(syntax));
auto unreversed = tidySyntax(std::ranges::reverse_view(reversed));
return unreversed;
}
void ParseResult::interpolatePwd(std::string_view pwd)
{
std::vector<std::string> newArgs;
for (auto & arg : args) {
newArgs.push_back(replaceStrings(arg, "${PWD}", pwd));
}
args = std::move(newArgs);
}
const char * ParseError::what() const noexcept
{
if (what_) {
return what_->c_str();
} else {
auto escaped = escapeString(rest, {.maxLength = 256, .escapeNonPrinting = true});
auto hint =
new HintFmt("Parse error: Expected %1%, got:\n%2%", expected, Uncolored(escaped));
what_ = hint->str();
return what_->c_str();
}
}
auto parse(const std::string input, Config config) -> ParseResult
{
return Parser(input, config).parse();
}
std::ostream & operator<<(std::ostream & output, const Args & node)
{
return output << "@args " << node.text;
}
std::ostream & operator<<(std::ostream & output, const ShouldStart & node)
{
return output << "@should-start " << (node.shouldStart ? "true" : "false");
}
std::ostream & operator<<(std::ostream & output, const TextNode & rhs)
{
return output << rhs.text;
}
void unparseNode(std::ostream & output, const Node & node, bool withNewline)
{
std::visit(
[&](const auto & n) { output << n << (withNewline && n.emitNewlineAfter() ? "\n" : ""); },
node
);
}
template<typename T>
std::string gtestFormat(T & value)
{
std::ostringstream formatted;
unparseNode(formatted, value, true);
auto str = formatted.str();
// Needs to be the literal string `\n` and not a newline character to
// trigger gtest diff printing. Yes seriously.
boost::algorithm::replace_all(str, "\n", "\\n");
return str;
}
void PrintTo(const std::vector<Node> & nodes, std::ostream * output)
{
for (auto & node : nodes) {
*output << gtestFormat(node);
}
}
std::string debugNode(const Node & node)
{
std::ostringstream output;
output << std::visit([](const auto & n) { return n.kind(); }, node) << ": ";
std::ostringstream contents;
unparseNode(contents, node, false);
escapeString(output, contents.str(), {.escapeNonPrinting = true});
return output.str();
}
auto ParseResult::debugPrint(std::ostream & output) -> void
{
::nix::cli_literate_parser::debugPrint(output, syntax);
}
void debugPrint(std::ostream & output, std::vector<Node> & nodes)
{
for (auto & node : nodes) {
output << debugNode(node) << std::endl;
}
}
} // namespace cli_literate_parser
} // namespace nix

View file

@ -3,132 +3,195 @@
#include <compare>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <variant>
#include <vector>
#include <string>
namespace nix {
namespace cli_literate_parser {
// ------------------------- NODES -------------------------
//
// To update golden test files while preserving commentary output and other `@`
// directives, we need to keep commentary output around after parsing.
struct BaseNode {
virtual ~BaseNode() = default;
virtual auto shouldCompare() const -> bool { return false; }
virtual auto kind() const -> std::string = 0;
virtual auto emitNewlineAfter() const -> bool = 0;
auto operator<=>(const BaseNode &rhs) const = default;
};
/**
* A node containing text. The text should be identical to how the node was
* written in the input file.
*/
struct TextNode : BaseNode {
std::string text;
explicit TextNode(std::string text) : text(text) {}
};
std::ostream &operator<<(std::ostream &output, const TextNode &node);
#define DECLARE_TEXT_NODE(NAME, NEEDS_NEWLINE, SHOULD_COMPARE) \
struct NAME : TextNode { \
using TextNode::TextNode; \
~NAME() override = default; \
\
auto kind() const -> std::string override { return #NAME; } \
auto emitNewlineAfter() const -> bool override { return NEEDS_NEWLINE; } \
auto shouldCompare() const -> bool override { return SHOULD_COMPARE; } \
};
/* name, needsNewline, shouldCompare */
DECLARE_TEXT_NODE(Prompt, false, false)
DECLARE_TEXT_NODE(Command, true, true)
DECLARE_TEXT_NODE(Output, true, true)
DECLARE_TEXT_NODE(Commentary, true, false)
DECLARE_TEXT_NODE(Args, true, false)
DECLARE_TEXT_NODE(Indent, false, false)
#undef DECLARE_TEXT_NODE
struct ShouldStart : BaseNode {
bool shouldStart;
ShouldStart(bool shouldStart) : shouldStart(shouldStart) {}
~ShouldStart() override = default;
auto emitNewlineAfter() const -> bool override { return true; }
auto kind() const -> std::string override { return "should-start"; }
auto operator<=>(const ShouldStart &rhs) const = default;
};
std::ostream &operator<<(std::ostream &output, const ShouldStart &node);
/**
* Any syntax node, including those that are cosmetic.
*/
using Node = std::variant<Prompt, Command, Output, Commentary, Args,
ShouldStart, Indent>;
/** Unparses a node into the exact text that would have created it, including a
* newline at the end if present, if withNewline is set */
void unparseNode(std::ostream &output, const Node &node,
bool withNewline = true);
std::string debugNode(const Node &node);
void debugPrint(std::ostream &output, std::vector<Node> &nodes);
/**
* Override gtest printing for lists of nodes.
*/
void PrintTo(std::vector<Node> const &nodes, std::ostream *output);
/**
* The result of parsing a test file.
*/
struct ParseResult {
/**
* A set of nodes that can be used to reproduce the input file. This is used
* to implement updating the test files.
*/
std::vector<Node> syntax;
/**
* Extra CLI arguments.
*/
std::vector<std::string> args;
/**
* Should the program start successfully?
*/
bool shouldStart = false;
/**
* Replace `$PWD` with the given value in `args`.
*/
void interpolatePwd(std::string_view pwd);
/**
* Tidy `syntax` to remove unnecessary nodes.
*/
auto tidyOutputForComparison() -> std::vector<Node>;
auto debugPrint(std::ostream &output) -> void;
};
/**
* A parse error.
*/
struct ParseError : std::exception {
std::string expected;
std::string rest;
ParseError(std::string expected, std::string rest)
: expected(expected), rest(rest) {}
const char *what() const noexcept override;
private:
/**
* Cached formatted contents of `what()`.
*/
mutable std::optional<std::string> what_;
};
struct Config {
/**
* The prompt string to look for.
*/
std::string prompt;
/**
* The number of spaces of indent for commands and output.
*/
size_t indent = 2;
};
/*
* A DFA parser for literate test cases for CLIs.
* A recursive descent parser for literate test cases for CLIs.
*
* FIXME: implement merging of these, so you can auto update cases that have
* comments.
*
* Format:
* COMMENTARY
* INDENT PROMPT COMMAND
* INDENT OUTPUT
* Syntax:
* ```
* ( COMMENTARY
* | INDENT PROMPT COMMAND
* | INDENT OUTPUT
* | @args ARGS
* | @should-start ( true | false )) *
* ```
*
* e.g.
* ```
* commentary commentary commentary
* @args --foo
* @should-start false
* nix-repl> :t 1
* an integer
* ```
*
* Yields:
* Yields something like:
* ```
* Commentary "commentary commentary commentary"
* Args "--foo"
* ShouldStart false
* Command ":t 1"
* Output "an integer"
* ```
*
* Note: one Output line is generated for each line of the sources, because
* this is effectively necessary to be able to align them in the future to
* auto-update tests.
*/
class CLILiterateParser
{
public:
auto parse(std::string input, Config config) -> ParseResult;
enum class NodeKind {
COMMENTARY,
COMMAND,
OUTPUT,
};
struct Node
{
NodeKind kind;
std::string text;
std::strong_ordering operator<=>(Node const &) const = default;
static Node mkCommentary(std::string text)
{
return Node{.kind = NodeKind::COMMENTARY, .text = text};
}
static Node mkCommand(std::string text)
{
return Node{.kind = NodeKind::COMMAND, .text = text};
}
static Node mkOutput(std::string text)
{
return Node{.kind = NodeKind::OUTPUT, .text = text};
}
auto print() const -> std::string;
};
CLILiterateParser(std::string prompt, size_t indent = 2);
auto syntax() const -> std::vector<Node> const &;
/** Feeds a character into the parser */
void feed(char c);
/** Feeds a string into the parser */
void feed(std::string_view s);
/** Parses an input in a non-streaming fashion */
static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
/** Returns, losslessly, the string that would have generated a syntax tree */
static auto unparse(std::string const & prompt, std::vector<Node> const & syntax, size_t indent = 2) -> std::string;
/** Consumes a CLILiterateParser and gives you the syntax out of it */
auto intoSyntax() && -> std::vector<Node>;
/** Tidies syntax to remove trailing whitespace from outputs and remove any
* empty prompts */
static auto tidyOutputForComparison(std::vector<Node> && syntax) -> std::vector<Node>;
private:
struct AccumulatingState
{
std::string lineAccumulator;
};
struct Indent
{
size_t pos = 0;
};
struct Commentary : public AccumulatingState
{};
struct Prompt : AccumulatingState
{
size_t pos = 0;
};
struct Command : public AccumulatingState
{};
struct OutputLine : public AccumulatingState
{};
using State = std::variant<Indent, Commentary, Prompt, Command, OutputLine>;
State state_;
constexpr static auto stateDebug(State const&) -> const char *;
const std::string prompt_;
const size_t indent_;
/** Last line was output, so we consider a blank to be part of the output */
bool lastWasOutput_;
std::vector<Node> syntax_;
void transition(State newState);
void onNewline();
};
// Override gtest printing for lists of nodes
void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os);
};
}; // namespace cli_literate_parser
}; // namespace nix