Implement a parser for a literate testing system for the repl

This parser can be reused for other purposes. It's inspired by
https://bitheap.org/cram/

Although eelco's impostor exists https://github.com/mobusoperandi/eelco,
it is not very nice to depend on out of tree testing frameworks with no
way to customize them.

Change-Id: Ifca50177e09730182baf0ebf829c3505bbb0274a
This commit is contained in:
jade 2024-03-09 23:59:50 -08:00
parent 06952cf7c4
commit ed95b02215
10 changed files with 412 additions and 1 deletions

View file

@ -41,6 +41,7 @@ makefiles += \
tests/functional/ca/local.mk \
tests/functional/dyn-drv/local.mk \
tests/functional/test-libstoreconsumer/local.mk \
tests/functional/repl_characterization/local.mk \
tests/functional/plugins/local.mk
else
makefiles += \

View file

@ -0,0 +1 @@
test-repl-characterization

View file

@ -0,0 +1,11 @@
Commentary "meow meow meow"
Command "command"
Output "output output one"
Output ""
Output ""
Output "output output two"
Commentary "meow meow"
Command "command two"
Output "output output output"
Commentary "commentary"
Output "output output output"

View file

@ -0,0 +1,11 @@
meow meow meow
nix-repl> command
output output one
output output two
meow meow
nix-repl> command two
output output output
commentary
output output output

View file

@ -0,0 +1,19 @@
programs += test-repl-characterization
installcheck: test-repl-characterization_RUN
test-repl-characterization_DIR := $(d)
test-repl-characterization_ENV := _NIX_TEST_UNIT_DATA=$(shell realpath "$(d)")/data
# do not install
test-repl-characterization_INSTALL_DIR :=
test-repl-characterization_SOURCES := \
$(wildcard $(d)/*.cc) \
test-repl-characterization_CXXFLAGS += -I src/libutil -I tests/unit/libutil-support
test-repl-characterization_LIBS = libutil libutil-test-support
test-repl-characterization_LDFLAGS = $(THREAD_LDFLAGS) $(SODIUM_LIBS) $(EDITLINE_LIBS) $(BOOST_LDFLAGS) $(LOWDOWN_LIBS) $(GTEST_LIBS)

View file

@ -0,0 +1,42 @@
#include <gtest/gtest.h>
#include <string>
#include <string_view>
#include <optional>
#include <unistd.h>
#include "tests/characterization.hh"
#include "tests/cli-literate-parser.hh"
using namespace std::string_literals;
namespace nix {
static constexpr const char * REPL_PROMPT = "nix-repl> ";
class ReplSessionTest : public CharacterizationTest
{
Path unitTestData = getUnitTestData();
public:
Path goldenMaster(std::string_view testStem) const override
{
return unitTestData + "/" + testStem;
}
};
TEST_F(ReplSessionTest, parses)
{
writeTest("basic.ast", [this]() {
const std::string content = readFile(goldenMaster("basic.test"));
auto parser = CLILiterateParser{REPL_PROMPT};
parser.feed(content);
std::ostringstream out{};
for (auto & bit : parser.syntax()) {
out << bit.print() << "\n";
}
return out.str();
});
}
};

View file

@ -10,6 +10,7 @@ libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc)
libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES)
libutil-test-support_LIBS = libutil
# libexpr so we can steal their string printer from print.cc
libutil-test-support_LIBS = libutil libexpr
libutil-test-support_LDFLAGS := -pthread -lrapidcheck

View file

@ -0,0 +1,174 @@
#include "cli-literate-parser.hh"
#include "libexpr/print.hh"
#include "debug-char.hh"
#include "types.hh"
#include "util.hh"
#include <iostream>
#include <memory>
#include <boost/algorithm/string/trim.hpp>
using namespace std::string_literals;
namespace nix {
static constexpr const bool DEBUG_PARSER = false;
constexpr auto CLILiterateParser::stateDebug(State const & s) -> const char *
{
return std::visit(
overloaded{// clang-format off
[](Indent const&) -> const char * { return "indent"; },
[](Commentary const&) -> const char * { return "indent"; },
[](Prompt const&) -> const char * { return "prompt"; },
[](Command const&) -> const char * { return "command"; },
[](OutputLine const&) -> const char * { return "output_line"; }},
// clang-format on
s);
}
auto CLILiterateParser::Node::print() const -> std::string
{
std::ostringstream s{};
switch (kind) {
case NodeKind::COMMENTARY:
s << "Commentary ";
break;
case NodeKind::COMMAND:
s << "Command ";
break;
case NodeKind::OUTPUT:
s << "Output ";
break;
}
printLiteralString(s, this->text);
return s.str();
}
void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os)
{
for (auto & node : nodes) {
*os << node.print() << "\\n";
}
}
auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) -> std::vector<Node>
{
CLILiterateParser p{std::move(prompt), indent};
p.feed(input);
return std::move(p).intoSyntax();
}
auto CLILiterateParser::intoSyntax() && -> std::vector<Node>
{
return std::move(this->syntax_);
}
CLILiterateParser::CLILiterateParser(std::string prompt, size_t indent)
: state_(indent == 0 ? State(Prompt{}) : State(Indent{}))
, prompt_(prompt)
, indent_(indent)
, lastWasOutput_(false)
, syntax_{}
{
assert(!prompt.empty());
}
void CLILiterateParser::feed(char c)
{
if constexpr (DEBUG_PARSER) {
std::cout << stateDebug(state_) << " " << DebugChar{c} << "\n";
}
if (c == '\n') {
onNewline();
return;
}
std::visit(
overloaded{
[&](Indent & s) {
if (c == ' ') {
if (++s.pos >= indent_) {
transition(Prompt{});
}
} else {
transition(Commentary{AccumulatingState{.lineAccumulator = std::string{c}}});
}
},
[&](Prompt & s) {
if (s.pos >= prompt_.length()) {
transition(Command{AccumulatingState{.lineAccumulator = std::string{c}}});
return;
} else if (c == prompt_[s.pos]) {
// good prompt character
++s.pos;
} else {
// didn't match the prompt, so it must have actually been output.
s.lineAccumulator.push_back(c);
transition(OutputLine{AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}});
return;
}
s.lineAccumulator.push_back(c);
},
[&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }},
state_);
}
void CLILiterateParser::onNewline()
{
State lastState = std::move(state_);
bool newLastWasOutput = false;
syntax_.push_back(std::visit(
overloaded{
[&](Indent & s) {
// XXX: technically this eats trailing spaces
// a newline following output is considered part of that output
if (lastWasOutput_) {
newLastWasOutput = true;
return Node::mkOutput("");
}
return Node::mkCommentary("");
},
[&](Commentary & s) { return Node::mkCommentary(std::move(s.lineAccumulator)); },
[&](Command & s) { return Node::mkCommand(std::move(s.lineAccumulator)); },
[&](OutputLine & s) {
newLastWasOutput = true;
return Node::mkOutput(std::move(s.lineAccumulator));
},
[&](Prompt & s) {
// INDENT followed by newline is also considered a blank output line
return Node::mkOutput(std::move(s.lineAccumulator));
}},
lastState));
transition(Indent{});
lastWasOutput_ = newLastWasOutput;
}
void CLILiterateParser::feed(std::string_view s)
{
for (char ch : s) {
feed(ch);
}
}
void CLILiterateParser::transition(State new_state)
{
// When we expect INDENT and we are parsing without indents, commentary
// cannot exist, so we want to transition directly into PROMPT before
// resuming normal processing.
if (Indent * i = std::get_if<Indent>(&new_state); i != nullptr && indent_ == 0) {
new_state = Prompt{AccumulatingState{}, i->pos};
}
state_ = new_state;
}
auto CLILiterateParser::syntax() const -> std::vector<Node> const &
{
return syntax_;
}
};

View file

@ -0,0 +1,127 @@
#pragma once
///@file
#include <compare>
#include <memory>
#include <sstream>
#include <variant>
#include <vector>
#include <string>
namespace nix {
/*
* A DFA parser for literate test cases for CLIs.
*
* FIXME: implement merging of these, so you can auto update cases that have
* comments.
*
* Format:
* COMMENTARY
* INDENT PROMPT COMMAND
* INDENT OUTPUT
*
* e.g.
* commentary commentary commentary
* nix-repl> :t 1
* an integer
*
* Yields:
* Commentary "commentary commentary commentary"
* Command ":t 1"
* Output "an integer"
*
* Note: one Output line is generated for each line of the sources, because
* this is effectively necessary to be able to align them in the future to
* auto-update tests.
*/
class CLILiterateParser
{
public:
enum class NodeKind {
COMMENTARY,
COMMAND,
OUTPUT,
};
struct Node
{
NodeKind kind;
std::string text;
std::strong_ordering operator<=>(Node const &) const = default;
static Node mkCommentary(std::string text)
{
return Node{.kind = NodeKind::COMMENTARY, .text = text};
}
static Node mkCommand(std::string text)
{
return Node{.kind = NodeKind::COMMAND, .text = text};
}
static Node mkOutput(std::string text)
{
return Node{.kind = NodeKind::OUTPUT, .text = text};
}
auto print() const -> std::string;
};
CLILiterateParser(std::string prompt, size_t indent = 2);
auto syntax() const -> std::vector<Node> const &;
/** Feeds a character into the parser */
void feed(char c);
/** Feeds a string into the parser */
void feed(std::string_view s);
/** Parses an input in a non-streaming fashion */
static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
/** Consumes a CLILiterateParser and gives you the syntax out of it */
auto intoSyntax() && -> std::vector<Node>;
private:
struct AccumulatingState
{
std::string lineAccumulator;
};
struct Indent
{
size_t pos = 0;
};
struct Commentary : public AccumulatingState
{};
struct Prompt : AccumulatingState
{
size_t pos = 0;
};
struct Command : public AccumulatingState
{};
struct OutputLine : public AccumulatingState
{};
using State = std::variant<Indent, Commentary, Prompt, Command, OutputLine>;
State state_;
constexpr static auto stateDebug(State const&) -> const char *;
const std::string prompt_;
const size_t indent_;
/** Last line was output, so we consider a blank to be part of the output */
bool lastWasOutput_;
std::vector<Node> syntax_;
void transition(State newState);
void onNewline();
};
// Override gtest printing for lists of nodes
void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os);
};

View file

@ -0,0 +1,24 @@
///@file
#include <ostream>
#include <boost/io/ios_state.hpp>
namespace nix {
struct DebugChar
{
char c;
};
inline std::ostream & operator<<(std::ostream & s, DebugChar c)
{
boost::io::ios_flags_saver _ifs(s);
if (isprint(c.c)) {
s << static_cast<char>(c.c);
} else {
s << std::hex << "0x" << (static_cast<unsigned int>(c.c) & 0xff);
}
return s;
}
}