From ed95b02215198c538c685f195077d7c6b1ec10c0 Mon Sep 17 00:00:00 2001 From: Jade Lovelace Date: Sat, 9 Mar 2024 23:59:50 -0800 Subject: [PATCH] Implement a parser for a literate testing system for the repl This parser can be reused for other purposes. It's inspired by https://bitheap.org/cram/ Although eelco's impostor exists https://github.com/mobusoperandi/eelco, it is not very nice to depend on out of tree testing frameworks with no way to customize them. Change-Id: Ifca50177e09730182baf0ebf829c3505bbb0274a --- Makefile | 1 + .../repl_characterization/.gitignore | 1 + .../repl_characterization/data/basic.ast | 11 ++ .../repl_characterization/data/basic.test | 11 ++ .../functional/repl_characterization/local.mk | 19 ++ .../repl_characterization.cc | 42 +++++ tests/unit/libutil-support/local.mk | 3 +- .../tests/cli-literate-parser.cc | 174 ++++++++++++++++++ .../tests/cli-literate-parser.hh | 127 +++++++++++++ .../unit/libutil-support/tests/debug-char.hh | 24 +++ 10 files changed, 412 insertions(+), 1 deletion(-) create mode 100644 tests/functional/repl_characterization/.gitignore create mode 100644 tests/functional/repl_characterization/data/basic.ast create mode 100644 tests/functional/repl_characterization/data/basic.test create mode 100644 tests/functional/repl_characterization/local.mk create mode 100644 tests/functional/repl_characterization/repl_characterization.cc create mode 100644 tests/unit/libutil-support/tests/cli-literate-parser.cc create mode 100644 tests/unit/libutil-support/tests/cli-literate-parser.hh create mode 100644 tests/unit/libutil-support/tests/debug-char.hh diff --git a/Makefile b/Makefile index 313f02ede..87cfc0699 100644 --- a/Makefile +++ b/Makefile @@ -41,6 +41,7 @@ makefiles += \ tests/functional/ca/local.mk \ tests/functional/dyn-drv/local.mk \ tests/functional/test-libstoreconsumer/local.mk \ + tests/functional/repl_characterization/local.mk \ tests/functional/plugins/local.mk else makefiles += \ diff --git 
a/tests/functional/repl_characterization/.gitignore b/tests/functional/repl_characterization/.gitignore new file mode 100644 index 000000000..4c6412c2f --- /dev/null +++ b/tests/functional/repl_characterization/.gitignore @@ -0,0 +1 @@ +test-repl-characterization diff --git a/tests/functional/repl_characterization/data/basic.ast b/tests/functional/repl_characterization/data/basic.ast new file mode 100644 index 000000000..d494b00aa --- /dev/null +++ b/tests/functional/repl_characterization/data/basic.ast @@ -0,0 +1,11 @@ +Commentary "meow meow meow" +Command "command" +Output "output output one" +Output "" +Output "" +Output "output output two" +Commentary "meow meow" +Command "command two" +Output "output output output" +Commentary "commentary" +Output "output output output" diff --git a/tests/functional/repl_characterization/data/basic.test b/tests/functional/repl_characterization/data/basic.test new file mode 100644 index 000000000..d6b8427b4 --- /dev/null +++ b/tests/functional/repl_characterization/data/basic.test @@ -0,0 +1,11 @@ +meow meow meow + nix-repl> command + output output one + + + output output two +meow meow + nix-repl> command two + output output output +commentary + output output output diff --git a/tests/functional/repl_characterization/local.mk b/tests/functional/repl_characterization/local.mk new file mode 100644 index 000000000..6eacb96d0 --- /dev/null +++ b/tests/functional/repl_characterization/local.mk @@ -0,0 +1,19 @@ +programs += test-repl-characterization + +installcheck: test-repl-characterization_RUN + +test-repl-characterization_DIR := $(d) + +test-repl-characterization_ENV := _NIX_TEST_UNIT_DATA=$(shell realpath "$(d)")/data + +# do not install +test-repl-characterization_INSTALL_DIR := + +test-repl-characterization_SOURCES := \ + $(wildcard $(d)/*.cc) \ + +test-repl-characterization_CXXFLAGS += -I src/libutil -I tests/unit/libutil-support + +test-repl-characterization_LIBS = libutil libutil-test-support + 
+test-repl-characterization_LDFLAGS = $(THREAD_LDFLAGS) $(SODIUM_LIBS) $(EDITLINE_LIBS) $(BOOST_LDFLAGS) $(LOWDOWN_LIBS) $(GTEST_LIBS) diff --git a/tests/functional/repl_characterization/repl_characterization.cc b/tests/functional/repl_characterization/repl_characterization.cc new file mode 100644 index 000000000..5b73e7a89 --- /dev/null +++ b/tests/functional/repl_characterization/repl_characterization.cc @@ -0,0 +1,42 @@ +#include + +#include +#include +#include +#include + +#include "tests/characterization.hh" +#include "tests/cli-literate-parser.hh" + +using namespace std::string_literals; + +namespace nix { + +static constexpr const char * REPL_PROMPT = "nix-repl> "; + +class ReplSessionTest : public CharacterizationTest /* fixture that resolves golden files relative to getUnitTestData() */ +{ + Path unitTestData = getUnitTestData(); + +public: + Path goldenMaster(std::string_view testStem) const override + { + return unitTestData + "/" + testStem; + } +}; + +TEST_F(ReplSessionTest, parses) +{ /* Parses basic.test with the REPL prompt and hands writeTest one print() line per node for basic.ast. */ + writeTest("basic.ast", [this]() { + const std::string input = readFile(goldenMaster("basic.test")); + CLILiterateParser parser{REPL_PROMPT}; + parser.feed(input); + + std::ostringstream rendered; + for (auto const & node : parser.syntax()) { + rendered << node.print() << '\n'; + } + return rendered.str(); + }); +} +}; diff --git a/tests/unit/libutil-support/local.mk b/tests/unit/libutil-support/local.mk index b4c8f2475..cfd88be99 100644 --- a/tests/unit/libutil-support/local.mk +++ b/tests/unit/libutil-support/local.mk @@ -10,6 +10,7 @@ libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc) libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES) -libutil-test-support_LIBS = libutil +# libexpr so we can steal their string printer from print.cc +libutil-test-support_LIBS = libutil libexpr libutil-test-support_LDFLAGS := -pthread -lrapidcheck diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.cc b/tests/unit/libutil-support/tests/cli-literate-parser.cc new file mode 100644 index 000000000..3b2345e8e --- /dev/null +++ 
b/tests/unit/libutil-support/tests/cli-literate-parser.cc @@ -0,0 +1,174 @@ +#include "cli-literate-parser.hh" +#include "libexpr/print.hh" +#include "debug-char.hh" +#include "types.hh" +#include "util.hh" +#include +#include +#include + +using namespace std::string_literals; + +namespace nix { + +static constexpr const bool DEBUG_PARSER = false; + +constexpr auto CLILiterateParser::stateDebug(State const & s) -> const char * +{ /* Short name of the active parser state; only printed by feed(char) when DEBUG_PARSER is enabled. */ + return std::visit( + overloaded{// clang-format off + [](Indent const&) -> const char * { return "indent"; }, + [](Commentary const&) -> const char * { return "commentary"; }, + [](Prompt const&) -> const char * { return "prompt"; }, + [](Command const&) -> const char * { return "command"; }, + [](OutputLine const&) -> const char * { return "output_line"; }}, + // clang-format on + s); +} + +auto CLILiterateParser::Node::print() const -> std::string +{ /* Renders the node as its kind name, a space, and the text passed through printLiteralString. */ + std::ostringstream s{}; + switch (kind) { + case NodeKind::COMMENTARY: + s << "Commentary "; + break; + case NodeKind::COMMAND: + s << "Command "; + break; + case NodeKind::OUTPUT: + s << "Output "; + break; + } + printLiteralString(s, this->text); + return s.str(); +} + +void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os) +{ + for (auto & node : nodes) { + *os << node.print() << "\\n"; /* NOTE(review): "\\n" emits a literal backslash followed by 'n', not a line break - confirm this is intended */ + } +} + +auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) -> std::vector<Node> +{ /* One-shot helper: builds a parser, feeds it the whole input and returns the collected nodes. */ + CLILiterateParser p{std::move(prompt), indent}; + p.feed(input); + return std::move(p).intoSyntax(); +} + +auto CLILiterateParser::intoSyntax() && -> std::vector<Node> +{ + return std::move(this->syntax_); +} + +CLILiterateParser::CLILiterateParser(std::string prompt, size_t indent) + : state_(indent == 0 ? 
State(Prompt{}) : State(Indent{})) + , prompt_(std::move(prompt)) + , indent_(indent) + , lastWasOutput_(false) + , syntax_{} +{ + assert(!prompt_.empty()); /* checked on the member: the parameter has just been moved from */ +} + +void CLILiterateParser::feed(char c) +{ /* Advances the state machine by one character; '\n' finishes the current line via onNewline(). */ + if constexpr (DEBUG_PARSER) { + std::cout << stateDebug(state_) << " " << DebugChar{c} << "\n"; + } + + if (c == '\n') { + onNewline(); + return; + } + + std::visit( + overloaded{ + [&](Indent & s) { + if (c == ' ') { + if (++s.pos >= indent_) { + transition(Prompt{}); + } + } else { + transition(Commentary{AccumulatingState{.lineAccumulator = std::string{c}}}); + } + }, + [&](Prompt & s) { + if (s.pos >= prompt_.length()) { + transition(Command{AccumulatingState{.lineAccumulator = std::string{c}}}); + return; + } else if (c == prompt_[s.pos]) { + // good prompt character + ++s.pos; + } else { + // didn't match the prompt, so it must have actually been output. + s.lineAccumulator.push_back(c); + transition(OutputLine{AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}}); + return; + } + s.lineAccumulator.push_back(c); + }, + [&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }}, + state_); +} + +void CLILiterateParser::onNewline() +{ /* Turns the line that just ended into a Node, chosen by the state the line ended in. */ + State lastState = std::move(state_); + bool newLastWasOutput = false; + + syntax_.push_back(std::visit( + overloaded{ + [&](Indent &) { + // XXX: technically this eats trailing spaces + + // a newline following output is considered part of that output + if (lastWasOutput_) { + newLastWasOutput = true; + return Node::mkOutput(""); + } + return Node::mkCommentary(""); + }, + [&](Commentary & s) { return Node::mkCommentary(std::move(s.lineAccumulator)); }, + [&](Command & s) { return Node::mkCommand(std::move(s.lineAccumulator)); }, + [&](OutputLine & s) { + newLastWasOutput = true; + return Node::mkOutput(std::move(s.lineAccumulator)); + }, + [&](Prompt & s) { + // INDENT followed by newline is also considered a blank output line + return Node::mkOutput(std::move(s.lineAccumulator)); + }}, + lastState)); + + 
transition(Indent{}); + lastWasOutput_ = newLastWasOutput; +} + +void CLILiterateParser::feed(std::string_view s) +{ /* Feeds every character of s, in order, through feed(char). */ + for (char ch : s) { + feed(ch); + } +} + +void CLILiterateParser::transition(State new_state) +{ + // When we expect INDENT and we are parsing without indents, commentary + // cannot exist, so we want to transition directly into PROMPT before + // resuming normal processing. + if (Indent * i = std::get_if<Indent>(&new_state); i != nullptr && indent_ == 0) { + new_state = Prompt{AccumulatingState{}, i->pos}; + } + + state_ = std::move(new_state); /* most states carry a line accumulator string, so hand it over instead of copying */ +} + +auto CLILiterateParser::syntax() const -> std::vector<Node> const & +{ + return syntax_; +} + +}; diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.hh b/tests/unit/libutil-support/tests/cli-literate-parser.hh new file mode 100644 index 000000000..86a5bdd32 --- /dev/null +++ b/tests/unit/libutil-support/tests/cli-literate-parser.hh @@ -0,0 +1,127 @@ +#pragma once +///@file + +#include +#include +#include +#include +#include +#include + +namespace nix { +/* + * A DFA parser for literate test cases for CLIs. + * + * FIXME: implement merging of these, so you can auto update cases that have + * comments. + * + * Format: + * COMMENTARY + * INDENT PROMPT COMMAND + * INDENT OUTPUT + * + * e.g. + * commentary commentary commentary + * nix-repl> :t 1 + * an integer + * + * Yields: + * Commentary "commentary commentary commentary" + * Command ":t 1" + * Output "an integer" + * + * Note: one Output line is generated for each line of the sources, because + * this is effectively necessary to be able to align them in the future to + * auto-update tests. 
+ */ +class CLILiterateParser +{ +public: + + enum class NodeKind { + COMMENTARY, + COMMAND, + OUTPUT, + }; + + struct Node /* one parsed source line: its kind plus the line text */ + { + NodeKind kind; + std::string text; + std::strong_ordering operator<=>(Node const &) const = default; + + static Node mkCommentary(std::string text) + { + return Node{.kind = NodeKind::COMMENTARY, .text = text}; + } + + static Node mkCommand(std::string text) + { + return Node{.kind = NodeKind::COMMAND, .text = text}; + } + + static Node mkOutput(std::string text) + { + return Node{.kind = NodeKind::OUTPUT, .text = text}; + } + + auto print() const -> std::string; + }; + + CLILiterateParser(std::string prompt, size_t indent = 2); /* prompt must be non-empty (asserted); indent is the number of leading spaces before a prompt or output line */ + + auto syntax() const -> std::vector<Node> const &; + + /** Feeds a character into the parser */ + void feed(char c); + + /** Feeds a string into the parser */ + void feed(std::string_view s); + + /** Parses an input in a non-streaming fashion */ + static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>; + + /** Consumes a CLILiterateParser and gives you the syntax out of it */ + auto intoSyntax() && -> std::vector<Node>; + +private: + + struct AccumulatingState /* base of every state that collects the characters of the current line */ + { + std::string lineAccumulator; + }; + struct Indent /* start of a line; pos counts the leading spaces seen so far */ + { + size_t pos = 0; + }; + struct Commentary : public AccumulatingState + {}; + struct Prompt : AccumulatingState /* after the indent; pos counts the prompt characters matched so far */ + { + size_t pos = 0; + }; + struct Command : public AccumulatingState + {}; + struct OutputLine : public AccumulatingState + {}; + + using State = std::variant<Indent, Commentary, Prompt, Command, OutputLine>; + State state_; + + constexpr static auto stateDebug(State const&) -> const char *; + + const std::string prompt_; + const size_t indent_; + + /** Last line was output, so we consider a blank to be part of the output */ + bool lastWasOutput_; + + std::vector<Node> syntax_; + + void transition(State newState); + void onNewline(); +}; + +// Override gtest printing for lists of nodes +void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os); +}; diff --git 
a/tests/unit/libutil-support/tests/debug-char.hh b/tests/unit/libutil-support/tests/debug-char.hh new file mode 100644 index 000000000..765d8553f --- /dev/null +++ b/tests/unit/libutil-support/tests/debug-char.hh @@ -0,0 +1,25 @@ +#pragma once +///@file +#include +#include + +namespace nix { + +struct DebugChar /* wraps a char so that streaming it prints printable characters verbatim and anything else as 0x-prefixed hex */ +{ + char c; +}; + +inline std::ostream & operator<<(std::ostream & s, DebugChar c) +{ + boost::io::ios_flags_saver _ifs(s); /* puts the stream's format flags back on scope exit, undoing std::hex */ + + if (isprint(static_cast<unsigned char>(c.c))) { /* isprint() on a negative plain char is undefined behaviour */ + s << static_cast<char>(c.c); + } else { + s << std::hex << "0x" << (static_cast<int>(c.c) & 0xff); + } + return s; +} + +}