Implement a repl characterization test system

This allows automating the repl without needing a PTY, driven by test files that are very easy to write. Change-Id: Ia8d7854edd91f93477638942cb6fc261354e6035
2024-03-09 23:59:50 -08:00 · 2024-03-09 23:59:50 -08:00 · 18ed6c3bdf
parent 38571c50e6
commit 18ed6c3bdf
8 changed files with 531 additions and 0 deletions
--- a/tests/functional/repl_characterization/data/basic_repl.test
+++ b/tests/functional/repl_characterization/data/basic_repl.test
@ -0,0 +1,60 @@
  nix-repl> 1 + 1
  2
  nix-repl> :doc builtins.head
  Synopsis: builtins.head list
      Return the first element of a list; abort evaluation if
      the argument isn’t a list or is an empty list. You can
      test whether a list is empty by comparing it with [].
  nix-repl> f = a: "" + a
 Expect the trace to not contain any traceback:
  nix-repl> f 2
  error:
         … while evaluating a path segment
           at «string»:1:10:
              1|  a: "" + a
               |          ^
         error: cannot coerce an integer to a string: 2
  nix-repl> :te
  showing error traces
 Expect the trace to have traceback:
  nix-repl> f 2
  error:
         … from call site
           at «string»:1:1:
              1| f 2
               | ^
         … while calling anonymous lambda
           at «string»:1:2:
              1|  a: "" + a
               |  ^
         … while evaluating a path segment
           at «string»:1:10:
              1|  a: "" + a
               |          ^
         error: cannot coerce an integer to a string: 2
 Turning it off should also work:
  nix-repl> :te
  not showing error traces
  nix-repl> f 2
  error:
         … while evaluating a path segment
           at «string»:1:10:
              1|  a: "" + a
               |          ^
         error: cannot coerce an integer to a string: 2
--- a/tests/functional/repl_characterization/repl_characterization.cc
+++ b/tests/functional/repl_characterization/repl_characterization.cc
@ -5,8 +5,11 @@
 #include <optional>
 #include <unistd.h>
 #include "test-session.hh"
 #include "util.hh"
 #include "tests/characterization.hh"
 #include "tests/cli-literate-parser.hh"
 #include "tests/terminal-code-eater.hh"
 using namespace std::string_literals;
@ -14,6 +17,18 @@ namespace nix {
 static constexpr const char * REPL_PROMPT = "nix-repl> ";
 // ASCII ENQ character
 static constexpr const char * AUTOMATION_PROMPT = "\x05";
 /**
  * Returns `outLog` without a trailing newline + AUTOMATION_PROMPT pair, or
  * `outLog` unchanged when it does not end that way.
  */
 static std::string_view trimOutLog(std::string_view outLog)
 {
    std::string suffix = "\n"s;
    suffix += AUTOMATION_PROMPT;

    if (!outLog.ends_with(suffix)) {
        return outLog;
    }
    return outLog.substr(0, outLog.size() - suffix.size());
 }
 class ReplSessionTest : public CharacterizationTest
 {
    Path unitTestData = getUnitTestData();
@ -23,6 +38,43 @@ public:
    {
        return unitTestData + "/" + testStem;
    }
    /**
     * Replays a literate repl transcript against a freshly started `nix repl`
     * process and asserts that the process output matches the transcript.
     *
     * Only COMMAND nodes of the parsed transcript are sent to the process; the
     * comparison is done on the tidied forms of both the transcript and the
     * captured log.
     *
     * @param content transcript text, parsed with REPL_PROMPT as the prompt
     * @param extraArgs extra command line arguments, appended after the
     * built-in ones
     */
    void runReplTest(std::string_view const & content, std::vector<std::string> extraArgs = {}) const
    {
        auto syntax = CLILiterateParser::parse(REPL_PROMPT, content);

        // FIXME: why does this need two --quiets
        // show-trace is on by default due to test configuration, but is not a
        // standard default (presumably), so it is switched off explicitly here
        Strings args{"--quiet", "repl", "--quiet", "--option", "show-trace", "false", "--offline", "--extra-experimental-features", "repl-automation"};
        args.insert(args.end(), extraArgs.begin(), extraArgs.end());

        auto nixBin = canonPath(getEnvNonEmpty("NIX_BIN_DIR").value_or(NIX_BIN_DIR));

        auto process = RunningProcess::start(nixBin + "/nix", args);
        auto session = TestSession{AUTOMATION_PROMPT, std::move(process)};

        for (auto & bit : syntax) {
            if (bit.kind != CLILiterateParser::NodeKind::COMMAND) {
                continue;
            }

            // waitForPrompt() returns false when the process output hits EOF
            // before a prompt shows up
            ASSERT_TRUE(session.waitForPrompt()) << "repl output ended before a prompt for command: " << bit.text;
            session.runCommand(bit.text);
        }

        // Let the last command finish: its output is complete once the next
        // prompt appears
        ASSERT_TRUE(session.waitForPrompt()) << "repl output ended before the final prompt";
        session.close();

        // The captured log uses AUTOMATION_PROMPT as its prompt and has no indent
        auto parsedOutLog = CLILiterateParser::parse(AUTOMATION_PROMPT, trimOutLog(session.outLog), 0);

        parsedOutLog = CLILiterateParser::tidyOutputForComparison(std::move(parsedOutLog));
        syntax = CLILiterateParser::tidyOutputForComparison(std::move(syntax));

        ASSERT_EQ(parsedOutLog, syntax);
    }
 };
 TEST_F(ReplSessionTest, parses)
@ -39,4 +91,10 @@ TEST_F(ReplSessionTest, parses)
        return out.str();
    });
 }
 /** Replays the basic_repl.test transcript through runReplTest. */
 TEST_F(ReplSessionTest, repl_basic)
 {
    auto replay = [this](std::string transcript) {
        runReplTest(transcript);
    };
    readTest("basic_repl.test", replay);
 }
 };
--- a/tests/functional/repl_characterization/test-session.cc
+++ b/tests/functional/repl_characterization/test-session.cc
@ -0,0 +1,151 @@
 #include <iostream>
 #include <unistd.h>
 #include "test-session.hh"
 #include "util.hh"
 #include "tests/debug-char.hh"
 namespace nix {
 static constexpr const bool DEBUG_REPL_PARSER = false;
 /**
  * Starts `executable` as a child process with piped stdin and stdout.
  *
  * The child's stderr is redirected to the same pipe as its stdout.
  *
  * @param executable path handed to execv; also used as argv[0]
  * @param args arguments following argv[0]
  * @return the child's pid together with the two pipes; the ends used by the
  * child have already been closed in the returned pipes
  */
 RunningProcess RunningProcess::start(std::string executable, Strings args)
 {
    // argv[0] is the executable itself
    args.push_front(executable);

    Pipe toChild{};
    Pipe fromChild{};
    toChild.create();
    fromChild.create();

    // This is separate from runProgram2 because we have different IO requirements
    auto childMain = [&]() {
        if (dup2(fromChild.writeSide.get(), STDOUT_FILENO) == -1) {
            throw SysError("dupping stdout");
        }
        if (dup2(toChild.readSide.get(), STDIN_FILENO) == -1) {
            throw SysError("dupping stdin");
        }

        // The child has no use for the parent's ends of the pipes
        toChild.writeSide.close();
        fromChild.readSide.close();

        // stderr goes wherever stdout now goes
        if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1) {
            throw SysError("dupping stderr");
        }

        execv(executable.c_str(), stringsToCharPtrs(args).data());
        // execv only returns on failure
        throw SysError("exec did not happen");
    };
    const pid_t childPid = startProcess(childMain);

    // The parent has no use for the child's ends of the pipes
    fromChild.writeSide.close();
    toChild.readSide.close();

    return RunningProcess{
        .pid = childPid,
        .procStdin = std::move(toChild),
        .procStdout = std::move(fromChild),
    };
 }
 [[gnu::unused]]
 std::ostream & operator<<(std::ostream & os, ReplOutputParser::State s)
 {
    switch (s) {
    case ReplOutputParser::State::Prompt:
        os << "prompt";
        break;
    case ReplOutputParser::State::Context:
        os << "context";
        break;
    }
    return os;
 }
 /**
  * Switches to `new_state` and restarts prompt matching from the first
  * prompt character.
  *
  * `responsible_char` and `wasPrompt` are only used for the debug trace.
  */
 void ReplOutputParser::transition(State new_state, char responsible_char, bool wasPrompt)
 {
    if constexpr (DEBUG_REPL_PARSER) {
        const char * promptTag = wasPrompt ? " [prompt]" : "";
        std::cerr << "transition " << new_state << " for " << DebugChar{responsible_char} << promptTag << "\n";
    }

    pos_in_prompt = 0;
    state = new_state;
 }
 /**
  * Advances the prompt matcher by one character.
  *
  * @return true exactly when `c` completes the prompt string at the start of
  * a line; false for every other character
  */
 bool ReplOutputParser::feed(char c)
 {
    // Every newline starts a fresh attempt at matching the prompt
    if (c == '\n') {
        transition(State::Prompt, c);
        return false;
    }

    // Once a line has diverged from the prompt, the rest of it is ignored
    if (state != State::Prompt) {
        return false;
    }

    const size_t lastIndex = prompt.length() - 1;

    if (pos_in_prompt == lastIndex && prompt[pos_in_prompt] == c) {
        // Final prompt character matched: the prompt is open
        transition(State::Context, c, true);
        return true;
    }

    if (pos_in_prompt >= lastIndex || prompt[pos_in_prompt] != c) {
        // Mismatch: this line is not a prompt
        transition(State::Context, c);
        return false;
    }

    // Matched so far, with more prompt characters still to come
    ++pos_in_prompt;
    return false;
 }
 /**
  * Reads the child's output until a prompt shows up.
  *
  * Everything that survives the terminal code eater is appended to `outLog`.
  *
  * @return true once a chunk containing a prompt has been processed, false if
  * the output reached end-of-file first
  * @throws SysError if read() fails
  */
 bool TestSession::waitForPrompt()
 {
    std::vector<char> chunk(1024);

    while (true) {
        const ssize_t nread = read(proc.procStdout.readSide.get(), chunk.data(), chunk.size());

        if (nread < 0) {
            throw SysError("read");
        }
        if (nread == 0) {
            return false;
        }

        // The whole chunk is always processed, even after a prompt was seen
        bool sawPrompt = false;
        for (ssize_t idx = 0; idx != nread; ++idx) {
            const char raw = chunk[idx];
            bool wasEaten = true;

            eater.feed(raw, [&](char c) {
                wasEaten = false;
                if (outputParser.feed(c)) {
                    sawPrompt = true;
                }
                outLog.push_back(c);
            });

            if constexpr (DEBUG_REPL_PARSER) {
                std::cerr << "raw " << DebugChar{raw} << (wasEaten ? " [eaten]" : "") << "\n";
            }
        }

        if (sawPrompt) {
            return true;
        }
    }
 }
 /**
  * Closes the pipe feeding the child's stdin, then the pipe carrying its
  * output. Nothing here waits for or signals the child; `proc.pid` is not
  * touched.
  */
 void TestSession::close()
 {
    proc.procStdin.close();
    proc.procStdout.close();
 }
 /**
  * Sends `command`, followed by a newline, to the child's stdin and records
  * the same text in `outLog`.
  */
 void TestSession::runCommand(std::string command)
 {
    if constexpr (DEBUG_REPL_PARSER) {
        std::cerr << "runCommand " << command << "\n";
    }

    // Nix might not give us a newline before a prompt in all cases (it might
    // clear the line first, e.g.), so put the output parser at the start of a
    // line ourselves.
    outputParser.feed('\n');

    const std::string line = command + "\n";

    // Echo is disabled, so we have to make our own
    outLog.append(line);
    writeFull(proc.procStdin.writeSide.get(), line, false);
 }
 };
--- a/tests/functional/repl_characterization/test-session.hh
+++ b/tests/functional/repl_characterization/test-session.hh
@ -0,0 +1,69 @@
 #pragma once
 ///@file
 #include <sched.h>
 #include <string>
 #include "util.hh"
 #include "tests/terminal-code-eater.hh"
 namespace nix {
 /** A child process together with the pipes attached to its stdin and stdout. */
 struct RunningProcess
 {
    /** Process id of the child */
    pid_t pid;
    /** Pipe whose read side is the child's stdin */
    Pipe procStdin;
    /** Pipe whose write side is the child's stdout */
    Pipe procStdout;
    /** Starts `executable` with arguments `args` and pipes attached */
    static RunningProcess start(std::string executable, Strings args);
 };
 /**
  * DFA that catches repl prompts
  *
  * Characters are fed in one at a time with feed(), which reports when the
  * configured prompt string has just been completed.
  */
 class ReplOutputParser
 {
 public:
    /**
     * @param prompt the exact prompt text to look for; must not be empty
     */
    ReplOutputParser(std::string prompt)
        : prompt(std::move(prompt))
    {
        // The parameter shadows the member and has been moved from, so the
        // check has to go through `this`.
        assert(!this->prompt.empty());
    }
    /** Feeds in a character and returns whether this is an open prompt */
    bool feed(char c);
    enum class State {
        /** Still matching the prompt text */
        Prompt,
        /** Anything other than a prompt being matched */
        Context,
    };
 private:
    State state = State::Prompt;
    /** Index into `prompt` of the next character expected while in State::Prompt */
    size_t pos_in_prompt = 0;
    std::string const prompt;
    /** Changes state; `responsible_char` and `wasPrompt` describe why */
    void transition(State state, char responsible_char, bool wasPrompt = false);
 };
 /**
  * Drives one child process interactively: sends it commands and collects
  * its output, with terminal escape codes removed, into `outLog`.
  */
 struct TestSession
 {
    RunningProcess proc;
    ReplOutputParser outputParser;
    TerminalCodeEater eater;
    /** Filtered output of the child, interleaved with the commands sent to it */
    std::string outLog;
    std::string prompt;

    /**
     * @param prompt prompt text the child prints when it is ready for input
     * @param proc the child process to take over
     */
    TestSession(std::string prompt, RunningProcess && proc)
        : proc(std::move(proc))
        , outputParser(prompt)
        , prompt(prompt)
    {
    }

    /** Reads output until a prompt is seen; false if the output ended first */
    bool waitForPrompt();

    /** Sends `command` plus a newline to the child */
    void runCommand(std::string command);

    /** Closes the pipes to the child */
    void close();
 };
 };
--- a/tests/unit/libutil-support/tests/cli-literate-parser.cc
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.cc
@ -171,4 +171,76 @@ auto CLILiterateParser::syntax() const -> std::vector<Node> const &
    return syntax_;
 }
 /**
  * Renders a syntax tree back to text, one line per node.
  *
  * Commentary is written as-is, commands as indent + prompt + text, and
  * output as indent + text.
  */
 auto CLILiterateParser::unparse(const std::string & prompt, const std::vector<Node> & syntax, size_t indent)
    -> std::string
 {
    const std::string padding(indent, ' ');
    const std::string commandPrefix = padding + prompt;

    std::ostringstream rendered{};
    for (const auto & node : syntax) {
        if (node.kind == NodeKind::COMMENTARY) {
            rendered << node.text << "\n";
        } else if (node.kind == NodeKind::COMMAND) {
            rendered << commandPrefix << node.text << "\n";
        } else if (node.kind == NodeKind::OUTPUT) {
            rendered << padding << node.text << "\n";
        }
    }
    return rendered.str();
 }
 /**
  * Normalises a syntax tree so that two transcripts can be compared.
  *
  * Removes commentary, commands with empty text, and blank output that sits
  * directly below a command or above one (or at the very end). Output that
  * is kept has its trailing whitespace trimmed.
  */
 auto CLILiterateParser::tidyOutputForComparison(std::vector<Node> && syntax) -> std::vector<Node>
 {
    std::vector<Node> kept{};

    // The nodes are walked back to front. This flag says whether everything
    // kept so far since the last command is nothing at all, i.e. whether the
    // current node is effectively followed by a command. Starting out true
    // makes trailing blank output get eaten as well.
    bool commandFollows = true;

    for (size_t idx = syntax.size(); idx-- > 0;) {
        Node node = std::move(syntax[idx]);

        switch (node.kind) {
        case NodeKind::COMMENTARY:
            // chomp commentary
            continue;

        case NodeKind::COMMAND:
            commandFollows = true;
            if (node.text == "") {
                // chomp empty commands
                continue;
            }
            break;

        case NodeKind::OUTPUT: {
            // syntax[idx - 1] has not been moved from yet at this point
            const bool commandPrecedes = idx > 0 && syntax[idx - 1].kind == NodeKind::COMMAND;
            std::string trimmed = boost::algorithm::trim_right_copy(node.text);

            if (trimmed == "" && (commandFollows || commandPrecedes)) {
                // chomp empty text above or directly below commands
                continue;
            }

            // real output, stop chomping
            commandFollows = false;
            node = Node::mkOutput(std::move(trimmed));
            break;
        }
        }

        kept.push_back(std::move(node));
    }

    // Nodes were collected back to front
    std::reverse(kept.begin(), kept.end());
    return kept;
 }
 };
--- a/tests/unit/libutil-support/tests/cli-literate-parser.hh
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.hh
@ -81,9 +81,16 @@ public:
    /** Parses an input in a non-streaming fashion */
    static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
    /**
     * Returns, losslessly, the string that would have generated a syntax tree
     *
     * Commentary nodes are written verbatim, command nodes as `indent` spaces
     * followed by `prompt` and the command text, and output nodes as `indent`
     * spaces followed by the output text. Each node is terminated by a newline.
     */
    static auto unparse(std::string const & prompt, std::vector<Node> const & syntax, size_t indent = 2) -> std::string;
    /** Consumes a CLILiterateParser and gives you the syntax out of it */
    auto intoSyntax() && -> std::vector<Node>;
    /**
     * Tidies syntax to remove trailing whitespace from outputs and remove any
     * empty prompts.
     *
     * Commentary nodes are removed as well, and so are blank output lines
     * directly below a command, above a command, or at the very end.
     */
    static auto tidyOutputForComparison(std::vector<Node> && syntax) -> std::vector<Node>;
 private:
    struct AccumulatingState
--- a/tests/unit/libutil-support/tests/terminal-code-eater.cc
+++ b/tests/unit/libutil-support/tests/terminal-code-eater.cc
@ -0,0 +1,85 @@
 #include "terminal-code-eater.hh"
 #include "debug-char.hh"
 #include <assert.h>
 #include <cstdint>
 #include <iostream>
 namespace nix {
 static constexpr const bool DEBUG_EATER = false;
 /**
  * Pushes one byte of terminal output through the escape code filter.
  *
  * Outside of an escape sequence, ESC starts a sequence, carriage returns are
  * dropped, and every other byte is handed to `on_char`. After ESC, `[` opens
  * a CSI sequence, which is swallowed up to and including its final byte; any
  * other byte following ESC is swallowed on its own.
  *
  * A byte that cannot be part of a CSI sequence trips an assert while inside
  * one.
  *
  * @param c next byte of raw output
  * @param on_char called with `c` when the byte is not swallowed
  */
 void TerminalCodeEater::feed(char c, std::function<void(char)> on_char)
 {
    // ESC, written as a hex escape: '\e' is a compiler extension rather than
    // a standard C++ escape sequence.
    constexpr char ESC = '\x1b';

    auto isParamChar = [](char v) -> bool { return v >= 0x30 && v <= 0x3f; };
    auto isIntermediateChar = [](char v) -> bool { return v >= 0x20 && v <= 0x2f; };
    auto isFinalChar = [](char v) -> bool { return v >= 0x40 && v <= 0x7e; };

    if constexpr (DEBUG_EATER) {
        std::cerr << "eater" << DebugChar{c} << "\n";
    }

    switch (state) {
    case State::ExpectESC:
        switch (c) {
        case ESC:
            transition(State::ExpectESCSeq);
            return;
        // Just eat \r, since it is part of clearing a line
        case '\r':
            return;
        }
        if constexpr (DEBUG_EATER) {
            std::cerr << "eater uneat" << DebugChar{c} << "\n";
        }
        on_char(c);
        break;

    case State::ExpectESCSeq:
        switch (c) {
        // CSI
        case '[':
            transition(State::InCSIParams);
            return;
        // Anything else is swallowed as a two-byte escape
        default:
            transition(State::ExpectESC);
            return;
        }

    // https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_(Control_Sequence_Introducer)_sequences
    // A CSI sequence is: CSI [\x30-\x3f]* [\x20-\x2f]* [\x40-\x7e]
    //                        ^ params     ^ intermediates ^ final byte
    case State::InCSIParams:
        if (isFinalChar(c)) {
            transition(State::ExpectESC);
            return;
        } else if (isIntermediateChar(c)) {
            transition(State::InCSIIntermediates);
            return;
        } else if (isParamChar(c)) {
            return;
        } else {
            // Corrupt escape sequence? Throw an assert, for now.
            // transition(State::ExpectESC);
            assert(false && "Corrupt terminal escape sequence");
            return;
        }

    case State::InCSIIntermediates:
        if (isFinalChar(c)) {
            transition(State::ExpectESC);
            return;
        } else if (isIntermediateChar(c)) {
            return;
        } else {
            // Corrupt escape sequence? Throw an assert, for now.
            // transition(State::ExpectESC);
            assert(false && "Corrupt terminal escape sequence in intermediates");
            return;
        }
    }
 }
 /** Makes `new_state` the current state of the DFA. */
 void TerminalCodeEater::transition(State new_state)
 {
    state = new_state;
 }
 };
--- a/tests/unit/libutil-support/tests/terminal-code-eater.hh
+++ b/tests/unit/libutil-support/tests/terminal-code-eater.hh
@ -0,0 +1,29 @@
 #pragma once
 /// @file
 #include <functional>
 namespace nix {
 /** DFA that eats terminal escapes
 *
 * Bytes are pushed in one at a time; the ones that are not part of an escape
 * sequence are passed on to a callback.
 *
 * See: https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
 */
 class TerminalCodeEater
 {
 public:
    /** Feeds in one byte; `on_char` is called with it unless it gets eaten */
    void feed(char c, std::function<void(char)> on_char);
 private:
    enum class State {
        /** Not inside an escape sequence */
        ExpectESC,
        /** ESC was just seen */
        ExpectESCSeq,
        /** Inside a CSI sequence, no intermediate byte seen yet */
        InCSIParams,
        /** Inside a CSI sequence, after at least one intermediate byte */
        InCSIIntermediates,
    };
    State state = State::ExpectESC;
    /** Sets the current state */
    void transition(State new_state);
 };
 };