From b85085157ac3906ab7a92b96a45d9e441521da55 Mon Sep 17 00:00:00 2001
From: Jade Lovelace <lix@jade.fyi>
Date: Sat, 9 Mar 2024 23:59:50 -0800
Subject: [PATCH] Implement a repl characterization test system

This allows automating the repl without needing a PTY, using test files
that are very easy to write.

Change-Id: Ia8d7854edd91f93477638942cb6fc261354e6035
---
 .../data/basic_repl.test                      |  60 +++++++
 .../repl_characterization.cc                  |  55 +++++++
 .../repl_characterization/test-session.cc     | 150 ++++++++++++++++++
 .../repl_characterization/test-session.hh     |  69 ++++++++
 .../tests/cli-literate-parser.cc              |  72 +++++++++
 .../tests/cli-literate-parser.hh              |   7 +
 .../tests/terminal-code-eater.cc              |  85 ++++++++++
 .../tests/terminal-code-eater.hh              |  29 ++++
 8 files changed, 527 insertions(+)
 create mode 100644 tests/functional/repl_characterization/data/basic_repl.test
 create mode 100644 tests/functional/repl_characterization/test-session.cc
 create mode 100644 tests/functional/repl_characterization/test-session.hh
 create mode 100644 tests/unit/libutil-support/tests/terminal-code-eater.cc
 create mode 100644 tests/unit/libutil-support/tests/terminal-code-eater.hh
diff --git a/tests/functional/repl_characterization/data/basic_repl.test b/tests/functional/repl_characterization/data/basic_repl.test
new file mode 100644
index 000000000..a8dea6d7c
--- /dev/null
+++ b/tests/functional/repl_characterization/data/basic_repl.test
@@ -0,0 +1,60 @@
+  nix-repl> 1 + 1
+  2
+
+  nix-repl> :doc builtins.head
+  Synopsis: builtins.head list
+
+      Return the first element of a list; abort evaluation if
+      the argument isn’t a list or is an empty list. You can
+      test whether a list is empty by comparing it with [].
+
+  nix-repl> f = a: "" + a
+
+Expect the trace to not contain any traceback:
+
+  nix-repl> f 2
+  error:
+         … while evaluating a path segment
+           at «string»:1:10:
+              1|  a: "" + a
+               |          ^
+
+         error: cannot coerce an integer to a string: 2
+
+  nix-repl> :te
+  showing error traces
+
+Expect the trace to have traceback:
+
+  nix-repl> f 2
+  error:
+         … from call site
+           at «string»:1:1:
+              1| f 2
+               | ^
+
+         … while calling anonymous lambda
+           at «string»:1:2:
+              1|  a: "" + a
+               |  ^
+
+         … while evaluating a path segment
+           at «string»:1:10:
+              1|  a: "" + a
+               |          ^
+
+         error: cannot coerce an integer to a string: 2
+
+Turning it off should also work:
+
+  nix-repl> :te
+  not showing error traces
+
+  nix-repl> f 2
+  error:
+         … while evaluating a path segment
+           at «string»:1:10:
+              1|  a: "" + a
+               |          ^
+
+         error: cannot coerce an integer to a string: 2
diff --git a/tests/functional/repl_characterization/repl_characterization.cc b/tests/functional/repl_characterization/repl_characterization.cc
index 5b73e7a89..395018299 100644
--- a/tests/functional/repl_characterization/repl_characterization.cc
+++ b/tests/functional/repl_characterization/repl_characterization.cc
@@ -5,8 +5,11 @@
 #include <optional>
 #include <unistd.h>
 
+#include "test-session.hh"
+#include "util.hh"
 #include "tests/characterization.hh"
 #include "tests/cli-literate-parser.hh"
+#include "tests/terminal-code-eater.hh"
 
 using namespace std::string_literals;
 
@@ -14,6 +17,18 @@ namespace nix {
 
 static constexpr const char * REPL_PROMPT = "nix-repl> ";
 
+// ASCII ENQ character
+static constexpr const char * AUTOMATION_PROMPT = "\x05";
+
+/** Drops one trailing newline + automation prompt from a session log, if present. */
+static std::string_view trimOutLog(std::string_view outLog)
+{
+    const auto suffix = std::string("\n") + AUTOMATION_PROMPT;
+    if (!outLog.ends_with(suffix))
+        return outLog;
+    return outLog.substr(0, outLog.size() - suffix.size());
+}
+
 class ReplSessionTest : public CharacterizationTest
 {
     Path unitTestData = getUnitTestData();
@@ -23,6 +38,40 @@ public:
     {
         return unitTestData + "/" + testStem;
     }
+
+    /** Replays the commands of the literate test `content` against a real
+     * `nix repl` (extraArgs are appended to its command line) and asserts that
+     * the tidied transcript equals the tidied expected session. */
+    void runReplTest(std::string_view const & content, std::vector<std::string> extraArgs = {}) const
+    {
+        auto syntax = CLILiterateParser::parse(REPL_PROMPT, content);
+
+        // TODO: find out why --quiet has to be passed both before and after `repl`
+        Strings args{"--quiet", "repl", "--quiet", "--extra-experimental-features", "repl-automation"};
+        args.insert(args.end(), extraArgs.begin(), extraArgs.end());
+
+        auto process = RunningProcess::start("nix", args);
+        auto session = TestSession{AUTOMATION_PROMPT, std::move(process)};
+
+        for (auto & bit : syntax) {
+            if (bit.kind != CLILiterateParser::NodeKind::COMMAND) {
+                continue;
+            }
+
+            ASSERT_TRUE(session.waitForPrompt()) << "repl closed its output before prompting for: " << bit.text;
+            session.runCommand(bit.text);
+        }
+        // Wait for one more prompt so the last command's output is fully captured.
+        ASSERT_TRUE(session.waitForPrompt()) << "repl closed its output before the final prompt";
+        session.close();
+
+        auto parsedOutLog = CLILiterateParser::parse(AUTOMATION_PROMPT, trimOutLog(session.outLog), 0);
+
+        CLILiterateParser::tidyOutputForComparison(parsedOutLog);
+        CLILiterateParser::tidyOutputForComparison(syntax);
+
+        ASSERT_EQ(parsedOutLog, syntax);
+    }
 };
 
 TEST_F(ReplSessionTest, parses)
@@ -39,4 +88,10 @@ TEST_F(ReplSessionTest, parses)
         return out.str();
     });
 }
+
+TEST_F(ReplSessionTest, repl_basic) // runs data/basic_repl.test against a live repl
+{
+    readTest("basic_repl.test", [this](std::string input) { runReplTest(input); });
+}
+
 };
diff --git a/tests/functional/repl_characterization/test-session.cc b/tests/functional/repl_characterization/test-session.cc
new file mode 100644
index 000000000..e4532a5b7
--- /dev/null
+++ b/tests/functional/repl_characterization/test-session.cc
@@ -0,0 +1,150 @@
+#include <iostream>
+
+#include "test-session.hh"
+#include "util.hh"
+#include "tests/debug-char.hh"
+
+namespace nix {
+
+static constexpr const bool DEBUG_REPL_PARSER = false;
+
+/** Spawns `executable` with `args`, connecting its stdin and stdout to fresh pipes. */
+RunningProcess RunningProcess::start(std::string executable, Strings args)
+{
+    args.push_front(executable); // the program name doubles as argv[0]
+    Pipe procStdin{};
+    Pipe procStdout{};
+
+    procStdin.create();
+    procStdout.create();
+
+    // This is separate from runProgram2 because we have different IO requirements
+    pid_t pid = startProcess([&]() { // NOTE(review): presumably runs in the forked child - confirm
+        if (dup2(procStdout.writeSide.get(), STDOUT_FILENO) == -1)
+            throw SysError("dupping stdout");
+        if (dup2(procStdin.readSide.get(), STDIN_FILENO) == -1)
+            throw SysError("dupping stdin");
+        procStdin.writeSide.close();
+        procStdout.readSide.close();
+        if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1) // stderr shares the stdout pipe
+            throw SysError("dupping stderr");
+        execvp(executable.c_str(), stringsToCharPtrs(args).data());
+        throw SysError("exec did not happen"); // only reached if execvp returned
+    });
+    // This side keeps procStdin's write end and procStdout's read end only.
+    procStdout.writeSide.close();
+    procStdin.readSide.close();
+
+    return RunningProcess{
+        .pid = pid,
+        .procStdin = std::move(procStdin),
+        .procStdout = std::move(procStdout),
+    };
+}
+
+/** Streams a short lowercase name for a parser state; used by the debug trace. */
+[[gnu::unused]]
+std::ostream & operator<<(std::ostream & os, ReplOutputParser::State s)
+{
+    using State = ReplOutputParser::State;
+    if (s == State::Prompt) {
+        return os << "prompt";
+    }
+    if (s == State::Context) {
+        return os << "context";
+    }
+    return os;
+}
+
+void ReplOutputParser::transition(State new_state, char responsible_char, bool wasPrompt)
+{
+    if constexpr (DEBUG_REPL_PARSER) { // responsible_char and wasPrompt only feed this trace
+        std::cerr << "transition " << new_state << " for " << DebugChar{responsible_char}
+                  << (wasPrompt ? " [prompt]" : "") << "\n";
+    }
+    state = new_state;
+    pos_in_prompt = 0; // every transition restarts prompt matching from the first character
+}
+
+/** Consumes one character of repl output; true exactly when it completes a prompt. */
+bool ReplOutputParser::feed(char c)
+{
+    // A newline always re-arms prompt matching for the line that follows.
+    if (c == '\n') {
+        transition(State::Prompt, c);
+        return false;
+    }
+    if (state != State::Prompt) {
+        return false;
+    }
+    const bool matches = prompt[pos_in_prompt] == c;
+    if (matches && pos_in_prompt == prompt.length() - 1) {
+        transition(State::Context, c, true);
+        return true;
+    }
+    if (!matches || pos_in_prompt >= prompt.length() - 1) {
+        transition(State::Context, c);
+        return false;
+    }
+    pos_in_prompt++;
+    return false;
+}
+
+/** Reads repl output until the parser reports a prompt; false means EOF came first. */
+bool TestSession::waitForPrompt()
+{
+    std::vector<char> chunk(1024);
+
+    while (true) {
+        const ssize_t nread = read(proc.procStdout.readSide.get(), chunk.data(), chunk.size());
+
+        if (nread < 0) {
+            throw SysError("read");
+        }
+        if (nread == 0) {
+            return false;
+        }
+
+        // The whole chunk is consumed even after a prompt shows up, so outLog stays complete.
+        bool sawPrompt = false;
+        for (ssize_t idx = 0; idx < nread; ++idx) {
+            const char raw = chunk[idx];
+            bool swallowed = true;
+            eater.feed(raw, [&](char visible) {
+                swallowed = false;
+                sawPrompt = outputParser.feed(visible) || sawPrompt;
+                outLog.push_back(visible);
+            });
+
+            if constexpr (DEBUG_REPL_PARSER) {
+                std::cerr << "raw " << DebugChar{raw} << (swallowed ? " [eaten]" : "") << "\n";
+            }
+        }
+
+        if (sawPrompt) {
+            return true;
+        }
+    }
+}
+
+void TestSession::close() // NOTE(review): never waits on proc.pid - confirm the child is reaped elsewhere
+{
+    proc.procStdin.close(); // presumably delivers EOF to the repl's stdin - verify against Pipe::close
+    proc.procStdout.close();
+}
+
+/** Sends one line to the repl's stdin and records it in outLog as if it had been echoed. */
+void TestSession::runCommand(std::string command)
+{
+    if constexpr (DEBUG_REPL_PARSER) {
+        std::cerr << "runCommand " << command << "\n";
+    }
+    // Nix might not emit a newline before the next prompt (it may clear the
+    // line instead), so put the parser back at line start ourselves.
+    outputParser.feed('\n');
+    // Echo is disabled, so the log has to get the command from us.
+    outLog.append(command += "\n");
+    writeFull(proc.procStdin.writeSide.get(), command, false);
+}
+
+};
diff --git a/tests/functional/repl_characterization/test-session.hh b/tests/functional/repl_characterization/test-session.hh
new file mode 100644
index 000000000..4dab6383d
--- /dev/null
+++ b/tests/functional/repl_characterization/test-session.hh
@@ -0,0 +1,69 @@
+#pragma once
+///@file
+
+#include <sched.h>
+#include <string>
+
+#include "util.hh"
+#include "tests/terminal-code-eater.hh"
+
+namespace nix {
+
+/** A spawned child process plus the pipes connected to its stdin and stdout. */
+struct RunningProcess
+{
+    pid_t pid;
+    Pipe procStdin; // the child reads its stdin from this pipe
+    Pipe procStdout; // the child's stdout and stderr both write into this pipe
+    static RunningProcess start(std::string executable, Strings args);
+};
+
+/** DFA that catches repl prompts; a prompt only counts at the start of a line */
+class ReplOutputParser
+{
+public:
+    ReplOutputParser(std::string prompt)
+        : prompt(prompt)
+    {
+        assert(!prompt.empty()); // feed() computes prompt.length() - 1
+    }
+    /** Feeds in a character and returns whether this is an open prompt */
+    bool feed(char c);
+
+    enum class State {
+        Prompt, // at line start and still matching prompt characters
+        Context, // anywhere else; nothing matches until the next newline
+    };
+
+private:
+    State state = State::Prompt;
+    size_t pos_in_prompt = 0; // index of the next prompt character expected
+    std::string const prompt;
+
+    void transition(State state, char responsible_char, bool wasPrompt = false);
+};
+
+/** Drives one child process: sends it commands, watches its output for
+ * prompts, and accumulates a transcript of the session in outLog. */
+struct TestSession
+{
+    RunningProcess proc;
+    ReplOutputParser outputParser;
+    TerminalCodeEater eater;
+    std::string outLog; // escape-stripped output interleaved with the commands sent
+    std::string prompt;
+
+    TestSession(std::string prompt, RunningProcess && proc)
+        : proc(std::move(proc))
+        , outputParser(prompt)
+        , eater({})
+        , outLog({})
+        , prompt(prompt)
+    {
+    }
+
+    bool waitForPrompt(); // false if the output reached EOF before a prompt was seen
+    void runCommand(std::string command); // appends "\n" and writes it to the child's stdin
+    void close(); // closes both pipes to the child
+};
+};
diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.cc b/tests/unit/libutil-support/tests/cli-literate-parser.cc
index 3b2345e8e..7e0b5e69f 100644
--- a/tests/unit/libutil-support/tests/cli-literate-parser.cc
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.cc
@@ -171,4 +171,76 @@ auto CLILiterateParser::syntax() const -> std::vector<Node> const &
     return syntax_;
 }
 
+/** Renders syntax back into literate-test text, one line per node (commentary included). */
+auto CLILiterateParser::unparse(const std::string & prompt, const std::vector<Node> & syntax, size_t indent)
+    -> std::string
+{
+    std::string indent_str(indent, ' ');
+    std::ostringstream out{};
+
+    for (auto & node : syntax) {
+        switch (node.kind) {
+        case NodeKind::COMMENTARY:
+            out << node.text << "\n"; // NOTE(review): assumes commentary text is the raw, unindented line
+            break;
+        case NodeKind::COMMAND:
+            out << indent_str << prompt << node.text << "\n";
+            break;
+        case NodeKind::OUTPUT:
+            out << indent_str << node.text << "\n";
+            break;
+        }
+    }
+    return out.str();
+}
+
+/** Rewrites syntax in place into a normalised form intended for comparing two sessions. */
+void CLILiterateParser::tidyOutputForComparison(std::vector<Node> & syntax)
+{
+    std::vector<Node> newSyntax{};
+    // Eat trailing newlines, so assume that the very end was actually a command
+    bool lastWasCommand = true;
+    bool newLastWasCommand = true;
+    // Walk back to front: "last" below refers to nodes that come later in the input.
+    auto v = std::ranges::reverse_view(syntax);
+
+    for (auto it = v.begin(); it != v.end(); ++it) {
+        Node item = *it;
+
+        lastWasCommand = newLastWasCommand;
+        // chomp commentary: it is never part of the tidied result
+        if (item.kind == NodeKind::COMMENTARY) {
+            continue;
+        }
+
+        if (item.kind == NodeKind::COMMAND) {
+            newLastWasCommand = true;
+
+            if (item.text == "") {
+                // chomp empty commands
+                continue;
+            }
+        }
+
+        if (item.kind == NodeKind::OUTPUT) {
+            // TODO: horrible. In the reversed view, it + 1 is the node just before this one in the input.
+            bool nextIsCommand = (it + 1 == v.end()) ? false : (it + 1)->kind == NodeKind::COMMAND;
+            std::string trimmedText = boost::algorithm::trim_right_copy(item.text);
+            if ((lastWasCommand || nextIsCommand) && trimmedText == "") {
+                // chomp empty text above or directly below commands
+                continue;
+            }
+
+            // real output, stop chomping
+            newLastWasCommand = false;
+
+            item = Node::mkOutput(std::move(trimmedText));
+        }
+        newSyntax.push_back(std::move(item));
+    }
+    // Nodes were collected back to front; restore the input order.
+    std::reverse(newSyntax.begin(), newSyntax.end());
+    syntax = std::move(newSyntax);
+}
+
 };
diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.hh b/tests/unit/libutil-support/tests/cli-literate-parser.hh
index 86a5bdd32..0ec673c1b 100644
--- a/tests/unit/libutil-support/tests/cli-literate-parser.hh
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.hh
@@ -81,9 +81,16 @@ public:
     /** Parses an input in a non-streaming fashion */
     static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
 
+    /** Returns, losslessly, the string that would have generated a syntax tree */
+    static auto unparse(std::string const & prompt, std::vector<Node> const & syntax, size_t indent = 2) -> std::string;
+
     /** Consumes a CLILiterateParser and gives you the syntax out of it */
     auto intoSyntax() && -> std::vector<Node>;
 
+    /** Tidies syntax to remove trailing whitespace from outputs and remove any
+     * empty prompts */
+    static void tidyOutputForComparison(std::vector<Node> & syntax);
+
 private:
 
     struct AccumulatingState
diff --git a/tests/unit/libutil-support/tests/terminal-code-eater.cc b/tests/unit/libutil-support/tests/terminal-code-eater.cc
new file mode 100644
index 000000000..51e1d565e
--- /dev/null
+++ b/tests/unit/libutil-support/tests/terminal-code-eater.cc
@@ -0,0 +1,85 @@
+#include "terminal-code-eater.hh"
+#include "debug-char.hh"
+#include <assert.h>
+#include <cstdint>
+#include <iostream>
+
+namespace nix {
+
+static constexpr const bool DEBUG_EATER = false;
+
+/** Feeds one byte through the DFA. Bytes outside escape sequences (other than
+ * '\r') are handed to on_char; bytes belonging to an escape are swallowed. */
+void TerminalCodeEater::feed(char c, std::function<void(char)> on_char)
+{
+    auto isParamChar = [](char v) -> bool { return v >= 0x30 && v <= 0x3f; };
+    auto isIntermediateChar = [](char v) -> bool { return v >= 0x20 && v <= 0x2f; };
+    auto isFinalChar = [](char v) -> bool { return v >= 0x40 && v <= 0x7e; };
+    if constexpr (DEBUG_EATER)
+        std::cerr << "eater" << DebugChar{c} << "\n";
+
+    switch (state) {
+    case State::ExpectESC:
+        switch (c) {
+        case '\x1b': // ESC; written as a hex escape because '\e' is a non-standard extension
+            transition(State::ExpectESCSeq);
+            return;
+        // Just eat \r, since it is part of clearing a line
+        case '\r':
+            return;
+        }
+        if constexpr (DEBUG_EATER)
+            std::cerr << "eater uneat" << DebugChar{c} << "\n";
+        on_char(c);
+        break;
+    case State::ExpectESCSeq:
+        switch (c) {
+        // CSI
+        case '[':
+            transition(State::InCSIParams);
+            return;
+        default: // any other escape is treated as two bytes long: drop this byte too
+            transition(State::ExpectESC);
+            return;
+        }
+        break;
+    // https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_(Control_Sequence_Introducer)_sequences
+    // A CSI sequence is: CSI [\x30-\x3f]* [\x20-\x2f]* [\x40-\x7e]
+    //                        ^ params     ^ intermediates ^ final byte
+    case State::InCSIParams:
+        if (isFinalChar(c)) {
+            transition(State::ExpectESC);
+            return;
+        } else if (isIntermediateChar(c)) {
+            transition(State::InCSIIntermediates);
+            return;
+        } else if (isParamChar(c)) {
+            return;
+        } else {
+            // Corrupt escape sequence: fail loudly for now instead of
+            // resynchronising with transition(State::ExpectESC).
+            assert(false && "Corrupt terminal escape sequence");
+            return;
+        }
+        break;
+    case State::InCSIIntermediates:
+        if (isFinalChar(c)) {
+            transition(State::ExpectESC);
+            return;
+        } else if (isIntermediateChar(c)) {
+            return;
+        } else {
+            // Parameter bytes may not follow intermediates, so this is corrupt
+            // too: fail loudly for now instead of resynchronising.
+            assert(false && "Corrupt terminal escape sequence in intermediates");
+            return;
+        }
+        break;
+    }
+}
+
+void TerminalCodeEater::transition(State new_state) // every state change in feed() goes through here
+{
+    state = new_state;
+}
+};
diff --git a/tests/unit/libutil-support/tests/terminal-code-eater.hh b/tests/unit/libutil-support/tests/terminal-code-eater.hh
new file mode 100644
index 000000000..d904bcc20
--- /dev/null
+++ b/tests/unit/libutil-support/tests/terminal-code-eater.hh
@@ -0,0 +1,29 @@
+#pragma once
+/// @file
+
+#include <functional>
+
+namespace nix {
+
+/** DFA that eats terminal escapes
+ * feed() forwards every byte that is not part of an escape sequence (or '\r') to on_char.
+ * See: https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
+ */
+class TerminalCodeEater
+{
+public:
+    void feed(char c, std::function<void(char)> on_char);
+
+private:
+    enum class State {
+        ExpectESC, // outside any escape sequence
+        ExpectESCSeq, // just saw ESC; the next byte selects the kind of escape
+        InCSIParams, // inside a CSI sequence, reading parameter bytes
+        InCSIIntermediates, // inside a CSI sequence, reading intermediate bytes
+    };
+
+    State state = State::ExpectESC;
+
+    void transition(State new_state);
+};
+};