burn it all

Change-Id: I0db6eee7d6aae3e02ea0155c04a55a7142e456c3
Add clang format configuration
2024-03-13 22:03:42 -07:00 · 2024-03-13 15:46:55 -07:00 · 2024-03-13 15:46:55 -07:00 · 2024-03-13 15:46:55 -07:00 · 2024-03-13 15:46:55 -07:00 · 2024-03-13 15:46:55 -07:00
30 changed files with 1221 additions and 6 deletions
--- a/.clang-format
+++ b/.clang-format
@ -0,0 +1,30 @@
+BasedOnStyle: LLVM
+IndentWidth: 4
+BreakBeforeBraces: Custom
+BraceWrapping:
+  AfterStruct: true
+  AfterClass: true
+  AfterFunction: true
+  AfterUnion: true
+  SplitEmptyRecord: false
+PointerAlignment: Middle
+FixNamespaceComments: false
+SortIncludes: Never
+#IndentPPDirectives: BeforeHash
+SpaceAfterCStyleCast: true
+SpaceAfterTemplateKeyword: false
+AccessModifierOffset: -4
+AlignAfterOpenBracket: AlwaysBreak
+AlignEscapedNewlines: DontAlign
+ColumnLimit: 120
+BreakStringLiterals: false
+BitFieldColonSpacing: None
+AllowShortFunctionsOnASingleLine: Empty
+AlwaysBreakTemplateDeclarations: Yes
+BinPackParameters: false
+BreakConstructorInitializers: BeforeComma
+EmptyLineAfterAccessModifier: Leave # change to always/never later?
+EmptyLineBeforeAccessModifier: Leave
+#PackConstructorInitializers: BinPack
+BreakBeforeBinaryOperators: NonAssignment
+AlwaysBreakBeforeMultilineStrings: true
--- a/1
+++ b/1
@ -41,6 +41,7 @@ makefiles += \
  tests/functional/ca/local.mk \
  tests/functional/dyn-drv/local.mk \
  tests/functional/test-libstoreconsumer/local.mk \
+  tests/functional/repl_characterization/local.mk \
  tests/functional/plugins/local.mk
 else
 makefiles += \
--- a/src/libstore/globals.cc
+++ b/src/libstore/globals.cc
@ -342,7 +342,7 @@ void initPlugins()
            void *handle =
                dlopen(file.c_str(), RTLD_LAZY | RTLD_LOCAL);
            if (!handle)
-                throw Error("could not dynamically open plugin file '%s': %s", file, dlerror());
+                warn("could not dynamically open plugin file '%s': %s", file, dlerror());
        }
    }

--- a/tests/functional/local.mk
+++ b/tests/functional/local.mk
@ -131,7 +131,7 @@ ifeq ($(HAVE_LIBCPUID), 1)
 endif

 ifeq ($(ENABLE_BUILD), yes)
-	nix_tests += test-libstoreconsumer.sh
+	nix_tests += test-libstoreconsumer.sh test-repl-characterization.sh

 	ifeq ($(BUILD_SHARED_LIBS), 1)
 		nix_tests += plugins.sh
@ -141,7 +141,10 @@ endif
 $(d)/test-libstoreconsumer.sh.test $(d)/test-libstoreconsumer.sh.test-debug: \
  $(buildprefix)$(d)/test-libstoreconsumer/test-libstoreconsumer
 $(d)/plugins.sh.test $(d)/plugins.sh.test-debug: \
-  $(buildprefix)$(d)/plugins/libplugintest.$(SO_EXT)
+  $(buildprefix)$(d)/plugins/libplugintest.$(SO_EXT) \
+  $(buildprefix)$(d)/plugins/libplugintestfail.$(SO_EXT)
+$(d)/test-repl-characterization.sh.test $(d)/test-repl-characterization.sh.test-debug: \
+  $(buildprefix)$(d)/repl_characterization/test-repl-characterization

 install-tests += $(foreach x, $(nix_tests), $(d)/$(x))

--- a/tests/functional/plugins.sh
+++ b/tests/functional/plugins.sh
@ -4,6 +4,20 @@ if [[ $BUILD_SHARED_LIBS != 1 ]]; then
    skipTest "Plugins are not supported"
 fi

-res=$(nix --option setting-set true --option plugin-files $PWD/plugins/libplugintest* eval --expr builtins.anotherNull)
+res=$(nix --option setting-set true --option plugin-files $PWD/plugins/libplugintest.* eval --expr builtins.anotherNull)

 [ "$res"x = "nullx" ]
+
+# Plugin load failing due to missing symbols
+res=$(nix --option plugin-files $PWD/plugins/libplugintestfail.* eval --expr '1234 + 5' 2>&1)
+# We expect this to succeed evaluating
+echo "$res" | grep 1239 >/dev/null
+# On Linux, we expect this to print some failure of dlopen.
+# Only on Linux do we expect for sure that -z now is set on the .so file, so it
+# will definitely fail to load instead of lazy loading (and thus not hitting
+# the missing symbol).
+# FIXME(jade): does there exist an equivalent of -z now on macOS that eluded us
+# in search?
+if [[ "$(uname -s)" == Linux ]]; then
+    echo "$res" | grep "could not dynamically open plugin file" >/dev/null
+fi
--- a/tests/functional/plugins/local.mk
+++ b/tests/functional/plugins/local.mk
@ -1,4 +1,4 @@
-libraries += libplugintest
+libraries += libplugintest libplugintestfail

 libplugintest_DIR := $(d)

@ -9,3 +9,19 @@ libplugintest_ALLOW_UNDEFINED := 1
 libplugintest_EXCLUDE_FROM_LIBRARY_LIST := 1

 libplugintest_CXXFLAGS := -I src/libutil -I src/libstore -I src/libexpr -I src/libfetchers
+
+libplugintestfail_DIR := $(d)
+
+libplugintestfail_SOURCES := $(d)/plugintestfail.cc
+
+libplugintestfail_ALLOW_UNDEFINED := 1
+
+libplugintestfail_EXCLUDE_FROM_LIBRARY_LIST := 1
+
+libplugintestfail_CXXFLAGS := -I src/libutil -I src/libstore -I src/libexpr -I src/libfetchers -DMISSING_REFERENCE
+
+# Make sure that the linker strictly evaluates all symbols on .so load on Linux
+# so it will definitely fail to load as expected.
+ifdef HOST_LINUX
+  libplugintestfail_LDFLAGS += -z now
+endif
--- a/tests/functional/plugins/plugintest.cc
+++ b/tests/functional/plugins/plugintest.cc
@ -1,8 +1,15 @@
 #include "config.hh"
 #include "primops.hh"
+#include <stdlib.h>

 using namespace nix;

+#ifdef MISSING_REFERENCE
+extern void meow();
+#else
+#define meow() {}
+#endif
+
 struct MySettings : Config
 {
    Setting<bool> settingSet{this, false, "setting-set",
@ -13,6 +20,11 @@ MySettings mySettings;

 static GlobalConfig::Register rs(&mySettings);

+[[gnu::used, gnu::unused, gnu::retain]]
+static void maybeRequireMeowForDlopen() {
+    meow();
+}
+
 static void prim_anotherNull (EvalState & state, const PosIdx pos, Value ** args, Value & v)
 {
    if (mySettings.settingSet)
--- a/tests/functional/plugins/plugintestfail.cc
+++ b/tests/functional/plugins/plugintestfail.cc
@ -0,0 +1 @@
+plugintest.cc
--- a/tests/functional/repl_characterization/.gitignore
+++ b/tests/functional/repl_characterization/.gitignore
@ -0,0 +1 @@
+test-repl-characterization
--- a/tests/functional/repl_characterization/data/basic.ast
+++ b/tests/functional/repl_characterization/data/basic.ast
@ -0,0 +1,17 @@
+Commentary "meow meow meow"
+Command "command"
+Output "output output one"
+Output ""
+Output ""
+Output "output output two"
+Commentary "meow meow"
+Command "command two"
+Output "output output output"
+Commentary "commentary"
+Output "output output output"
+Output ""
+Commentary "the blank below should be chomped"
+Command "command three"
+Commentary ""
+Output "meow output"
+Output ""
--- a/tests/functional/repl_characterization/data/basic.test
+++ b/tests/functional/repl_characterization/data/basic.test
@ -0,0 +1,17 @@
+meow meow meow
+  nix-repl> command
+  output output one
+
+
+  output output two
+meow meow
+  nix-repl> command two
+  output output output
+commentary
+  output output output
+
+the blank below should be chomped
+  nix-repl> command three
+
+  meow output
+
--- a/tests/functional/repl_characterization/data/basic_repl.test
+++ b/tests/functional/repl_characterization/data/basic_repl.test
@ -0,0 +1,60 @@
+  nix-repl> 1 + 1
+  2
+
+  nix-repl> :doc builtins.head
+  Synopsis: builtins.head list
+
+      Return the first element of a list; abort evaluation if
+      the argument isn’t a list or is an empty list. You can
+      test whether a list is empty by comparing it with [].
+
+  nix-repl> f = a: "" + a
+
+Expect the trace to not contain any traceback:
+
+  nix-repl> f 2
+  error:
+         … while evaluating a path segment
+           at «string»:1:10:
+              1|  a: "" + a
+               |          ^
+
+         error: cannot coerce an integer to a string: 2
+
+  nix-repl> :te
+  showing error traces
+
+Expect the trace to have traceback:
+
+  nix-repl> f 2
+  error:
+         … from call site
+           at «string»:1:1:
+              1| f 2
+               | ^
+
+         … while calling anonymous lambda
+           at «string»:1:2:
+              1|  a: "" + a
+               |  ^
+
+         … while evaluating a path segment
+           at «string»:1:10:
+              1|  a: "" + a
+               |          ^
+
+         error: cannot coerce an integer to a string: 2
+
+Turning it off should also work:
+
+  nix-repl> :te
+  not showing error traces
+
+  nix-repl> f 2
+  error:
+         … while evaluating a path segment
+           at «string»:1:10:
+              1|  a: "" + a
+               |          ^
+
+         error: cannot coerce an integer to a string: 2
--- a/tests/functional/repl_characterization/data/basic_tidied.ast
+++ b/tests/functional/repl_characterization/data/basic_tidied.ast
@ -0,0 +1,10 @@
+Command "command"
+Output "output output one"
+Output ""
+Output ""
+Output "output output two"
+Command "command two"
+Output "output output output"
+Output "output output output"
+Command "command three"
+Output "meow output"
--- a/tests/functional/repl_characterization/data/regression_9917.nix
+++ b/tests/functional/repl_characterization/data/regression_9917.nix
@ -0,0 +1,9 @@
+let
+  a = builtins.trace "before inner break" (
+    builtins.break { msg = "hello"; }
+  );
+  b = builtins.trace "before outer break" (
+    builtins.break a
+  );
+in
+  b
--- a/tests/functional/repl_characterization/data/regression_9917.test
+++ b/tests/functional/repl_characterization/data/regression_9917.test
@ -0,0 +1,34 @@
+https://github.com/NixOS/nix/pull/9917 (Enter debugger more reliably in let expressions and function calls)
+
+This test ensures that continues don't skip opportunities to enter the debugger.
+  trace: before outer break
+  info: breakpoint reached
+
+  nix-repl> :c
+  trace: before inner break
+  info: breakpoint reached
+
+  nix-repl> :bt
+
+  0: error: breakpoint reached
+  «none»:0
+  1: while calling a function
+  TEST_DATA/regression_9917.nix:3:5
+
+       2|   a = builtins.trace "before inner break" (
+       3|     builtins.break { msg = "hello"; }
+        |     ^
+       4|   );
+
+  2: while calling a function
+  TEST_DATA/regression_9917.nix:2:7
+
+       1| let
+       2|   a = builtins.trace "before inner break" (
+        |       ^
+       3|     builtins.break { msg = "hello"; }
+
+  nix-repl> :c
+
+  nix-repl> msg
+  "hello"
--- a/tests/functional/repl_characterization/data/regression_9918.nix
+++ b/tests/functional/repl_characterization/data/regression_9918.nix
@ -0,0 +1,5 @@
+let
+  r = [];
+  x = builtins.throw r;
+in
+  x
--- a/tests/functional/repl_characterization/data/regression_9918.test
+++ b/tests/functional/repl_characterization/data/regression_9918.test
@ -0,0 +1,16 @@
+  error:
+         … while evaluating the error message passed to builtin.throw
+
+         error: cannot coerce a list to a string: [ ]
+
+We expect to be able to see locals like r in the debugger:
+
+  nix-repl> r
+  [ ]
+
+  nix-repl> :env
+  Env level 0
+  static: x r
+
+  Env level 1
+  builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation
--- a/tests/functional/repl_characterization/data/stack_vars.nix
+++ b/tests/functional/repl_characterization/data/stack_vars.nix
@ -0,0 +1,9 @@
+let
+  a = builtins.trace "before inner break" (
+    let meow' = 3; in builtins.break { msg = "hello"; }
+  );
+  b = builtins.trace "before outer break" (
+    let meow = 2; in builtins.break a
+  );
+in
+  b
--- a/tests/functional/repl_characterization/data/stack_vars.test
+++ b/tests/functional/repl_characterization/data/stack_vars.test
@ -0,0 +1,74 @@
+  trace: before outer break
+  info: breakpoint reached
+
+Here we are in the outer break and the let of "meow". st should show meow there
+as it is in scope.
+  nix-repl> :st
+
+  0: error: breakpoint reached
+  «none»:0
+  Env level 0
+  static: meow
+
+  Env level 1
+  static: a b
+
+  Env level 2
+  builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation
+
+  nix-repl> meow
+  2
+
+If we :st past the frame in the backtrace with the meow in it, the meow should not be there.
+
+  nix-repl> :st 3
+
+  3: while calling a function
+  TEST_DATA/stack_vars.nix:5:7
+
+       4|   );
+       5|   b = builtins.trace "before outer break" (
+        |       ^
+       6|     let meow = 2; in builtins.break a
+
+  Env level 0
+  static: a b
+
+  Env level 1
+  builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation
+
+  nix-repl> :c
+  trace: before inner break
+  info: breakpoint reached
+
+  nix-repl> :st
+
+  0: error: breakpoint reached
+  «none»:0
+  Env level 0
+  static: meow'
+
+  Env level 1
+  static: a b
+
+  Env level 2
+  builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation
+
+  nix-repl> meow'
+  3
+
+  nix-repl> :st 3
+
+  3: while calling a function
+  TEST_DATA/stack_vars.nix:2:7
+
+       1| let
+       2|   a = builtins.trace "before inner break" (
+        |       ^
+       3|     let meow' = 3; in builtins.break { msg = "hello"; }
+
+  Env level 0
+  static: a b
+
+  Env level 1
+  builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation
--- a/tests/functional/repl_characterization/local.mk
+++ b/tests/functional/repl_characterization/local.mk
@ -0,0 +1,15 @@
+programs += test-repl-characterization
+
+test-repl-characterization_DIR := $(d)
+
+# do not install
+test-repl-characterization_INSTALL_DIR :=
+
+test-repl-characterization_SOURCES := \
+  $(wildcard $(d)/*.cc) \
+
+test-repl-characterization_CXXFLAGS += -I src/libutil -I tests/unit/libutil-support -DNIX_BIN_DIR="\"$(bindir)\""
+
+test-repl-characterization_LIBS = libutil libutil-test-support
+
+test-repl-characterization_LDFLAGS = $(THREAD_LDFLAGS) $(SODIUM_LIBS) $(EDITLINE_LIBS) $(BOOST_LDFLAGS) $(LOWDOWN_LIBS) $(GTEST_LIBS)
--- a/tests/functional/repl_characterization/repl_characterization.cc
+++ b/tests/functional/repl_characterization/repl_characterization.cc
@ -0,0 +1,129 @@
+#include <filesystem>
+#include <gtest/gtest.h>
+
+#include <string>
+#include <string_view>
+#include <optional>
+#include <unistd.h>
+#include <boost/algorithm/string/replace.hpp>
+
+#include "test-session.hh"
+#include "util.hh"
+#include "tests/characterization.hh"
+#include "tests/cli-literate-parser.hh"
+#include "tests/terminal-code-eater.hh"
+
+using namespace std::string_literals;
+
+namespace nix {
+
+static constexpr const char * REPL_PROMPT = "nix-repl> ";
+
+// ASCII ENQ character
+static constexpr const char * AUTOMATION_PROMPT = "\x05";
+
+/**
+ * Returns `outLog` without a trailing newline + AUTOMATION_PROMPT pair, or
+ * `outLog` untouched when it does not end with that pair.
+ */
+static std::string_view trimOutLog(std::string_view outLog)
+{
+    const std::string promptTail = std::string{"\n"} + AUTOMATION_PROMPT;
+    if (!outLog.ends_with(promptTail)) {
+        return outLog;
+    }
+    return outLog.substr(0, outLog.size() - promptTail.size());
+}
+
+/**
+ * Fixture that replays a literate repl transcript against a freshly started
+ * `nix repl` process and compares the captured session with the transcript.
+ */
+class ReplSessionTest : public CharacterizationTest
+{
+    Path unitTestData = getUnitTestData();
+
+public:
+    /** Builds the path of a golden file inside the unit test data directory. */
+    Path goldenMaster(std::string_view testStem) const override
+    {
+        return unitTestData + "/" + testStem;
+    }
+
+    /**
+     * Runs every command found in `content` through a repl started with
+     * `extraArgs` appended to the default arguments, then asserts that the
+     * tidied session log equals the tidied transcript.
+     *
+     * @param content   literate transcript, parsed with REPL_PROMPT as the prompt
+     * @param extraArgs additional command line arguments for `nix repl`
+     */
+    void runReplTest(std::string_view const & content, std::vector<std::string> extraArgs = {}) const
+    {
+        auto syntax = CLILiterateParser::parse(REPL_PROMPT, content);
+
+        // TODO: work out why --quiet has to be passed twice
+        Strings args{"--quiet", "repl", "--quiet", "--extra-experimental-features", "repl-automation"};
+        args.insert(args.end(), extraArgs.begin(), extraArgs.end());
+
+        // NIX_BIN_DIR from the environment wins over the compiled-in default
+        auto nixBin = canonPath(getEnvNonEmpty("NIX_BIN_DIR").value_or(NIX_BIN_DIR));
+
+        auto process = RunningProcess::start(nixBin + "/nix", args);
+        auto session = TestSession{AUTOMATION_PROMPT, std::move(process)};
+
+        for (auto & bit : syntax) {
+            // Only commands are sent; commentary and expected output are compared afterwards
+            if (bit.kind != CLILiterateParser::NodeKind::COMMAND) {
+                continue;
+            }
+
+            ASSERT_TRUE(session.waitForPrompt()) << "repl output ended before a prompt appeared for command: " << bit.text;
+            session.runCommand(bit.text);
+        }
+        ASSERT_TRUE(session.waitForPrompt()) << "repl output ended before the final prompt appeared";
+        session.close();
+
+        // Make the log independent of where the test data lives on disk
+        auto replacedOutLog = boost::algorithm::replace_all_copy(session.outLog, unitTestData, "TEST_DATA");
+        auto cleanedOutLog = trimOutLog(replacedOutLog);
+
+        // The captured log carries no indentation, hence indent = 0
+        auto parsedOutLog = CLILiterateParser::parse(AUTOMATION_PROMPT, cleanedOutLog, 0);
+
+        CLILiterateParser::tidyOutputForComparison(parsedOutLog);
+        CLILiterateParser::tidyOutputForComparison(syntax);
+
+        ASSERT_EQ(parsedOutLog, syntax);
+    }
+};
+
+// Checks the literate parser itself: basic.test is parsed and the resulting
+// nodes are compared with the stored .ast files, raw and tidied.
+TEST_F(ReplSessionTest, parses)
+{
+    // Renders one printed node per line
+    auto render = [](std::vector<CLILiterateParser::Node> const & nodes) {
+        std::ostringstream rendered{};
+        for (auto & node : nodes) {
+            rendered << node.print() << "\n";
+        }
+        return rendered.str();
+    };
+
+    writeTest("basic.ast", [this, &render]() {
+        const std::string transcript = readFile(goldenMaster("basic.test"));
+        CLILiterateParser streamingParser{REPL_PROMPT};
+        streamingParser.feed(transcript);
+        return render(streamingParser.syntax());
+    });
+
+    writeTest("basic_tidied.ast", [this, &render]() {
+        const std::string transcript = readFile(goldenMaster("basic.test"));
+        auto nodes = CLILiterateParser::parse(REPL_PROMPT, transcript);
+        CLILiterateParser::tidyOutputForComparison(nodes);
+        return render(nodes);
+    });
+}
+
+// Replays the basic_repl.test transcript against a repl started with the default arguments.
+TEST_F(ReplSessionTest, repl_basic)
+{
+    readTest("basic_repl.test", [this](std::string input) { runReplTest(input); });
+}
+
+// Defines a test called `name` that reads `<name>.test` through readTest and
+// replays it against a repl started with `--debugger -f goldenMaster("<name>.nix")`.
+#define DEBUGGER_TEST(name) \
+    TEST_F(ReplSessionTest, name) \
+    { \
+        readTest(#name ".test", [this](std::string input) { \
+            runReplTest(input, {"--debugger", "-f", goldenMaster(#name ".nix")}); \
+        }); \
+    }
+
+DEBUGGER_TEST(regression_9918);
+DEBUGGER_TEST(regression_9917);
+DEBUGGER_TEST(stack_vars);
+
+};
--- a/tests/functional/repl_characterization/test-session.cc
+++ b/tests/functional/repl_characterization/test-session.cc
@ -0,0 +1,151 @@
+#include <iostream>
+#include <unistd.h>
+
+#include "test-session.hh"
+#include "util.hh"
+#include "tests/debug-char.hh"
+
+namespace nix {
+
+static constexpr const bool DEBUG_REPL_PARSER = false;
+
+/**
+ * Starts `executable` with `args` in a child process whose stdin and stdout
+ * are connected to fresh pipes and whose stderr is redirected onto stdout.
+ *
+ * @return the child's pid together with both pipes; the child-side ends have
+ *         been closed in the parent
+ */
+RunningProcess RunningProcess::start(std::string executable, Strings args)
+{
+    // The executable path doubles as argv[0]
+    args.push_front(executable);
+
+    Pipe procStdin{};
+    procStdin.create();
+    Pipe procStdout{};
+    procStdout.create();
+
+    // Body of the child process: rewire the standard streams, then exec.
+    auto childMain = [&]() {
+        if (dup2(procStdout.writeSide.get(), STDOUT_FILENO) == -1) {
+            throw SysError("dupping stdout");
+        }
+        if (dup2(procStdin.readSide.get(), STDIN_FILENO) == -1) {
+            throw SysError("dupping stdin");
+        }
+        // The parent-side ends are of no use to the child
+        procStdin.writeSide.close();
+        procStdout.readSide.close();
+        if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1) {
+            throw SysError("dupping stderr");
+        }
+        auto argv = stringsToCharPtrs(args);
+        execve(executable.c_str(), argv.data(), environ);
+        // execve only returns on failure
+        throw SysError("exec did not happen");
+    };
+
+    // This is separate from runProgram2 because we have different IO requirements
+    pid_t pid = startProcess(childMain);
+
+    // The child-side ends are of no use to the parent
+    procStdout.writeSide.close();
+    procStdin.readSide.close();
+
+    return RunningProcess{
+        .pid = pid,
+        .procStdin = std::move(procStdin),
+        .procStdout = std::move(procStdout),
+    };
+}
+
+/** Prints a lowercase name for a parser state; only used by the debug tracing. */
+[[gnu::unused]]
+std::ostream & operator<<(std::ostream & os, ReplOutputParser::State s)
+{
+    if (s == ReplOutputParser::State::Prompt) {
+        return os << "prompt";
+    }
+    if (s == ReplOutputParser::State::Context) {
+        return os << "context";
+    }
+    // Values outside the enumerators print nothing
+    return os;
+}
+
+/**
+ * Switches to `new_state` and restarts prompt matching at the first prompt
+ * character. `responsible_char` and `wasPrompt` only feed the debug trace.
+ */
+void ReplOutputParser::transition(State new_state, char responsible_char, bool wasPrompt)
+{
+    if constexpr (DEBUG_REPL_PARSER) {
+        const char * promptTag = wasPrompt ? " [prompt]" : "";
+        std::cerr << "transition " << new_state << " for " << DebugChar{responsible_char} << promptTag << "\n";
+    }
+    pos_in_prompt = 0;
+    state = new_state;
+}
+
+/**
+ * Consumes one character of repl output.
+ *
+ * @return true exactly when `c` completes the prompt at the start of a line
+ */
+bool ReplOutputParser::feed(char c)
+{
+    // A newline always restarts prompt matching on the following line
+    if (c == '\n') {
+        transition(State::Prompt, c);
+        return false;
+    }
+    // Once a line has diverged from the prompt, the rest of it is ignored
+    if (state != State::Prompt) {
+        return false;
+    }
+
+    const size_t lastIndex = prompt.length() - 1;
+    if (pos_in_prompt >= lastIndex) {
+        // Either the final prompt character matches or the line is ordinary output
+        const bool completed = pos_in_prompt == lastIndex && prompt[pos_in_prompt] == c;
+        transition(State::Context, c, completed);
+        return completed;
+    }
+    if (prompt[pos_in_prompt] != c) {
+        transition(State::Context, c);
+        return false;
+    }
+    // Matched a non-final prompt character; keep going
+    ++pos_in_prompt;
+    return false;
+}
+
+/**
+ * Reads the child's output until a prompt shows up.
+ *
+ * Every chunk is passed through the terminal code eater; the characters it
+ * lets through are appended to `outLog` and fed to the prompt parser. A chunk
+ * is always processed completely, so output that follows the prompt within the
+ * same read is logged as well.
+ *
+ * @return true once a processed chunk contained a prompt, false on end of file
+ * @throws SysError when read() fails
+ */
+bool TestSession::waitForPrompt()
+{
+    std::vector<char> buf(1024);
+
+    for (;;) {
+        ssize_t res = read(proc.procStdout.readSide.get(), buf.data(), buf.size());
+
+        if (res < 0) {
+            throw SysError("read");
+        }
+        if (res == 0) {
+            return false;
+        }
+
+        bool foundPrompt = false;
+        for (ssize_t i = 0; i < res; ++i) {
+            bool wasEaten = true;
+            eater.feed(buf[i], [&](char c) {
+                wasEaten = false;
+                // The parser must see the same character that goes into the
+                // log, i.e. the one the eater emitted. feed() is called first
+                // so it always runs, even after a prompt was already found.
+                foundPrompt = outputParser.feed(c) || foundPrompt;
+
+                outLog.push_back(c);
+            });
+
+            if constexpr (DEBUG_REPL_PARSER) {
+                std::cerr << "raw " << DebugChar{buf[i]} << (wasEaten ? " [eaten]" : "") << "\n";
+            }
+        }
+
+        if (foundPrompt) {
+            return true;
+        }
+    }
+}
+
+// Closes both pipes to the child, stdin first.
+// NOTE(review): nothing in this function waits on proc.pid — confirm the child is reaped elsewhere.
+void TestSession::close()
+{
+    proc.procStdin.close();
+    proc.procStdout.close();
+}
+
+/**
+ * Sends `command` plus a newline to the child's stdin and records the same
+ * line in `outLog`.
+ */
+void TestSession::runCommand(std::string command)
+{
+    if constexpr (DEBUG_REPL_PARSER) {
+        std::cerr << "runCommand " << command << "\n";
+    }
+
+    // We have to feed a newline into the output parser, since Nix might not
+    // give us a newline before a prompt in all cases (it might clear line
+    // first, e.g.)
+    outputParser.feed('\n');
+
+    const std::string line = command + "\n";
+    // Echo is disabled, so we have to make our own
+    outLog.append(line);
+    writeFull(proc.procStdin.writeSide.get(), line, false);
+}
+
+};
--- a/tests/functional/repl_characterization/test-session.hh
+++ b/tests/functional/repl_characterization/test-session.hh
@ -0,0 +1,69 @@
+#pragma once
+///@file
+
+#include <sched.h>
+#include <string>
+
+#include "util.hh"
+#include "tests/terminal-code-eater.hh"
+
+namespace nix {
+
+/** A started child process together with the pipes attached to its stdin and stdout. */
+struct RunningProcess
+{
+    /** Process id of the child */
+    pid_t pid;
+    /** Pipe feeding the child's stdin */
+    Pipe procStdin;
+    /** Pipe carrying the child's stdout */
+    Pipe procStdout;
+
+    /** Starts `executable` with the given arguments and returns the running process */
+    static RunningProcess start(std::string executable, Strings args);
+};
+
+/** DFA that catches repl prompts */
+class ReplOutputParser
+{
+public:
+    /** @param prompt the prompt text to look for; must not be empty */
+    ReplOutputParser(std::string prompt)
+        : prompt(prompt)
+    {
+        assert(!prompt.empty());
+    }
+    /** Feeds in a character and returns whether this is an open prompt */
+    bool feed(char c);
+
+    enum class State {
+        /** Still matching the prompt against the current line */
+        Prompt,
+        /** Any other part of a line */
+        Context,
+    };
+
+private:
+    State state = State::Prompt;
+    /** Index of the prompt character expected next */
+    size_t pos_in_prompt = 0;
+    std::string const prompt;
+
+    /** Changes state; the last two arguments are only used for debug output */
+    void transition(State state, char responsible_char, bool wasPrompt = false);
+};
+
+/** An interactive session with a running process, driven prompt by prompt. */
+struct TestSession
+{
+    RunningProcess proc;
+    /** Detects prompts in the process output */
+    ReplOutputParser outputParser;
+    /** Filters the raw process output before it is logged and parsed */
+    TerminalCodeEater eater;
+    /** Filtered output plus the commands sent so far */
+    std::string outLog;
+    std::string prompt;
+
+    /** Takes ownership of `proc` and waits for `prompt` in its output */
+    TestSession(std::string prompt, RunningProcess && proc)
+        : proc(std::move(proc))
+        , outputParser(prompt)
+        , eater({})
+        , outLog({})
+        , prompt(prompt)
+    {
+    }
+
+    /** Reads output until a prompt appears; returns false on end of file */
+    bool waitForPrompt();
+
+    /** Sends one command line to the process and records it in outLog */
+    void runCommand(std::string command);
+
+    /** Closes the pipes to the process */
+    void close();
+};
+};
--- a/tests/functional/test-repl-characterization.sh
+++ b/tests/functional/test-repl-characterization.sh
@ -0,0 +1,3 @@
+source common.sh
+
+_NIX_TEST_UNIT_DATA=$(pwd)/repl_characterization/data ./repl_characterization/test-repl-characterization
--- a/tests/unit/libutil-support/local.mk
+++ b/tests/unit/libutil-support/local.mk
@ -10,6 +10,7 @@ libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc)

 libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES)

-libutil-test-support_LIBS = libutil
+# libexpr so we can steal their string printer from print.cc
+libutil-test-support_LIBS = libutil libexpr

 libutil-test-support_LDFLAGS := -pthread -lrapidcheck
--- a/tests/unit/libutil-support/tests/cli-literate-parser.cc
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.cc
@ -0,0 +1,246 @@
+#include "cli-literate-parser.hh"
+#include "libexpr/print.hh"
+#include "debug-char.hh"
+#include "types.hh"
+#include "util.hh"
+#include <iostream>
+#include <memory>
+#include <boost/algorithm/string/trim.hpp>
+
+using namespace std::string_literals;
+
+namespace nix {
+
+static constexpr const bool DEBUG_PARSER = false;
+
+/** Returns a short name for the alternative currently held by `s`, for debug output. */
+constexpr auto CLILiterateParser::stateDebug(State const & s) -> const char *
+{
+    return std::visit(
+        overloaded{// clang-format off
+            [](Indent const&) -> const char * { return "indent"; },
+            [](Commentary const&) -> const char * { return "commentary"; },
+            [](Prompt const&) -> const char * { return "prompt"; },
+            [](Command const&) -> const char * { return "command"; },
+            [](OutputLine const&) -> const char * { return "output_line"; }},
+        // clang-format on
+        s);
+}
+
+/** Renders the node as its kind name followed by the text as a string literal. */
+auto CLILiterateParser::Node::print() const -> std::string
+{
+    const char * label = "";
+    switch (kind) {
+    case NodeKind::COMMENTARY:
+        label = "Commentary ";
+        break;
+    case NodeKind::COMMAND:
+        label = "Command ";
+        break;
+    case NodeKind::OUTPUT:
+        label = "Output ";
+        break;
+    }
+
+    std::ostringstream rendered{};
+    rendered << label;
+    printLiteralString(rendered, this->text);
+    return rendered.str();
+}
+
+/**
+ * GoogleTest printer for node lists: writes one printed node per line so that
+ * assertion failures show the two sides as readable, line-aligned listings.
+ */
+void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os)
+{
+    for (auto & node : nodes) {
+        // A real newline; "\\n" would emit a backslash followed by 'n'
+        *os << node.print() << "\n";
+    }
+}
+
+// One-shot parse: feeds all of `input` to a temporary parser and returns its nodes.
+auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) -> std::vector<Node>
+{
+    CLILiterateParser p{std::move(prompt), indent};
+    p.feed(input);
+    return std::move(p).intoSyntax();
+}
+
+// Moves the collected nodes out of the parser; only callable on an rvalue parser.
+auto CLILiterateParser::intoSyntax() && -> std::vector<Node>
+{
+    return std::move(this->syntax_);
+}
+
+// Sets up a parser for the given prompt and indent width. With an indent of
+// zero there is nothing to skip, so parsing starts directly in the Prompt state.
+CLILiterateParser::CLILiterateParser(std::string prompt, size_t indent)
+    : state_(indent == 0 ? State(Prompt{}) : State(Indent{}))
+    , prompt_(prompt)
+    , indent_(indent)
+    , lastWasOutput_(false)
+    , syntax_{}
+{
+    // An empty prompt is rejected up front
+    assert(!prompt.empty());
+}
+
+// Advances the state machine by one character. Newlines finish the current
+// line via onNewline(); every other character is handled by the current state.
+void CLILiterateParser::feed(char c)
+{
+    if constexpr (DEBUG_PARSER) {
+        std::cout << stateDebug(state_) << " " << DebugChar{c} << "\n";
+    }
+
+    if (c == '\n') {
+        onNewline();
+        return;
+    }
+
+    std::visit(
+        overloaded{
+            [&](Indent & s) {
+                if (c == ' ') {
+                    // Count indent spaces; a full indent means a prompt or output may follow
+                    if (++s.pos >= indent_) {
+                        transition(Prompt{});
+                    }
+                } else {
+                    // A non-space before the indent is complete starts commentary;
+                    // the spaces consumed so far are not part of its text
+                    transition(Commentary{AccumulatingState{.lineAccumulator = std::string{c}}});
+                }
+            },
+            [&](Prompt & s) {
+                if (s.pos >= prompt_.length()) {
+                    // The whole prompt was matched, so this is the first character of the command
+                    transition(Command{AccumulatingState{.lineAccumulator = std::string{c}}});
+                    return;
+                } else if (c == prompt_[s.pos]) {
+                    // good prompt character
+                    ++s.pos;
+                } else {
+                    // didn't match the prompt, so it must have actually been output.
+                    s.lineAccumulator.push_back(c);
+                    transition(OutputLine{AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}});
+                    return;
+                }
+                // Keep matched prompt characters in case the line turns out to be output
+                s.lineAccumulator.push_back(c);
+            },
+            // Remaining states just collect the rest of the line
+            [&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }},
+        state_);
+}
+
+// Ends the current line: appends one node for it, depending on the state the
+// line ended in, and resets the parser for the next line.
+void CLILiterateParser::onNewline()
+{
+    State lastState = std::move(state_);
+    // Becomes lastWasOutput_ once the node has been emitted
+    bool newLastWasOutput = false;
+
+    syntax_.push_back(std::visit(
+        overloaded{
+            [&](Indent & s) {
+                // XXX: technically this eats trailing spaces
+
+                // a newline following output is considered part of that output
+                if (lastWasOutput_) {
+                    newLastWasOutput = true;
+                    return Node::mkOutput("");
+                }
+                return Node::mkCommentary("");
+            },
+            [&](Commentary & s) { return Node::mkCommentary(std::move(s.lineAccumulator)); },
+            [&](Command & s) { return Node::mkCommand(std::move(s.lineAccumulator)); },
+            [&](OutputLine & s) {
+                newLastWasOutput = true;
+                return Node::mkOutput(std::move(s.lineAccumulator));
+            },
+            [&](Prompt & s) {
+                // INDENT followed by newline is also considered a blank output line
+                // NOTE(review): unlike OutputLine this leaves newLastWasOutput false — confirm that is intended
+                return Node::mkOutput(std::move(s.lineAccumulator));
+            }},
+        lastState));
+
+    transition(Indent{});
+    lastWasOutput_ = newLastWasOutput;
+}
+
+// Feeds every character of `s` to the parser, in order.
+void CLILiterateParser::feed(std::string_view s)
+{
+    for (char ch : s) {
+        feed(ch);
+    }
+}
+
+// Makes `new_state` the current state, replacing Indent with Prompt when the
+// parser was configured with an indent of zero.
+void CLILiterateParser::transition(State new_state)
+{
+    // When we expect INDENT and we are parsing without indents, commentary
+    // cannot exist, so we want to transition directly into PROMPT before
+    // resuming normal processing.
+    if (Indent * i = std::get_if<Indent>(&new_state); i != nullptr && indent_ == 0) {
+        new_state = Prompt{AccumulatingState{}, i->pos};
+    }
+
+    state_ = new_state;
+}
+
+// Read-only access to the nodes emitted so far (one per completed line).
+auto CLILiterateParser::syntax() const -> std::vector<Node> const &
+{
+    return syntax_;
+}
+
+/**
+ * Turns nodes back into transcript text: commands become an indented prompt
+ * line, output becomes an indented line, commentary is currently left out.
+ */
+auto CLILiterateParser::unparse(const std::string & prompt, const std::vector<Node> & syntax, size_t indent)
+    -> std::string
+{
+    const std::string padding(indent, ' ');
+    std::ostringstream text{};
+
+    for (auto & node : syntax) {
+        if (node.kind == NodeKind::COMMAND) {
+            text << padding << prompt << node.text << "\n";
+        } else if (node.kind == NodeKind::OUTPUT) {
+            text << padding << node.text << "\n";
+        }
+        // TODO: should not ignore commentary
+    }
+
+    return text.str();
+}
+
+// Normalizes a node list in place so two transcripts can be compared:
+// commentary and empty commands are removed, output lines are right-trimmed,
+// and blank output next to a command or at the very end is removed.
+// The list is scanned back to front and the result is reversed at the end.
+void CLILiterateParser::tidyOutputForComparison(std::vector<Node> & syntax)
+{
+    std::vector<Node> newSyntax{};
+
+    // Eat trailing newlines, so assume that the very end was actually a command
+    // Because of the reverse scan, "last" is the node that follows in document order.
+    bool lastWasCommand = true;
+    bool newLastWasCommand = true;
+
+    auto v = std::ranges::reverse_view(syntax);
+
+    for (auto it = v.begin(); it != v.end(); ++it) {
+        Node item = *it;
+
+        lastWasCommand = newLastWasCommand;
+        // chomp commentary
+        if (item.kind == NodeKind::COMMENTARY) {
+            continue;
+        }
+
+        if (item.kind == NodeKind::COMMAND) {
+            newLastWasCommand = true;
+
+            if (item.text == "") {
+                // chomp empty commands
+                continue;
+            }
+        }
+
+        if (item.kind == NodeKind::OUTPUT) {
+            // TODO: horrible
+            // it + 1 is the node directly before this one in document order
+            bool nextIsCommand = (it + 1 == v.end()) ? false : (it + 1)->kind == NodeKind::COMMAND;
+            std::string trimmedText = boost::algorithm::trim_right_copy(item.text);
+            if ((lastWasCommand || nextIsCommand) && trimmedText == "") {
+                // chomp empty text above or directly below commands
+                // newLastWasCommand is left alone, so a run of blank lines is chomped as a whole
+                continue;
+            }
+
+            // real output, stop chomping
+            newLastWasCommand = false;
+
+            item = Node::mkOutput(std::move(trimmedText));
+        }
+        newSyntax.push_back(std::move(item));
+    }
+
+    // Restore document order
+    std::reverse(newSyntax.begin(), newSyntax.end());
+    syntax = std::move(newSyntax);
+}
+
+};
--- a/tests/unit/libutil-support/tests/cli-literate-parser.hh
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.hh
@ -0,0 +1,134 @@
+#pragma once
+///@file
+
+#include <compare>
+#include <cstddef>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <variant>
+#include <vector>
+
+namespace nix {
+/*
+ * A DFA parser for literate test cases for CLIs.
+ *
+ * FIXME: implement merging of these, so you can auto update cases that have
+ * comments.
+ *
+ * Format:
+ * COMMENTARY
+ * INDENT PROMPT COMMAND
+ * INDENT OUTPUT
+ *
+ * e.g.
+ * commentary commentary commentary
+ *   nix-repl> :t 1
+ *   an integer
+ *
+ * Yields:
+ * Commentary "commentary commentary commentary"
+ * Command ":t 1"
+ * Output "an integer"
+ *
+ * Note: one Output line is generated for each line of the sources, because
+ * this is effectively necessary to be able to align them in the future to
+ * auto-update tests.
+ */
+class CLILiterateParser
+{
+public:
+
+    /** Which of the three line types of the format a node represents */
+    enum class NodeKind {
+        COMMENTARY,
+        COMMAND,
+        OUTPUT,
+    };
+
+    /** One parsed line: its kind plus its text */
+    struct Node
+    {
+        NodeKind kind;
+        std::string text;
+        std::strong_ordering operator<=>(Node const &) const = default;
+
+        // The factories below take `text` by value as a sink parameter and
+        // move it into the node, so passing an rvalue costs no copy.
+
+        static Node mkCommentary(std::string text)
+        {
+            return Node{.kind = NodeKind::COMMENTARY, .text = std::move(text)};
+        }
+
+        static Node mkCommand(std::string text)
+        {
+            return Node{.kind = NodeKind::COMMAND, .text = std::move(text)};
+        }
+
+        static Node mkOutput(std::string text)
+        {
+            return Node{.kind = NodeKind::OUTPUT, .text = std::move(text)};
+        }
+
+        auto print() const -> std::string;
+    };
+
+    CLILiterateParser(std::string prompt, size_t indent = 2);
+
+    /** Returns the nodes parsed so far */
+    auto syntax() const -> std::vector<Node> const &;
+
+    /** Feeds a character into the parser */
+    void feed(char c);
+
+    /** Feeds a string into the parser */
+    void feed(std::string_view s);
+
+    /** Parses an input in a non-streaming fashion */
+    static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
+
+    /** Returns, losslessly, the string that would have generated a syntax tree */
+    static auto unparse(std::string const & prompt, std::vector<Node> const & syntax, size_t indent = 2) -> std::string;
+
+    /** Consumes a CLILiterateParser and gives you the syntax out of it */
+    auto intoSyntax() && -> std::vector<Node>;
+
+    /** Tidies syntax for comparison: drops commentary and empty prompts,
+     * removes trailing whitespace from outputs, and drops blank output lines
+     * that directly follow a command or that precede a command or the end of
+     * the input */
+    static void tidyOutputForComparison(std::vector<Node> & syntax);
+
+private:
+
+    // Parser states. The ones deriving from AccumulatingState collect the
+    // characters of the line they are reading in `lineAccumulator`.
+    struct AccumulatingState
+    {
+        std::string lineAccumulator;
+    };
+    struct Indent
+    {
+        size_t pos = 0;
+    };
+    struct Commentary : public AccumulatingState
+    {};
+    struct Prompt : AccumulatingState
+    {
+        size_t pos = 0;
+    };
+    struct Command : public AccumulatingState
+    {};
+    struct OutputLine : public AccumulatingState
+    {};
+
+    using State = std::variant<Indent, Commentary, Prompt, Command, OutputLine>;
+    State state_;
+
+    constexpr static auto stateDebug(State const&) -> const char *;
+
+    const std::string prompt_;
+    const size_t indent_;
+
+    /** Last line was output, so we consider a blank to be part of the output.
+     * Defaulted so the flag is never read indeterminate, whatever the
+     * constructor does. */
+    bool lastWasOutput_ = false;
+
+    std::vector<Node> syntax_;
+
+    void transition(State newState);
+    void onNewline();
+};
+
+// Override gtest printing for lists of nodes
+void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os);
+};
--- a/tests/unit/libutil-support/tests/debug-char.hh
+++ b/tests/unit/libutil-support/tests/debug-char.hh
@ -0,0 +1,24 @@
+///@file
+#include <ostream>
+#include <boost/io/ios_state.hpp>
+
+namespace nix {
+
+/** Wraps a single char so that it can be streamed through the DebugChar
+ * overload of operator<< instead of the plain char one */
+struct DebugChar
+{
+    char c;
+};
+
+/**
+ * Streams a DebugChar: the character itself when isprint() accepts it,
+ * otherwise its byte value in hexadecimal with a "0x" prefix.
+ *
+ * The stream's format flags are saved on entry by the ios_flags_saver, so the
+ * std::hex used for the hexadecimal form does not leak to the caller.
+ */
+inline std::ostream & operator<<(std::ostream & s, DebugChar c)
+{
+    boost::io::ios_flags_saver _ifs(s);
+
+    // isprint() is undefined for negative arguments other than EOF, and plain
+    // char may be signed, so convert through unsigned char first.
+    if (isprint(static_cast<unsigned char>(c.c))) {
+        s << c.c;
+    } else {
+        // Mask to one byte so a negative char does not sign-extend into the output
+        s << std::hex << "0x" << (static_cast<unsigned int>(c.c) & 0xff);
+    }
+    return s;
+}
+
+}
--- a/tests/unit/libutil-support/tests/terminal-code-eater.cc
+++ b/tests/unit/libutil-support/tests/terminal-code-eater.cc
@ -0,0 +1,85 @@
+#include "terminal-code-eater.hh"
+#include "debug-char.hh"
+#include <assert.h>
+#include <cstdint>
+#include <iostream>
+
+namespace nix {
+
+/** Compile-time switch: when true, feed() logs to std::cerr every character it
+ * receives and every character it passes through */
+static constexpr const bool DEBUG_EATER = false;
+
+/**
+ * Feeds one character through the escape-sequence filter.
+ *
+ * Characters that belong to an escape sequence, and bare '\r', are swallowed;
+ * every other character is forwarded to `on_char`.
+ *
+ * @param c the next character of the terminal stream
+ * @param on_char called with each character that is not eaten
+ */
+void TerminalCodeEater::feed(char c, std::function<void(char)> on_char)
+{
+    auto isParamChar = [](char v) -> bool { return v >= 0x30 && v <= 0x3f; };
+    auto isIntermediateChar = [](char v) -> bool { return v >= 0x20 && v <= 0x2f; };
+    auto isFinalChar = [](char v) -> bool { return v >= 0x40 && v <= 0x7e; };
+    if constexpr (DEBUG_EATER) {
+        std::cerr << "eater" << DebugChar{c} << "\n";
+    }
+
+    switch (state) {
+    case State::ExpectESC:
+        switch (c) {
+        // ESC, spelled as '\x1b' because '\e' is a compiler extension rather
+        // than standard C++
+        case '\x1b':
+            transition(State::ExpectESCSeq);
+            return;
+        // Just eat \r, since it is part of clearing a line
+        case '\r':
+            return;
+        }
+        if constexpr (DEBUG_EATER) {
+            std::cerr << "eater uneat" << DebugChar{c} << "\n";
+        }
+        on_char(c);
+        break;
+    case State::ExpectESCSeq:
+        switch (c) {
+        // CSI
+        case '[':
+            transition(State::InCSIParams);
+            return;
+        // Any other character after ESC is eaten and ends the sequence
+        default:
+            transition(State::ExpectESC);
+            return;
+        }
+        break;
+    // https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_(Control_Sequence_Introducer)_sequences
+    // A CSI sequence is: CSI [\x30-\x3f]* [\x20-\x2f]* [\x40-\x7e]
+    //                        ^ params     ^ intermediates ^ final byte
+    case State::InCSIParams:
+        if (isFinalChar(c)) {
+            transition(State::ExpectESC);
+            return;
+        } else if (isIntermediateChar(c)) {
+            transition(State::InCSIIntermediates);
+            return;
+        } else if (isParamChar(c)) {
+            return;
+        } else {
+            // Corrupt escape sequence? Throw an assert, for now.
+            // Reset first, so that with asserts compiled out the eater is not
+            // left stuck mid-sequence swallowing whatever comes next.
+            transition(State::ExpectESC);
+            assert(false && "Corrupt terminal escape sequence");
+            return;
+        }
+        break;
+    case State::InCSIIntermediates:
+        if (isFinalChar(c)) {
+            transition(State::ExpectESC);
+            return;
+        } else if (isIntermediateChar(c)) {
+            return;
+        } else {
+            // Corrupt escape sequence? Throw an assert, for now.
+            // Reset first, for the same reason as in InCSIParams.
+            transition(State::ExpectESC);
+            assert(false && "Corrupt terminal escape sequence in intermediates");
+            return;
+        }
+        break;
+    }
+}
+
+/** Moves the state machine into `new_state` */
+void TerminalCodeEater::transition(State new_state)
+{
+    this->state = new_state;
+}
+};
--- a/tests/unit/libutil-support/tests/terminal-code-eater.hh
+++ b/tests/unit/libutil-support/tests/terminal-code-eater.hh
@ -0,0 +1,29 @@
+#pragma once
+/// @file
+
+#include <functional>
+
+namespace nix {
+
+/** DFA that eats terminal escapes
+ *
+ * See: https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
+ */
+class TerminalCodeEater
+{
+public:
+    /**
+     * Feeds one character in. Characters that are part of an escape sequence,
+     * and '\r', are dropped; every other character is handed to `on_char`.
+     */
+    void feed(char c, std::function<void(char)> on_char);
+
+private:
+    enum class State {
+        /** Not inside an escape sequence; waiting for ESC */
+        ExpectESC,
+        /** ESC was just seen; the next character decides what follows */
+        ExpectESCSeq,
+        /** Inside a CSI sequence (after ESC '['), reading parameter bytes */
+        InCSIParams,
+        /** Inside a CSI sequence, reading intermediate bytes */
+        InCSIIntermediates,
+    };
+
+    /** Current state; a fresh eater starts outside any escape sequence */
+    State state = State::ExpectESC;
+
+    /** Sets `state` to `new_state` */
+    void transition(State new_state);
+};
+};
Author	SHA1	Message	Date
jade	dce253ee01	burn it all Change-Id: `I0db6eee7d6aae3e02ea0155c04a55a7142e456c3`	2024-03-13 22:03:42 -07:00
José Luis Lafuente	2a95127732	Add clang format configuration (cherry picked from commit 53fdcbca509b6c5dacaea3d3c465d86e49b0dd74) Change-Id: `I5446fd45de2bf644e34112f719afb3318a440b30`	2024-03-13 15:46:55 -07:00
jade	c9e77f0595	Allow dlopen of plugins to fail It happens with some frequency that plugins that might be unimportant to the evaluation at hand mismatch with the nix version, leading to spurious load failures. Let's make these non fatal. Change-Id: `Iba10e951d171725ccf1a121bcd9be1e1d6ad69eb`	2024-03-13 15:46:55 -07:00
jade	063d436c56	Test that :st does ... something Change-Id: `I97c00b5eb1288f68d8c2b484436cc185d040b8b2`	2024-03-13 15:46:55 -07:00
jade	d2e6fec7ce	repl_characterization: Also verify the stack trace exists Change-Id: `I8b2d8211a24011fae1586a1182d7d0772a039cd7`	2024-03-13 15:46:55 -07:00
jade	212654d68f	repl_characterization: eat newlines after commands and source-dir paths This is because they are unrepresentable in the source files with commentary but not in the output, so we should just eat them in normalization. It's ok. Change-Id: `I2cb7e8b3fc7b00874885bb287cbaa200b41cb16b`	2024-03-13 15:46:55 -07:00
jade	9c3a1babe6	Add regression tests for #9917 , #9918 Change-Id: `Ib0591e1499c5dba5e5a83ee75a899c9d16986827`	2024-03-13 15:46:55 -07:00
jade	b85085157a	Implement a repl characterization test system This allows for automating using the repl without needing a PTY, with very easy to write test files. Change-Id: `Ia8d7854edd91f93477638942cb6fc261354e6035`	2024-03-13 15:46:55 -07:00
jade	ed95b02215	Implement a parser for a literate testing system for the repl This parser can be reused for other purposes. It's inspired by https://bitheap.org/cram/ Although eelco's impostor exists https://github.com/mobusoperandi/eelco, it is not very nice to depend on out of tree testing frameworks with no way to customize them. Change-Id: `Ifca50177e09730182baf0ebf829c3505bbb0274a`	2024-03-13 15:46:55 -07:00