diff --git a/doc/manual/rl-next/stack-traces.md b/doc/manual/rl-next/stack-traces.md
new file mode 100644
index 000000000..e16d6c886
--- /dev/null
+++ b/doc/manual/rl-next/stack-traces.md
@@ -0,0 +1,26 @@
+---
+synopsis: "Some Lix crashes now produce reporting instructions and a stack trace, then abort"
+cls: [1854]
+category: Improvements
+credits: jade
+---
+
+Lix, being a C++ program, can crash in a few different ways.
+Most obviously, it can commit a memory access violation, which generates a core dump and is thus relatively debuggable.
+Worse, it could throw an unhandled exception; in the past, we would just show the message but not where it came from, even though this is always a bug, since we expect all such errors to be translated to a Lix-specific error.
+Now the latter kind of bug should print reporting instructions and a rudimentary stack trace, and (depending on system configuration) generate a core dump.
+
+Sample output:
+
+```
+Lix crashed. This is a bug. We would appreciate if you report it along with what caused it at https://git.lix.systems/lix-project/lix/issues with the following information included:
+
+Exception: std::runtime_error: test exception
+Stack trace:
+ 0# nix::printStackTrace() in /home/jade/lix/lix3/build/src/nix/../libutil/liblixutil.so
+ 1# 0x000073C9862331F2 in /home/jade/lix/lix3/build/src/nix/../libmain/liblixmain.so
+ 2# 0x000073C985F2E21A in /nix/store/p44qan69linp3ii0xrviypsw2j4qdcp2-gcc-13.2.0-lib/lib/libstdc++.so.6
+ 3# 0x000073C985F2E285 in /nix/store/p44qan69linp3ii0xrviypsw2j4qdcp2-gcc-13.2.0-lib/lib/libstdc++.so.6
+ 4# nix::handleExceptions(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::function<void ()>) in /home/jade/lix/lix3/build/src/nix/../libmain/liblixmain.so
+ ...
+```
diff --git a/src/libmain/crash-handler.cc b/src/libmain/crash-handler.cc
new file mode 100644
index 000000000..3f1b9f7d8
--- /dev/null
+++ b/src/libmain/crash-handler.cc
@@ -0,0 +1,47 @@
+#include "crash-handler.hh"
+#include "fmt.hh"
+
+#include <boost/core/demangle.hpp>
+#include <exception>
+
+namespace nix {
+
+namespace {
+/**
+ * Terminate handler installed by registerCrashHandler(). Prints reporting
+ * instructions and a description of the exception in flight (if there is one)
+ * to stderr, calls nix::printStackTrace(), then aborts the process.
+ */
+void onTerminate()
+{
+    std::cerr << "Lix crashed. This is a bug. We would appreciate if you report it along with what caused it at https://git.lix.systems/lix-project/lix/issues with the following information included:\n\n";
+    try {
+        // Rethrow the exception in flight (if any) so the catch clauses below can identify it.
+        std::exception_ptr eptr = std::current_exception();
+        if (eptr) {
+            std::rethrow_exception(eptr);
+        } else {
+            std::cerr << "std::terminate() called without exception\n";
+        }
+    } catch (const std::exception & ex) {
+        std::cerr << "Exception: " << boost::core::demangle(typeid(ex).name()) << ": " << ex.what() << "\n";
+    } catch (...) {
+        std::cerr << "Unknown exception! Spooky.\n";
+    }
+
+    std::cerr << "Stack trace:\n";
+    nix::printStackTrace();
+
+    std::abort();
+}
+}
+
+void registerCrashHandler()
+{
+    // DO NOT use this for signals. Boost stacktrace is very much not
+    // async-signal-safe, and in a world with ASLR, addr2line is pointless.
+    //
+    // If you want signals, set up a minidump system and do it out-of-process.
+    std::set_terminate(onTerminate);
+}
+}
diff --git a/src/libmain/crash-handler.hh b/src/libmain/crash-handler.hh
new file mode 100644
index 000000000..4c5641b8c
--- /dev/null
+++ b/src/libmain/crash-handler.hh
@@ -0,0 +1,21 @@
+#pragma once
+/// @file Crash handler for Lix that prints back traces (hopefully in instances where it is not just going to crash the process itself).
+/*
+ * Author's note: This will probably be partially/fully supplanted by a
+ * minidump writer like the following once we get our act together on crashes a
+ * little bit more:
+ * https://github.com/rust-minidump/minidump-writer
+ * https://github.com/EmbarkStudios/crash-handling
+ * (out of process implementation *should* be able to be done on-demand)
+ *
+ * Such an out-of-process implementation could then both make minidumps and
+ * print stack traces for arbitrarily messed-up process states such that we can
+ * safely give out backtraces for SIGSEGV and other deadly signals.
+ */
+
+namespace nix {
+
+/** Registers the Lix crash handler. Currently it only hooks std::terminate; other kinds of crashes may be supported later. See also detectStackOverflow(). */
+void registerCrashHandler();
+
+}
diff --git a/src/libmain/meson.build b/src/libmain/meson.build
index a7cce287c..a1a888c16 100644
--- a/src/libmain/meson.build
+++ b/src/libmain/meson.build
@@ -1,5 +1,6 @@
 libmain_sources = files(
   'common-args.cc',
+  'crash-handler.cc',
   'loggers.cc',
   'progress-bar.cc',
   'shared.cc',
@@ -8,6 +9,7 @@ libmain_sources = files(
 
 libmain_headers = files(
   'common-args.hh',
+  'crash-handler.hh',
   'loggers.hh',
   'progress-bar.hh',
   'shared.hh',
diff --git a/src/libmain/shared.cc b/src/libmain/shared.cc
index bc9548e09..64bd00606 100644
--- a/src/libmain/shared.cc
+++ b/src/libmain/shared.cc
@@ -1,3 +1,4 @@
+#include "crash-handler.hh"
 #include "globals.hh"
 #include "shared.hh"
 #include "store-api.hh"
@@ -118,6 +119,8 @@ static void sigHandler(int signo) { }
 
 void initNix()
 {
+    registerCrashHandler();
+
     /* Turn on buffering for cerr. */
     static char buf[1024];
     std::cerr.rdbuf()->pubsetbuf(buf, sizeof(buf));
@@ -335,12 +338,15 @@ int handleExceptions(const std::string & programName, std::function<void()> fun)
     } catch (BaseError & e) {
         logError(e.info());
         return e.info().status;
-    } catch (std::bad_alloc & e) {
+    } catch (const std::bad_alloc & e) {
         printError(error + "out of memory");
         return 1;
-    } catch (std::exception & e) {
-        printError(error + e.what());
-        return 1;
+    } catch (const std::exception & e) {
+        // Unexpected std exceptions reaching main are bugs: terminate so the terminate handler can report them.
+        std::terminate();
+    } catch (...) {
+        // Explicitly do not tolerate non-std exceptions escaping.
+        std::terminate();
     }
 
     return 0;
diff --git a/src/libmain/shared.hh b/src/libmain/shared.hh
index b41efe567..49b72a54e 100644
--- a/src/libmain/shared.hh
+++ b/src/libmain/shared.hh
@@ -111,7 +111,7 @@ struct PrintFreed
 
 
 /**
- * Install a SIGSEGV handler to detect stack overflows.
+ * Install a SIGSEGV handler to detect stack overflows. See also registerCrashHandler().
  */
 void detectStackOverflow();
 
diff --git a/src/libstore/globals.cc b/src/libstore/globals.cc
index ffc2543ef..f43b759d2 100644
--- a/src/libstore/globals.cc
+++ b/src/libstore/globals.cc
@@ -443,7 +443,7 @@ static bool initLibStoreDone = false;
 void assertLibStoreInitialized() {
     if (!initLibStoreDone) {
         printError("The program must call nix::initNix() before calling any libstore library functions.");
-        abort();
+        std::terminate();
     };
 }
 
diff --git a/tests/unit/libmain/crash.cc b/tests/unit/libmain/crash.cc
new file mode 100644
index 000000000..883dc39bd
--- /dev/null
+++ b/tests/unit/libmain/crash.cc
@@ -0,0 +1,58 @@
+#include <gtest/gtest.h>
+#include "crash-handler.hh"
+
+namespace nix {
+
+/** Minimal std::exception subclass used to check that the crash output contains the exception's type name and message. */
+class OopsException : public std::exception
+{
+    const char * msg;
+
+public:
+    explicit OopsException(const char * msg) : msg(msg) {}
+    const char * what() const noexcept override
+    {
+        return msg;
+    }
+};
+
+/** Registers the crash handler, prints a marker to stderr, then runs `fixture`, turning any exception it throws into std::terminate(). */
+void causeCrashForTesting(std::function<void()> fixture)
+{
+    registerCrashHandler();
+    std::cerr << "time to crash\n";
+    try {
+        fixture();
+    } catch (...) {
+        std::terminate();
+    }
+}
+
+TEST(CrashHandler, exceptionName)
+{
+    ASSERT_DEATH(
+        causeCrashForTesting([]() { throw OopsException{"lol oops"}; }),
+        "time to crash\nLix crashed.*OopsException: lol oops"
+    );
+}
+
+TEST(CrashHandler, unknownTerminate)
+{
+    ASSERT_DEATH(
+        causeCrashForTesting([]() { std::terminate(); }),
+        "time to crash\nLix crashed.*std::terminate\\(\\) called without exception"
+    );
+}
+
+TEST(CrashHandler, nonStdException)
+{
+    ASSERT_DEATH(
+        causeCrashForTesting([]() {
+            // NOLINTNEXTLINE(hicpp-exception-baseclass): intentional
+            throw 4;
+        }),
+        "time to crash\nLix crashed.*Unknown exception! Spooky\\."
+    );
+}
+
+}
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index 5baf10de2..3d3930731 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -264,9 +264,14 @@ test(
   protocol : 'gtest',
 )
 
+libmain_tests_sources = files(
+  'libmain/crash.cc',
+  'libmain/progress-bar.cc',
+)
+
 libmain_tester = executable(
   'liblixmain-tests',
-  files('libmain/progress-bar.cc'),
+  libmain_tests_sources,
   dependencies : [
     liblixmain,
     liblixexpr,