From 19e0ce2c03d8e0baa16998b086665664c420c1df Mon Sep 17 00:00:00 2001 From: Jade Lovelace Date: Sat, 24 Aug 2024 21:11:30 -0700 Subject: [PATCH] main: log stack traces for std::terminate These stack traces kind of suck for the reasons mentioned on the CppTrace page here (no symbols for inline functions is a major one): https://github.com/jeremy-rifkin/cpptrace I would consider using CppTrace if it were packaged, but to be honest, I think that the more reasonable option is actually to move entirely to out-of-process crash handling and symbolization. The reason for this is that if you want to generate anything of substance on SIGSEGV or really any deadly signal, you are stuck in async-signal-safe land, which is not a place to be trying to run a symbolizer. LLVM does it anyway, probably carefully, and chromium *can* do it on debug builds but in general uses crashpad: https://source.chromium.org/chromium/chromium/src/+/main:base/debug/stack_trace_posix.cc;l=974;drc=82dff63dbf9db05e9274e11d9128af7b9f51ceaa;bpv=1;bpt=1 However, some stack traces are better than *no* stack traces when we get mystery exceptions falling out the bottom of the program. I've also promoted the path for "mystery exceptions falling out the bottom of the program" to hard crash and generate a core dump because although there's been some months since the last one of these, these are nonetheless always *atrociously* diagnosed. We can't improve the crash handling further until either we use Crashpad (which involves more C++ deps, no thanks) or we put in the ostensibly work in progress Rust minidump infrastructure, in which case we need to finish full support for Rust in libutil first. Sample report: Lix crashed. This is a bug. We would appreciate if you report it at https://git.lix.systems/lix-project/lix/issues with the following information included: Exception: std::runtime_error: lol Stack trace: 0# nix::printStackTrace() in /home/jade/lix/lix3/build/src/nix/../libutil/liblixutil.so 1# 0x000073C9862331F2 in /home/jade/lix/lix3/build/src/nix/../libmain/liblixmain.so 2# 0x000073C985F2E21A in /nix/store/p44qan69linp3ii0xrviypsw2j4qdcp2-gcc-13.2.0-lib/lib/libstdc++.so.6 3# 0x000073C985F2E285 in /nix/store/p44qan69linp3ii0xrviypsw2j4qdcp2-gcc-13.2.0-lib/lib/libstdc++.so.6 4# nix::handleExceptions(std::__cxx11::basic_string, std::allocator > const&, std::function) in /home/jade/lix/lix3/build/src/nix/../libmain/liblixmain.so 5# 0x00005CF65B6B048B in /home/jade/lix/lix3/build/src/nix/nix 6# 0x000073C985C8810E in /nix/store/dbcw19dshdwnxdv5q2g6wldj6syyvq7l-glibc-2.39-52/lib/libc.so.6 7# __libc_start_main in /nix/store/dbcw19dshdwnxdv5q2g6wldj6syyvq7l-glibc-2.39-52/lib/libc.so.6 8# 0x00005CF65B610335 in /home/jade/lix/lix3/build/src/nix/nix Change-Id: I1a9f6d349b617fd7145a37159b78ecb9382cb4e9 --- doc/manual/rl-next/stack-traces.md | 26 ++++++++++++++ src/libmain/crash-handler.cc | 41 ++++++++++++++++++++++ src/libmain/crash-handler.hh | 21 +++++++++++ src/libmain/meson.build | 2 ++ src/libmain/shared.cc | 14 +++++--- src/libmain/shared.hh | 2 +- src/libstore/globals.cc | 2 +- tests/unit/libmain/crash.cc | 56 ++++++++++++++++++++++++++++++ tests/unit/meson.build | 7 +++- 9 files changed, 164 insertions(+), 7 deletions(-) create mode 100644 doc/manual/rl-next/stack-traces.md create mode 100644 src/libmain/crash-handler.cc create mode 100644 src/libmain/crash-handler.hh create mode 100644 tests/unit/libmain/crash.cc diff --git a/doc/manual/rl-next/stack-traces.md b/doc/manual/rl-next/stack-traces.md new file mode 100644 index 000000000..e16d6c886 --- /dev/null +++ b/doc/manual/rl-next/stack-traces.md @@ -0,0 +1,26 @@ +--- +synopsis: "Some Lix crashes now produce reporting instructions and a stack trace, then abort" +cls: [1854] +category: Improvements +credits: jade +--- + +Lix, being a C++ program, can crash in a few kinds of ways. +It can obviously do a memory access violation, which will generate a core dump and thus be relatively debuggable. +But, worse, it could throw an unhandled exception, and, in the past, we would just show the message but not where it comes from, in spite of this always being a bug, since we expect all such errors to be translated to a Lix specific error. +Now the latter kind of bug should print reporting instructions, a rudimentary stack trace and (depending on system configuration) generate a core dump. + +Sample output: + +``` +Lix crashed. This is a bug. We would appreciate if you report it along with what caused it at https://git.lix.systems/lix-project/lix/issues with the following information included: + +Exception: std::runtime_error: test exception +Stack trace: + 0# nix::printStackTrace() in /home/jade/lix/lix3/build/src/nix/../libutil/liblixutil.so + 1# 0x000073C9862331F2 in /home/jade/lix/lix3/build/src/nix/../libmain/liblixmain.so + 2# 0x000073C985F2E21A in /nix/store/p44qan69linp3ii0xrviypsw2j4qdcp2-gcc-13.2.0-lib/lib/libstdc++.so.6 + 3# 0x000073C985F2E285 in /nix/store/p44qan69linp3ii0xrviypsw2j4qdcp2-gcc-13.2.0-lib/lib/libstdc++.so.6 + 4# nix::handleExceptions(std::__cxx11::basic_string, std::allocator > const&, std::function) in /home/jade/lix/lix3/build/src/nix/../libmain/liblixmain.so + ... +``` diff --git a/src/libmain/crash-handler.cc b/src/libmain/crash-handler.cc new file mode 100644 index 000000000..3f1b9f7d8 --- /dev/null +++ b/src/libmain/crash-handler.cc @@ -0,0 +1,41 @@ +#include "crash-handler.hh" +#include "fmt.hh" + +#include +#include + +namespace nix { + +namespace { +void onTerminate() +{ + std::cerr << "Lix crashed. This is a bug. We would appreciate if you report it along with what caused it at https://git.lix.systems/lix-project/lix/issues with the following information included:\n\n"; + try { + std::exception_ptr eptr = std::current_exception(); + if (eptr) { + std::rethrow_exception(eptr); + } else { + std::cerr << "std::terminate() called without exception\n"; + } + } catch (const std::exception & ex) { + std::cerr << "Exception: " << boost::core::demangle(typeid(ex).name()) << ": " << ex.what() << "\n"; + } catch (...) { + std::cerr << "Unknown exception! Spooky.\n"; + } + + std::cerr << "Stack trace:\n"; + nix::printStackTrace(); + + std::abort(); +} +} + +void registerCrashHandler() +{ + // DO NOT use this for signals. Boost stacktrace is very much not + // async-signal-safe, and in a world with ASLR, addr2line is pointless. + // + // If you want signals, set up a minidump system and do it out-of-process. + std::set_terminate(onTerminate); +} +} diff --git a/src/libmain/crash-handler.hh b/src/libmain/crash-handler.hh new file mode 100644 index 000000000..4c5641b8c --- /dev/null +++ b/src/libmain/crash-handler.hh @@ -0,0 +1,21 @@ +#pragma once +/// @file Crash handler for Lix that prints back traces (hopefully in instances where it is not just going to crash the process itself). +/* + * Author's note: This will probably be partially/fully supplanted by a + * minidump writer like the following once we get our act together on crashes a + * little bit more: + * https://github.com/rust-minidump/minidump-writer + * https://github.com/EmbarkStudios/crash-handling + * (out of process implementation *should* be able to be done on-demand) + * + * Such an out-of-process implementation could then both make minidumps and + * print stack traces for arbitrarily messed-up process states such that we can + * safely give out backtraces for SIGSEGV and other deadly signals. + */ + +namespace nix { + +/** Registers the Lix crash handler for std::terminate (currently; will support more crashes later). See also detectStackOverflow(). */ +void registerCrashHandler(); + +} diff --git a/src/libmain/meson.build b/src/libmain/meson.build index a7cce287c..a1a888c16 100644 --- a/src/libmain/meson.build +++ b/src/libmain/meson.build @@ -1,5 +1,6 @@ libmain_sources = files( 'common-args.cc', + 'crash-handler.cc', 'loggers.cc', 'progress-bar.cc', 'shared.cc', @@ -8,6 +9,7 @@ libmain_sources = files( libmain_headers = files( 'common-args.hh', + 'crash-handler.hh', 'loggers.hh', 'progress-bar.hh', 'shared.hh', diff --git a/src/libmain/shared.cc b/src/libmain/shared.cc index bc9548e09..64bd00606 100644 --- a/src/libmain/shared.cc +++ b/src/libmain/shared.cc @@ -1,3 +1,4 @@ +#include "crash-handler.hh" #include "globals.hh" #include "shared.hh" #include "store-api.hh" @@ -118,6 +119,8 @@ static void sigHandler(int signo) { } void initNix() { + registerCrashHandler(); + /* Turn on buffering for cerr. */ static char buf[1024]; std::cerr.rdbuf()->pubsetbuf(buf, sizeof(buf)); @@ -335,12 +338,15 @@ int handleExceptions(const std::string & programName, std::function fun) } catch (BaseError & e) { logError(e.info()); return e.info().status; - } catch (std::bad_alloc & e) { + } catch (const std::bad_alloc & e) { printError(error + "out of memory"); return 1; - } catch (std::exception & e) { - printError(error + e.what()); - return 1; + } catch (const std::exception & e) { + // Random exceptions bubbling into main are cause for bug reports, crash + std::terminate(); + } catch (...) { + // Explicitly do not tolerate non-std exceptions escaping. + std::terminate(); } return 0; diff --git a/src/libmain/shared.hh b/src/libmain/shared.hh index b41efe567..49b72a54e 100644 --- a/src/libmain/shared.hh +++ b/src/libmain/shared.hh @@ -111,7 +111,7 @@ struct PrintFreed /** - * Install a SIGSEGV handler to detect stack overflows. + * Install a SIGSEGV handler to detect stack overflows. See also registerCrashHandler(). */ void detectStackOverflow(); diff --git a/src/libstore/globals.cc b/src/libstore/globals.cc index ffc2543ef..f43b759d2 100644 --- a/src/libstore/globals.cc +++ b/src/libstore/globals.cc @@ -443,7 +443,7 @@ static bool initLibStoreDone = false; void assertLibStoreInitialized() { if (!initLibStoreDone) { printError("The program must call nix::initNix() before calling any libstore library functions."); - abort(); + std::terminate(); }; } diff --git a/tests/unit/libmain/crash.cc b/tests/unit/libmain/crash.cc new file mode 100644 index 000000000..883dc39bd --- /dev/null +++ b/tests/unit/libmain/crash.cc @@ -0,0 +1,56 @@ +#include +#include "crash-handler.hh" + +namespace nix { + +class OopsException : public std::exception +{ + const char * msg; + +public: + OopsException(const char * msg) : msg(msg) {} + const char * what() const noexcept override + { + return msg; + } +}; + +void causeCrashForTesting(std::function fixture) +{ + registerCrashHandler(); + std::cerr << "time to crash\n"; + try { + fixture(); + } catch (...) { + std::terminate(); + } +} + +TEST(CrashHandler, exceptionName) +{ + ASSERT_DEATH( + causeCrashForTesting([]() { throw OopsException{"lol oops"}; }), + "time to crash\nLix crashed.*OopsException: lol oops" + ); +} + +TEST(CrashHandler, unknownTerminate) +{ + ASSERT_DEATH( + causeCrashForTesting([]() { std::terminate(); }), + "time to crash\nLix crashed.*std::terminate\\(\\) called without exception" + ); +} + +TEST(CrashHandler, nonStdException) +{ + ASSERT_DEATH( + causeCrashForTesting([]() { + // NOLINTNEXTLINE(hicpp-exception-baseclass): intentional + throw 4; + }), + "time to crash\nLix crashed.*Unknown exception! Spooky\\." + ); +} + +} diff --git a/tests/unit/meson.build b/tests/unit/meson.build index 8ff0b5ec5..894f48b2a 100644 --- a/tests/unit/meson.build +++ b/tests/unit/meson.build @@ -262,9 +262,14 @@ test( protocol : 'gtest', ) +libmain_tests_sources = files( + 'libmain/crash.cc', + 'libmain/progress-bar.cc', +) + libmain_tester = executable( 'liblixmain-tests', - files('libmain/progress-bar.cc'), + libmain_tests_sources, dependencies : [ liblixmain, liblixexpr,