diff --git a/boehmgc-coroutine-sp-fallback.diff b/boehmgc-coroutine-sp-fallback.diff new file mode 100644 index 000000000..fa8dd0325 --- /dev/null +++ b/boehmgc-coroutine-sp-fallback.diff @@ -0,0 +1,42 @@ +diff --git a/pthread_stop_world.c b/pthread_stop_world.c +index 1cee6a0b..46c3acd9 100644 +--- a/pthread_stop_world.c ++++ b/pthread_stop_world.c +@@ -674,6 +674,8 @@ GC_INNER void GC_push_all_stacks(void) + struct GC_traced_stack_sect_s *traced_stack_sect; + pthread_t self = pthread_self(); + word total_size = 0; ++ size_t stack_limit; ++ pthread_attr_t pattr; + + if (!EXPECT(GC_thr_initialized, TRUE)) + GC_thr_init(); +@@ -723,6 +725,28 @@ GC_INNER void GC_push_all_stacks(void) + hi = p->altstack + p->altstack_size; + /* FIXME: Need to scan the normal stack too, but how ? */ + /* FIXME: Assume stack grows down */ ++ } else { ++ if (pthread_getattr_np(p->id, &pattr)) { ++ ABORT("GC_push_all_stacks: pthread_getattr_np failed!"); ++ } ++ if (pthread_attr_getstacksize(&pattr, &stack_limit)) { ++ ABORT("GC_push_all_stacks: pthread_attr_getstacksize failed!"); ++ } ++ // When a thread goes into a coroutine, we lose its original sp until ++ // control flow returns to the thread. ++ // While in the coroutine, the sp points outside the thread stack, ++ // so we can detect this and push the entire thread stack instead, ++ // as an approximation. ++ // We assume that the coroutine has similarly added its entire stack. ++ // This could be made accurate by cooperating with the application ++ // via new functions and/or callbacks. ++ #ifndef STACK_GROWS_UP ++ if (lo >= hi || lo < hi - stack_limit) { // sp outside stack ++ lo = hi - stack_limit; ++ } ++ #else ++ #error "STACK_GROWS_UP not supported in boost_coroutine2 (as of june 2021), so we don't support it in Nix." ++ #endif + } + GC_push_all_stack_sections(lo, hi, traced_stack_sect); + # ifdef STACK_GROWS_UP diff --git a/flake.nix b/flake.nix index c97130ac9..999028ec2 100644 --- a/flake.nix +++ b/flake.nix @@ -104,7 +104,13 @@ }); propagatedDeps = - [ (boehmgc.override { enableLargeConfig = true; }) + [ ((boehmgc.override { + enableLargeConfig = true; + }).overrideAttrs(o: { + patches = (o.patches or []) ++ [ + ./boehmgc-coroutine-sp-fallback.diff + ]; + })) ]; perlDeps = diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index ef9f8efca..c078bf4a1 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -233,22 +233,34 @@ static void * oomHandler(size_t requested) } class BoehmGCStackAllocator : public StackAllocator { - boost::coroutines2::protected_fixedsize_stack stack { - // We allocate 8 MB, the default max stack size on NixOS. - // A smaller stack might be quicker to allocate but reduces the stack - // depth available for source filter expressions etc. - std::max(boost::context::stack_traits::default_size(), static_cast(8 * 1024 * 1024)) + boost::coroutines2::protected_fixedsize_stack stack { + // We allocate 8 MB, the default max stack size on NixOS. + // A smaller stack might be quicker to allocate but reduces the stack + // depth available for source filter expressions etc. + std::max(boost::context::stack_traits::default_size(), static_cast(8 * 1024 * 1024)) }; + // This is specific to boost::coroutines2::protected_fixedsize_stack. + // The stack protection page is included in sctx.size, so we have to + // subtract one page size from the stack size. + std::size_t pfss_usable_stack_size(boost::context::stack_context &sctx) { + return sctx.size - boost::context::stack_traits::page_size(); + } + public: boost::context::stack_context allocate() override { auto sctx = stack.allocate(); - GC_add_roots(static_cast(sctx.sp) - sctx.size, sctx.sp); + + // Stacks generally start at a high address and grow to lower addresses. + // Architectures that do the opposite are rare; in fact so rare that + // boost_routine does not implement it. + // So we subtract the stack size. + GC_add_roots(static_cast(sctx.sp) - pfss_usable_stack_size(sctx), sctx.sp); return sctx; } void deallocate(boost::context::stack_context sctx) override { - GC_remove_roots(static_cast(sctx.sp) - sctx.size, sctx.sp); + GC_remove_roots(static_cast(sctx.sp) - pfss_usable_stack_size(sctx), sctx.sp); stack.deallocate(sctx); }