diff --git a/darwin_stop_world.c b/darwin_stop_world.c
index 0468aaec..b348d869 100644
--- a/darwin_stop_world.c
+++ b/darwin_stop_world.c
@@ -356,6 +356,7 @@ GC_INNER void GC_push_all_stacks(void)
   int nthreads = 0;
   word total_size = 0;
   mach_msg_type_number_t listcount = (mach_msg_type_number_t)THREAD_TABLE_SZ;
+  size_t stack_limit;
   if (!EXPECT(GC_thr_initialized, TRUE))
     GC_thr_init();
 
@@ -411,6 +412,19 @@ GC_INNER void GC_push_all_stacks(void)
             GC_push_all_stack_sections(lo, hi, p->traced_stack_sect);
           }
           if (altstack_lo) {
+            // When a thread goes into a coroutine, we lose its original sp until
+            // control flow returns to the thread.
+            // While in the coroutine, the sp points outside the thread stack,
+            // so we can detect this and push the entire thread stack instead,
+            // as an approximation.
+            // We assume that the coroutine has similarly added its entire stack.
+            // This could be made accurate by cooperating with the application
+            // via new functions and/or callbacks.
+            stack_limit = pthread_get_stacksize_np(p->id);
+            if (altstack_lo >= altstack_hi || altstack_lo < altstack_hi - stack_limit) { // sp outside stack
+              altstack_lo = altstack_hi - stack_limit;
+            }
+
             total_size += altstack_hi - altstack_lo;
             GC_push_all_stack(altstack_lo, altstack_hi);
           }
diff --git a/include/gc.h b/include/gc.h
index edab6c22..f2c61282 100644
--- a/include/gc.h
+++ b/include/gc.h
@@ -2172,6 +2172,11 @@ GC_API void GC_CALL GC_win32_free_heap(void);
         (*GC_amiga_allocwrapper_do)(a,GC_malloc_atomic_ignore_off_page)
 #endif /* _AMIGA && !GC_AMIGA_MAKINGLIB */
 
+#if !__APPLE__
+/* Patch doesn't work on apple */
+#define NIX_BOEHM_PATCH_VERSION 1
+#endif
+
 #ifdef __cplusplus
   } /* extern "C" */
 #endif
diff --git a/pthread_stop_world.c b/pthread_stop_world.c
index b5d71e62..aed7b0bf 100644
--- a/pthread_stop_world.c
+++ b/pthread_stop_world.c
@@ -768,6 +768,8 @@ STATIC void GC_restart_handler(int sig)
 /* world is stopped.  Should not fail if it isn't.                      */
 GC_INNER void GC_push_all_stacks(void)
 {
+    size_t stack_limit;
+    pthread_attr_t pattr;
     GC_bool found_me = FALSE;
     size_t nthreads = 0;
     int i;
@@ -851,6 +853,37 @@ GC_INNER void GC_push_all_stacks(void)
           hi = p->altstack + p->altstack_size;
           /* FIXME: Need to scan the normal stack too, but how ? */
           /* FIXME: Assume stack grows down */
+        } else {
+#ifdef HAVE_PTHREAD_ATTR_GET_NP
+          if (!pthread_attr_init(&pattr)
+              || !pthread_attr_get_np(p->id, &pattr))
+#else /* HAVE_PTHREAD_GETATTR_NP */
+          if (pthread_getattr_np(p->id, &pattr))
+#endif
+          {
+            ABORT("GC_push_all_stacks: pthread_getattr_np failed!");
+          }
+          if (pthread_attr_getstacksize(&pattr, &stack_limit)) {
+            ABORT("GC_push_all_stacks: pthread_attr_getstacksize failed!");
+          }
+          if (pthread_attr_destroy(&pattr)) {
+            ABORT("GC_push_all_stacks: pthread_attr_destroy failed!");
+          }
+          // When a thread goes into a coroutine, we lose its original sp until
+          // control flow returns to the thread.
+          // While in the coroutine, the sp points outside the thread stack,
+          // so we can detect this and push the entire thread stack instead,
+          // as an approximation.
+          // We assume that the coroutine has similarly added its entire stack.
+          // This could be made accurate by cooperating with the application
+          // via new functions and/or callbacks.
+          #ifndef STACK_GROWS_UP
+            if (lo >= hi || lo < hi - stack_limit) { // sp outside stack
+              lo = hi - stack_limit;
+            }
+          #else
+          #error "STACK_GROWS_UP not supported in boost_coroutine2 (as of june 2021), so we don't support it in Nix."
+          #endif
         }
         GC_push_all_stack_sections(lo, hi, traced_stack_sect);
 #       ifdef STACK_GROWS_UP