From 524f89f1399724e596f61faba2c6861b1bb7b9c5 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 21 Aug 2014 14:08:09 +0200 Subject: [PATCH] Use unshare() instead of clone() It turns out that using clone() to start a child process is unsafe in a multithreaded program. It can cause the initialisation of a build child process to hang in setgroups(), as seen several times in the build farm: The reason is that Glibc thinks that the other threads of the parent exist in the child, so in setxid_mark_thread() it tries to get a futex that has been acquired by another thread just before the clone(). With fork(), Glibc runs pthread_atfork() handlers that take care of this (in particular, __reclaim_stacks()). But clone() doesn't do that. Fortunately, we can use fork()+unshare() instead of clone() to set up private namespaces. See also https://www.mail-archive.com/lxc-devel@lists.linuxcontainers.org/msg03434.html. --- src/libstore/build.cc | 90 +++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 55 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 856e5f820..b64c32d10 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -1575,13 +1575,6 @@ void chmod_(const Path & path, mode_t mode) } -int childEntry(void * arg) -{ - ((DerivationGoal *) arg)->initChild(); - return 1; -} - - void DerivationGoal::startBuilder() { startNest(nest, lvlInfo, format( @@ -1897,48 +1890,10 @@ void DerivationGoal::startBuilder() /* Create a pipe to get the output of the builder. */ builderOut.create(); - /* Fork a child to build the package. Note that while we - currently use forks to run and wait for the children, it - shouldn't be hard to use threads for this on systems where - fork() is unavailable or inefficient. - - If we're building in a chroot, then also set up private - namespaces for the build: - - - The PID namespace causes the build to start as PID 1. - Processes outside of the chroot are not visible to those on - the inside, but processes inside the chroot are visible from - the outside (though with different PIDs). - - - The private mount namespace ensures that all the bind mounts - we do will only show up in this process and its children, and - will disappear automatically when we're done. - - - The private network namespace ensures that the builder cannot - talk to the outside world (or vice versa). It only has a - private loopback interface. - - - The IPC namespace prevents the builder from communicating - with outside processes using SysV IPC mechanisms (shared - memory, message queues, semaphores). It also ensures that - all IPC objects are destroyed when the builder exits. - - - The UTS namespace ensures that builders see a hostname of - localhost rather than the actual hostname. - */ -#if CHROOT_ENABLED - if (useChroot) { - char stack[32 * 1024]; - pid = clone(childEntry, stack + sizeof(stack) - 8, - CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | SIGCHLD, this); - } else -#endif - { - pid = fork(); - if (pid == 0) initChild(); - } - - if (pid == -1) throw SysError("unable to fork"); + /* Fork a child to build the package. */ + pid = startProcess([&]() { + initChild(); + }); /* parent */ pid.setSeparatePG(true); @@ -1965,14 +1920,41 @@ void DerivationGoal::initChild() try { /* child */ - _writeToStderr = 0; - - restoreAffinity(); - commonChildInit(builderOut); #if CHROOT_ENABLED if (useChroot) { + + /* Set up private namespaces for the build: + + - The PID namespace causes the build to start as PID 1. + Processes outside of the chroot are not visible to + those on the inside, but processes inside the chroot + are visible from the outside (though with different + PIDs). + + - The private mount namespace ensures that all the bind + mounts we do will only show up in this process and + its children, and will disappear automatically when + we're done. + + - The private network namespace ensures that the + builder cannot talk to the outside world (or vice + versa). It only has a private loopback interface. + + - The IPC namespace prevents the builder from + communicating with outside processes using SysV IPC + mechanisms (shared memory, message queues, + semaphores). It also ensures that all IPC objects + are destroyed when the builder exits. + + - The UTS namespace ensures that builders see a + hostname of localhost rather than the actual + hostname. + */ + if (unshare(CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS) == -1) + throw SysError("setting up private namespaces"); + /* Initialise the loopback interface. */ AutoCloseFD fd(socket(PF_INET, SOCK_DGRAM, IPPROTO_IP)); if (fd == -1) throw SysError("cannot open IP socket"); @@ -2169,8 +2151,6 @@ void DerivationGoal::initChild() writeToStderr("while setting up the build environment: " + string(e.what()) + "\n"); _exit(1); } - - abort(); /* never reached */ }