nix-daemon: Use a thread instead of SIGPOLL to catch client disconnects
The thread calls poll() to wait until a HUP (or other error event) happens on the client connection. If so, it sends SIGINT to the main thread, which is then cleaned up normally. This is much nicer than messing around with SIGPOLL.
This commit is contained in:
parent
fdee1ced43
commit
49fe9592a4
1
local.mk
1
local.mk
|
@ -5,6 +5,7 @@ endif
|
|||
dist-files += configure config.h.in nix.spec
|
||||
|
||||
GLOBAL_CXXFLAGS += -I . -I src -I src/libutil -I src/libstore -I src/libmain -I src/libexpr
|
||||
GLOBAL_LDFLAGS += -pthread
|
||||
|
||||
$(foreach i, config.h $(call rwildcard, src/lib*, *.hh), $(eval $(call install-file-in, $(i), $(includedir)/nix, 0644)))
|
||||
|
||||
|
|
43
src/libutil/monitor-fd.hh
Normal file
43
src/libutil/monitor-fd.hh
Normal file
|
@ -0,0 +1,43 @@
|
|||
#pragma once
|
||||
|
||||
#include <thread>
|
||||
|
||||
#include <poll.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
|
||||
namespace nix {
|
||||
|
||||
|
||||
class MonitorFdHup
|
||||
{
|
||||
private:
|
||||
std::thread thread;
|
||||
|
||||
public:
|
||||
MonitorFdHup(int fd)
|
||||
{
|
||||
thread = std::thread([&]() {
|
||||
/* Wait indefinitely until a POLLHUP occurs. */
|
||||
struct pollfd fds[1];
|
||||
fds[0].fd = fd;
|
||||
fds[0].events = 0;
|
||||
if (poll(fds, 1, -1) == -1) {
|
||||
if (errno != EINTR) abort(); // can't happen
|
||||
return; // destructor is asking us to exit
|
||||
}
|
||||
/* We got POLLHUP, so send an INT signal to the main thread. */
|
||||
kill(getpid(), SIGINT);
|
||||
});
|
||||
};
|
||||
|
||||
~MonitorFdHup()
|
||||
{
|
||||
pthread_kill(thread.native_handle(), SIGINT);
|
||||
thread.join();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}
|
|
@ -6,6 +6,7 @@
|
|||
#include "archive.hh"
|
||||
#include "affinity.hh"
|
||||
#include "globals.hh"
|
||||
#include "monitor-fd.hh"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
|
@ -17,7 +18,6 @@
|
|||
#include <sys/stat.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/un.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <pwd.h>
|
||||
#include <grp.h>
|
||||
|
@ -25,25 +25,6 @@
|
|||
using namespace nix;
|
||||
|
||||
|
||||
/* On platforms that have O_ASYNC, we can detect when a client
|
||||
disconnects and immediately kill any ongoing builds. On platforms
|
||||
that lack it, we only notice the disconnection the next time we try
|
||||
to write to the client. So if you have a builder that never
|
||||
generates output on stdout/stderr, the daemon will never notice
|
||||
that the client has disconnected until the builder terminates.
|
||||
|
||||
GNU/Hurd does have O_ASYNC, but its Unix-domain socket translator
|
||||
(pflocal) does not implement F_SETOWN. See
|
||||
<http://lists.gnu.org/archive/html/bug-guix/2013-07/msg00021.html> for
|
||||
details.*/
|
||||
#if defined O_ASYNC && !defined __GNU__
|
||||
#define HAVE_HUP_NOTIFICATION
|
||||
#ifndef SIGPOLL
|
||||
#define SIGPOLL SIGIO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
static FdSource from(STDIN_FILENO);
|
||||
static FdSink to(STDOUT_FILENO);
|
||||
|
||||
|
@ -51,7 +32,6 @@ bool canSendStderr;
|
|||
pid_t myPid;
|
||||
|
||||
|
||||
|
||||
/* This function is called anytime we want to write something to
|
||||
stderr. If we're in a state where the protocol allows it (i.e.,
|
||||
when canSendStderr), send the message to the client over the
|
||||
|
@ -78,111 +58,11 @@ static void tunnelStderr(const unsigned char * buf, size_t count)
|
|||
}
|
||||
|
||||
|
||||
/* Return true if the remote side has closed its end of the
|
||||
connection, false otherwise. Should not be called on any socket on
|
||||
which we expect input! */
|
||||
static bool isFarSideClosed(int socket)
|
||||
{
|
||||
struct timeval timeout;
|
||||
timeout.tv_sec = timeout.tv_usec = 0;
|
||||
|
||||
fd_set fds;
|
||||
FD_ZERO(&fds);
|
||||
FD_SET(socket, &fds);
|
||||
|
||||
while (select(socket + 1, &fds, 0, 0, &timeout) == -1)
|
||||
if (errno != EINTR) throw SysError("select()");
|
||||
|
||||
if (!FD_ISSET(socket, &fds)) return false;
|
||||
|
||||
/* Destructive read to determine whether the select() marked the
|
||||
socket as readable because there is actual input or because
|
||||
we've reached EOF (i.e., a read of size 0 is available). */
|
||||
char c;
|
||||
int rd;
|
||||
if ((rd = read(socket, &c, 1)) > 0)
|
||||
throw Error("EOF expected (protocol error?)");
|
||||
else if (rd == -1 && errno != ECONNRESET)
|
||||
throw SysError("expected connection reset or EOF");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* A SIGPOLL signal is received when data is available on the client
|
||||
communication socket, or when the client has closed its side of the
|
||||
socket. This handler is enabled at precisely those moments in the
|
||||
protocol when we're doing work and the client is supposed to be
|
||||
quiet. Thus, if we get a SIGPOLL signal, it means that the client
|
||||
has quit. So we should quit as well.
|
||||
|
||||
Too bad most operating systems don't support the POLL_HUP value for
|
||||
si_code in siginfo_t. That would make most of the SIGPOLL
|
||||
complexity unnecessary, i.e., we could just enable SIGPOLL all the
|
||||
time and wouldn't have to worry about races. */
|
||||
static void sigPollHandler(int sigNo)
|
||||
{
|
||||
using namespace std;
|
||||
try {
|
||||
/* Check that the far side actually closed. We're still
|
||||
getting spurious signals every once in a while. I.e.,
|
||||
there is no input available, but we get a signal with
|
||||
POLL_IN set. Maybe it's delayed or something. */
|
||||
if (isFarSideClosed(from.fd)) {
|
||||
if (!blockInt) {
|
||||
_isInterrupted = 1;
|
||||
blockInt = 1;
|
||||
canSendStderr = false;
|
||||
const char * s = "SIGPOLL\n";
|
||||
write(STDERR_FILENO, s, strlen(s));
|
||||
}
|
||||
} else {
|
||||
const char * s = "spurious SIGPOLL\n";
|
||||
write(STDERR_FILENO, s, strlen(s));
|
||||
}
|
||||
}
|
||||
catch (Error & e) {
|
||||
/* Shouldn't happen. */
|
||||
string s = "impossible: " + e.msg() + '\n';
|
||||
write(STDERR_FILENO, s.data(), s.size());
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void setSigPollAction(bool enable)
|
||||
{
|
||||
#ifdef HAVE_HUP_NOTIFICATION
|
||||
struct sigaction act, oact;
|
||||
act.sa_handler = enable ? sigPollHandler : SIG_IGN;
|
||||
sigfillset(&act.sa_mask);
|
||||
act.sa_flags = 0;
|
||||
if (sigaction(SIGPOLL, &act, &oact))
|
||||
throw SysError("setting handler for SIGPOLL");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* startWork() means that we're starting an operation for which we
|
||||
want to send out stderr to the client. */
|
||||
static void startWork()
|
||||
{
|
||||
canSendStderr = true;
|
||||
|
||||
/* Handle client death asynchronously. */
|
||||
setSigPollAction(true);
|
||||
|
||||
/* Of course, there is a race condition here: the socket could
|
||||
have closed between when we last read from / wrote to it, and
|
||||
between the time we set the handler for SIGPOLL. In that case
|
||||
we won't get the signal. So do a non-blocking select() to find
|
||||
out if any input is available on the socket. If there is, it
|
||||
has to be the 0-byte read that indicates that the socket has
|
||||
closed. */
|
||||
if (isFarSideClosed(from.fd)) {
|
||||
_isInterrupted = 1;
|
||||
checkInterrupt();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -190,11 +70,6 @@ static void startWork()
|
|||
client. */
|
||||
static void stopWork(bool success = true, const string & msg = "", unsigned int status = 0)
|
||||
{
|
||||
/* Stop handling async client death; we're going to a state where
|
||||
we're either sending or receiving from the client, so we'll be
|
||||
notified of client death anyway. */
|
||||
setSigPollAction(false);
|
||||
|
||||
canSendStderr = false;
|
||||
|
||||
if (success)
|
||||
|
@ -225,17 +100,10 @@ struct TunnelSource : BufferedSource
|
|||
TunnelSource(Source & from) : from(from) { }
|
||||
size_t readUnbuffered(unsigned char * data, size_t len)
|
||||
{
|
||||
/* Careful: we're going to receive data from the client now,
|
||||
so we have to disable the SIGPOLL handler. */
|
||||
setSigPollAction(false);
|
||||
canSendStderr = false;
|
||||
|
||||
writeInt(STDERR_READ, to);
|
||||
writeInt(len, to);
|
||||
to.flush();
|
||||
size_t n = readString(data, len, from);
|
||||
|
||||
startWork();
|
||||
if (n == 0) throw EndOfFile("unexpected end-of-file");
|
||||
return n;
|
||||
}
|
||||
|
@ -662,19 +530,12 @@ static void performOp(bool trusted, unsigned int clientVersion,
|
|||
|
||||
static void processConnection(bool trusted)
|
||||
{
|
||||
MonitorFdHup monitor(from.fd);
|
||||
|
||||
canSendStderr = false;
|
||||
myPid = getpid();
|
||||
_writeToStderr = tunnelStderr;
|
||||
|
||||
#ifdef HAVE_HUP_NOTIFICATION
|
||||
/* Allow us to receive SIGPOLL for events on the client socket. */
|
||||
setSigPollAction(false);
|
||||
if (fcntl(from.fd, F_SETOWN, getpid()) == -1)
|
||||
throw SysError("F_SETOWN");
|
||||
if (fcntl(from.fd, F_SETFL, fcntl(from.fd, F_GETFL, 0) | O_ASYNC) == -1)
|
||||
throw SysError("F_SETFL");
|
||||
#endif
|
||||
|
||||
/* Exchange the greeting. */
|
||||
unsigned int magic = readInt(from);
|
||||
if (magic != WORKER_MAGIC_1) throw Error("protocol mismatch");
|
||||
|
@ -724,6 +585,8 @@ static void processConnection(bool trusted)
|
|||
WorkerOp op;
|
||||
try {
|
||||
op = (WorkerOp) readInt(from);
|
||||
} catch (Interrupted & e) {
|
||||
break;
|
||||
} catch (EndOfFile & e) {
|
||||
break;
|
||||
}
|
||||
|
@ -871,10 +734,8 @@ static void daemonLoop()
|
|||
(struct sockaddr *) &remoteAddr, &remoteAddrLen);
|
||||
checkInterrupt();
|
||||
if (remote == -1) {
|
||||
if (errno == EINTR)
|
||||
continue;
|
||||
else
|
||||
throw SysError("accepting connection");
|
||||
if (errno == EINTR) continue;
|
||||
throw SysError("accepting connection");
|
||||
}
|
||||
|
||||
closeOnExec(remote);
|
||||
|
|
Loading…
Reference in a new issue