forked from lix-project/lix
3f4ed681c2
zombie at a time, so if multiple children died before the handler got to run, some of them would not be cleaned up.
699 lines
19 KiB
C++
699 lines
19 KiB
C++
#include "shared.hh"
|
|
#include "local-store.hh"
|
|
#include "util.hh"
|
|
#include "serialise.hh"
|
|
#include "worker-protocol.hh"
|
|
#include "archive.hh"
|
|
#include "globals.hh"
|
|
|
|
#include <iostream>
|
|
#include <unistd.h>
|
|
#include <signal.h>
|
|
#include <sys/types.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/un.h>
|
|
#include <fcntl.h>
|
|
#include <errno.h>
|
|
|
|
using namespace nix;
|
|
|
|
|
|
#ifndef SIGPOLL
|
|
#define SIGPOLL SIGIO
|
|
#endif
|
|
|
|
|
|
static FdSource from(STDIN_FILENO);
|
|
static FdSink to(STDOUT_FILENO);
|
|
|
|
bool canSendStderr;
|
|
pid_t myPid;
|
|
|
|
|
|
|
|
/* This function is called anytime we want to write something to
|
|
stderr. If we're in a state where the protocol allows it (i.e.,
|
|
when canSendStderr), send the message to the client over the
|
|
socket. */
|
|
static void tunnelStderr(const unsigned char * buf, size_t count)
|
|
{
|
|
/* Don't send the message to the client if we're a child of the
|
|
process handling the connection. Otherwise we could screw up
|
|
the protocol. It's up to the parent to redirect stderr and
|
|
send it to the client somehow (e.g., as in build.cc). */
|
|
if (canSendStderr && myPid == getpid()) {
|
|
try {
|
|
writeInt(STDERR_NEXT, to);
|
|
writeString(string((char *) buf, count), to);
|
|
} catch (...) {
|
|
/* Write failed; that means that the other side is
|
|
gone. */
|
|
canSendStderr = false;
|
|
throw;
|
|
}
|
|
} else
|
|
writeFull(STDERR_FILENO, buf, count);
|
|
}
|
|
|
|
|
|
/* Return true if the remote side has closed its end of the
|
|
connection, false otherwise. Should not be called on any socket on
|
|
which we expect input! */
|
|
static bool isFarSideClosed(int socket)
|
|
{
|
|
struct timeval timeout;
|
|
timeout.tv_sec = timeout.tv_usec = 0;
|
|
|
|
fd_set fds;
|
|
FD_ZERO(&fds);
|
|
FD_SET(socket, &fds);
|
|
|
|
while (select(socket + 1, &fds, 0, 0, &timeout) == -1)
|
|
if (errno != EINTR) throw SysError("select()");
|
|
|
|
if (!FD_ISSET(socket, &fds)) return false;
|
|
|
|
/* Destructive read to determine whether the select() marked the
|
|
socket as readable because there is actual input or because
|
|
we've reached EOF (i.e., a read of size 0 is available). */
|
|
char c;
|
|
int rd;
|
|
if ((rd = read(socket, &c, 1)) > 0)
|
|
throw Error("EOF expected (protocol error?)");
|
|
else if (rd == -1 && errno != ECONNRESET)
|
|
throw SysError("expected connection reset or EOF");
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/* A SIGPOLL signal is received when data is available on the client
|
|
communication scoket, or when the client has closed its side of the
|
|
socket. This handler is enabled at precisely those moments in the
|
|
protocol when we're doing work and the client is supposed to be
|
|
quiet. Thus, if we get a SIGPOLL signal, it means that the client
|
|
has quit. So we should quit as well.
|
|
|
|
Too bad most operating systems don't support the POLL_HUP value for
|
|
si_code in siginfo_t. That would make most of the SIGPOLL
|
|
complexity unnecessary, i.e., we could just enable SIGPOLL all the
|
|
time and wouldn't have to worry about races. */
|
|
static void sigPollHandler(int sigNo)
|
|
{
|
|
try {
|
|
/* Check that the far side actually closed. We're still
|
|
getting spurious signals every once in a while. I.e.,
|
|
there is no input available, but we get a signal with
|
|
POLL_IN set. Maybe it's delayed or something. */
|
|
if (isFarSideClosed(from.fd)) {
|
|
if (!blockInt) {
|
|
_isInterrupted = 1;
|
|
blockInt = 1;
|
|
canSendStderr = false;
|
|
string s = "SIGPOLL\n";
|
|
write(STDERR_FILENO, s.c_str(), s.size());
|
|
}
|
|
} else {
|
|
string s = "spurious SIGPOLL\n";
|
|
write(STDERR_FILENO, s.c_str(), s.size());
|
|
}
|
|
}
|
|
catch (Error & e) {
|
|
/* Shouldn't happen. */
|
|
string s = "impossible: " + e.msg() + '\n';
|
|
write(STDERR_FILENO, s.c_str(), s.size());
|
|
throw;
|
|
}
|
|
}
|
|
|
|
|
|
static void setSigPollAction(bool enable)
|
|
{
|
|
struct sigaction act, oact;
|
|
act.sa_handler = enable ? sigPollHandler : SIG_IGN;
|
|
sigfillset(&act.sa_mask);
|
|
act.sa_flags = 0;
|
|
if (sigaction(SIGPOLL, &act, &oact))
|
|
throw SysError("setting handler for SIGPOLL");
|
|
}
|
|
|
|
|
|
/* startWork() means that we're starting an operation for which we
|
|
want to send out stderr to the client. */
|
|
static void startWork()
|
|
{
|
|
canSendStderr = true;
|
|
|
|
/* Handle client death asynchronously. */
|
|
setSigPollAction(true);
|
|
|
|
/* Of course, there is a race condition here: the socket could
|
|
have closed between when we last read from / wrote to it, and
|
|
between the time we set the handler for SIGPOLL. In that case
|
|
we won't get the signal. So do a non-blocking select() to find
|
|
out if any input is available on the socket. If there is, it
|
|
has to be the 0-byte read that indicates that the socket has
|
|
closed. */
|
|
if (isFarSideClosed(from.fd)) {
|
|
_isInterrupted = 1;
|
|
checkInterrupt();
|
|
}
|
|
}
|
|
|
|
|
|
/* stopWork() means that we're done; stop sending stderr to the
|
|
client. */
|
|
static void stopWork(bool success = true, const string & msg = "")
|
|
{
|
|
/* Stop handling async client death; we're going to a state where
|
|
we're either sending or receiving from the client, so we'll be
|
|
notified of client death anyway. */
|
|
setSigPollAction(false);
|
|
|
|
canSendStderr = false;
|
|
|
|
if (success)
|
|
writeInt(STDERR_LAST, to);
|
|
else {
|
|
writeInt(STDERR_ERROR, to);
|
|
writeString(msg, to);
|
|
}
|
|
}
|
|
|
|
|
|
struct TunnelSink : Sink
|
|
{
|
|
Sink & to;
|
|
TunnelSink(Sink & to) : to(to)
|
|
{
|
|
}
|
|
virtual void operator ()
|
|
(const unsigned char * data, unsigned int len)
|
|
{
|
|
writeInt(STDERR_WRITE, to);
|
|
writeString(string((const char *) data, len), to);
|
|
}
|
|
};
|
|
|
|
|
|
struct TunnelSource : Source
|
|
{
|
|
Source & from;
|
|
TunnelSource(Source & from) : from(from)
|
|
{
|
|
}
|
|
virtual void operator ()
|
|
(unsigned char * data, unsigned int len)
|
|
{
|
|
/* Careful: we're going to receive data from the client now,
|
|
so we have to disable the SIGPOLL handler. */
|
|
setSigPollAction(false);
|
|
canSendStderr = false;
|
|
|
|
writeInt(STDERR_READ, to);
|
|
writeInt(len, to);
|
|
string s = readString(from);
|
|
if (s.size() != len) throw Error("not enough data");
|
|
memcpy(data, (const unsigned char *) s.c_str(), len);
|
|
|
|
startWork();
|
|
}
|
|
};
|
|
|
|
|
|
static void performOp(unsigned int clientVersion,
|
|
Source & from, Sink & to, unsigned int op)
|
|
{
|
|
switch (op) {
|
|
|
|
#if 0
|
|
case wopQuit: {
|
|
/* Close the database. */
|
|
store.reset((StoreAPI *) 0);
|
|
writeInt(1, to);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
case wopIsValidPath: {
|
|
Path path = readStorePath(from);
|
|
startWork();
|
|
bool result = store->isValidPath(path);
|
|
stopWork();
|
|
writeInt(result, to);
|
|
break;
|
|
}
|
|
|
|
case wopHasSubstitutes: {
|
|
Path path = readStorePath(from);
|
|
startWork();
|
|
bool result = store->hasSubstitutes(path);
|
|
stopWork();
|
|
writeInt(result, to);
|
|
break;
|
|
}
|
|
|
|
case wopQueryPathHash: {
|
|
Path path = readStorePath(from);
|
|
startWork();
|
|
Hash hash = store->queryPathHash(path);
|
|
stopWork();
|
|
writeString(printHash(hash), to);
|
|
break;
|
|
}
|
|
|
|
case wopQueryReferences:
|
|
case wopQueryReferrers: {
|
|
Path path = readStorePath(from);
|
|
startWork();
|
|
PathSet paths;
|
|
if (op == wopQueryReferences)
|
|
store->queryReferences(path, paths);
|
|
else
|
|
store->queryReferrers(path, paths);
|
|
stopWork();
|
|
writeStringSet(paths, to);
|
|
break;
|
|
}
|
|
|
|
case wopQueryDeriver: {
|
|
Path path = readStorePath(from);
|
|
startWork();
|
|
Path deriver = store->queryDeriver(path);
|
|
stopWork();
|
|
writeString(deriver, to);
|
|
break;
|
|
}
|
|
|
|
case wopAddToStore: {
|
|
/* !!! uberquick hack */
|
|
string baseName = readString(from);
|
|
bool fixed = readInt(from) == 1;
|
|
bool recursive = readInt(from) == 1;
|
|
string hashAlgo = readString(from);
|
|
|
|
Path tmp = createTempDir();
|
|
AutoDelete delTmp(tmp);
|
|
Path tmp2 = tmp + "/" + baseName;
|
|
restorePath(tmp2, from);
|
|
|
|
startWork();
|
|
Path path = store->addToStore(tmp2, fixed, recursive, hashAlgo);
|
|
stopWork();
|
|
|
|
writeString(path, to);
|
|
break;
|
|
}
|
|
|
|
case wopAddTextToStore: {
|
|
string suffix = readString(from);
|
|
string s = readString(from);
|
|
PathSet refs = readStorePaths(from);
|
|
startWork();
|
|
Path path = store->addTextToStore(suffix, s, refs);
|
|
stopWork();
|
|
writeString(path, to);
|
|
break;
|
|
}
|
|
|
|
case wopExportPath: {
|
|
Path path = readStorePath(from);
|
|
bool sign = readInt(from) == 1;
|
|
startWork();
|
|
TunnelSink sink(to);
|
|
store->exportPath(path, sign, sink);
|
|
stopWork();
|
|
writeInt(1, to);
|
|
break;
|
|
}
|
|
|
|
case wopImportPath: {
|
|
startWork();
|
|
TunnelSource source(from);
|
|
Path path = store->importPath(true, source);
|
|
stopWork();
|
|
writeString(path, to);
|
|
break;
|
|
}
|
|
|
|
case wopBuildDerivations: {
|
|
PathSet drvs = readStorePaths(from);
|
|
startWork();
|
|
store->buildDerivations(drvs);
|
|
stopWork();
|
|
writeInt(1, to);
|
|
break;
|
|
}
|
|
|
|
case wopEnsurePath: {
|
|
Path path = readStorePath(from);
|
|
startWork();
|
|
store->ensurePath(path);
|
|
stopWork();
|
|
writeInt(1, to);
|
|
break;
|
|
}
|
|
|
|
case wopAddTempRoot: {
|
|
Path path = readStorePath(from);
|
|
startWork();
|
|
store->addTempRoot(path);
|
|
stopWork();
|
|
writeInt(1, to);
|
|
break;
|
|
}
|
|
|
|
case wopAddIndirectRoot: {
|
|
Path path = absPath(readString(from));
|
|
startWork();
|
|
store->addIndirectRoot(path);
|
|
stopWork();
|
|
writeInt(1, to);
|
|
break;
|
|
}
|
|
|
|
case wopSyncWithGC: {
|
|
startWork();
|
|
store->syncWithGC();
|
|
stopWork();
|
|
writeInt(1, to);
|
|
break;
|
|
}
|
|
|
|
case wopFindRoots: {
|
|
startWork();
|
|
Roots roots = store->findRoots();
|
|
stopWork();
|
|
writeInt(roots.size(), to);
|
|
for (Roots::iterator i = roots.begin(); i != roots.end(); ++i) {
|
|
writeString(i->first, to);
|
|
writeString(i->second, to);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case wopCollectGarbage: {
|
|
GCOptions options;
|
|
options.action = (GCOptions::GCAction) readInt(from);
|
|
options.pathsToDelete = readStorePaths(from);
|
|
options.ignoreLiveness = readInt(from);
|
|
options.maxFreed = readLongLong(from);
|
|
options.maxLinks = readInt(from);
|
|
|
|
GCResults results;
|
|
|
|
startWork();
|
|
if (options.ignoreLiveness)
|
|
throw Error("you are not allowed to ignore liveness");
|
|
store->collectGarbage(options, results);
|
|
stopWork();
|
|
|
|
writeStringSet(results.paths, to);
|
|
writeLongLong(results.bytesFreed, to);
|
|
writeLongLong(results.blocksFreed, to);
|
|
|
|
break;
|
|
}
|
|
|
|
case wopSetOptions: {
|
|
keepFailed = readInt(from) != 0;
|
|
keepGoing = readInt(from) != 0;
|
|
tryFallback = readInt(from) != 0;
|
|
verbosity = (Verbosity) readInt(from);
|
|
maxBuildJobs = readInt(from);
|
|
maxSilentTime = readInt(from);
|
|
if (GET_PROTOCOL_MINOR(clientVersion) >= 2)
|
|
useBuildHook = readInt(from) != 0;
|
|
if (GET_PROTOCOL_MINOR(clientVersion) >= 4) {
|
|
buildVerbosity = (Verbosity) readInt(from);
|
|
logType = (LogType) readInt(from);
|
|
printBuildTrace = readInt(from) != 0;
|
|
}
|
|
startWork();
|
|
stopWork();
|
|
break;
|
|
}
|
|
|
|
case wopQuerySubstitutablePathInfo: {
|
|
Path path = absPath(readString(from));
|
|
startWork();
|
|
SubstitutablePathInfo info;
|
|
bool res = store->querySubstitutablePathInfo(path, info);
|
|
stopWork();
|
|
writeInt(res ? 1 : 0, to);
|
|
if (res) {
|
|
writeString(info.deriver, to);
|
|
writeStringSet(info.references, to);
|
|
writeLongLong(info.downloadSize, to);
|
|
}
|
|
break;
|
|
}
|
|
|
|
default:
|
|
throw Error(format("invalid operation %1%") % op);
|
|
}
|
|
}
|
|
|
|
|
|
static void processConnection()
|
|
{
|
|
RemoveTempRoots removeTempRoots __attribute__((unused));
|
|
|
|
canSendStderr = false;
|
|
myPid = getpid();
|
|
writeToStderr = tunnelStderr;
|
|
|
|
/* Allow us to receive SIGPOLL for events on the client socket. */
|
|
setSigPollAction(false);
|
|
if (fcntl(from.fd, F_SETOWN, getpid()) == -1)
|
|
throw SysError("F_SETOWN");
|
|
if (fcntl(from.fd, F_SETFL, fcntl(from.fd, F_GETFL, 0) | FASYNC) == -1)
|
|
throw SysError("F_SETFL");
|
|
|
|
/* Exchange the greeting. */
|
|
unsigned int magic = readInt(from);
|
|
if (magic != WORKER_MAGIC_1) throw Error("protocol mismatch");
|
|
writeInt(WORKER_MAGIC_2, to);
|
|
|
|
writeInt(PROTOCOL_VERSION, to);
|
|
unsigned int clientVersion = readInt(from);
|
|
|
|
/* Send startup error messages to the client. */
|
|
startWork();
|
|
|
|
try {
|
|
|
|
/* If we can't accept clientVersion, then throw an error
|
|
*here* (not above). */
|
|
|
|
#if 0
|
|
/* Prevent users from doing something very dangerous. */
|
|
if (geteuid() == 0 &&
|
|
querySetting("build-users-group", "") == "")
|
|
throw Error("if you run `nix-worker' as root, then you MUST set `build-users-group'!");
|
|
#endif
|
|
|
|
/* Open the store. */
|
|
store = boost::shared_ptr<StoreAPI>(new LocalStore());
|
|
|
|
stopWork();
|
|
|
|
} catch (Error & e) {
|
|
stopWork(false, e.msg());
|
|
return;
|
|
}
|
|
|
|
/* Process client requests. */
|
|
unsigned int opCount = 0;
|
|
|
|
while (true) {
|
|
WorkerOp op;
|
|
try {
|
|
op = (WorkerOp) readInt(from);
|
|
} catch (EndOfFile & e) {
|
|
break;
|
|
}
|
|
|
|
opCount++;
|
|
|
|
try {
|
|
performOp(clientVersion, from, to, op);
|
|
} catch (Error & e) {
|
|
stopWork(false, e.msg());
|
|
}
|
|
|
|
assert(!canSendStderr);
|
|
};
|
|
|
|
printMsg(lvlError, format("%1% worker operations") % opCount);
|
|
}
|
|
|
|
|
|
static void sigChldHandler(int sigNo)
|
|
{
|
|
/* Reap all dead children. */
|
|
while (waitpid(-1, 0, WNOHANG) > 0) ;
|
|
}
|
|
|
|
|
|
static void setSigChldAction(bool autoReap)
|
|
{
|
|
struct sigaction act, oact;
|
|
act.sa_handler = autoReap ? sigChldHandler : SIG_DFL;
|
|
sigfillset(&act.sa_mask);
|
|
act.sa_flags = 0;
|
|
if (sigaction(SIGCHLD, &act, &oact))
|
|
throw SysError("setting SIGCHLD handler");
|
|
}
|
|
|
|
|
|
static void daemonLoop()
|
|
{
|
|
/* Get rid of children automatically; don't let them become
|
|
zombies. */
|
|
setSigChldAction(true);
|
|
|
|
/* Create and bind to a Unix domain socket. */
|
|
AutoCloseFD fdSocket = socket(PF_UNIX, SOCK_STREAM, 0);
|
|
if (fdSocket == -1)
|
|
throw SysError("cannot create Unix domain socket");
|
|
|
|
string socketPath = nixStateDir + DEFAULT_SOCKET_PATH;
|
|
|
|
createDirs(dirOf(socketPath));
|
|
|
|
/* Urgh, sockaddr_un allows path names of only 108 characters. So
|
|
chdir to the socket directory so that we can pass a relative
|
|
path name. */
|
|
chdir(dirOf(socketPath).c_str());
|
|
Path socketPathRel = "./" + baseNameOf(socketPath);
|
|
|
|
struct sockaddr_un addr;
|
|
addr.sun_family = AF_UNIX;
|
|
if (socketPathRel.size() >= sizeof(addr.sun_path))
|
|
throw Error(format("socket path `%1%' is too long") % socketPathRel);
|
|
strcpy(addr.sun_path, socketPathRel.c_str());
|
|
|
|
unlink(socketPath.c_str());
|
|
|
|
/* Make sure that the socket is created with 0666 permission
|
|
(everybody can connect --- provided they have access to the
|
|
directory containing the socket). */
|
|
mode_t oldMode = umask(0111);
|
|
int res = bind(fdSocket, (struct sockaddr *) &addr, sizeof(addr));
|
|
umask(oldMode);
|
|
if (res == -1)
|
|
throw SysError(format("cannot bind to socket `%1%'") % socketPath);
|
|
|
|
chdir("/"); /* back to the root */
|
|
|
|
if (listen(fdSocket, 5) == -1)
|
|
throw SysError(format("cannot listen on socket `%1%'") % socketPath);
|
|
|
|
/* Loop accepting connections. */
|
|
while (1) {
|
|
|
|
try {
|
|
/* Important: the server process *cannot* open the
|
|
Berkeley DB environment, because it doesn't like forks
|
|
very much. */
|
|
assert(!store);
|
|
|
|
/* Accept a connection. */
|
|
struct sockaddr_un remoteAddr;
|
|
socklen_t remoteAddrLen = sizeof(remoteAddr);
|
|
|
|
AutoCloseFD remote = accept(fdSocket,
|
|
(struct sockaddr *) &remoteAddr, &remoteAddrLen);
|
|
checkInterrupt();
|
|
if (remote == -1)
|
|
if (errno == EINTR)
|
|
continue;
|
|
else
|
|
throw SysError("accepting connection");
|
|
|
|
printMsg(lvlInfo, format("accepted connection %1%") % remote);
|
|
|
|
/* Fork a child to handle the connection. */
|
|
pid_t child;
|
|
child = fork();
|
|
|
|
switch (child) {
|
|
|
|
case -1:
|
|
throw SysError("unable to fork");
|
|
|
|
case 0:
|
|
try { /* child */
|
|
|
|
/* Background the worker. */
|
|
if (setsid() == -1)
|
|
throw SysError(format("creating a new session"));
|
|
|
|
/* Restore normal handling of SIGCHLD. */
|
|
setSigChldAction(false);
|
|
|
|
/* Handle the connection. */
|
|
from.fd = remote;
|
|
to.fd = remote;
|
|
processConnection();
|
|
|
|
} catch (std::exception & e) {
|
|
std::cerr << format("child error: %1%\n") % e.what();
|
|
}
|
|
exit(0);
|
|
}
|
|
|
|
} catch (Interrupted & e) {
|
|
throw;
|
|
} catch (Error & e) {
|
|
printMsg(lvlError, format("error processing connection: %1%") % e.msg());
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void run(Strings args)
|
|
{
|
|
bool slave = false;
|
|
bool daemon = false;
|
|
|
|
for (Strings::iterator i = args.begin(); i != args.end(); ) {
|
|
string arg = *i++;
|
|
if (arg == "--slave") slave = true;
|
|
if (arg == "--daemon") daemon = true;
|
|
}
|
|
|
|
if (slave) {
|
|
/* This prevents us from receiving signals from the terminal
|
|
when we're running in setuid mode. */
|
|
if (setsid() == -1)
|
|
throw SysError(format("creating a new session"));
|
|
|
|
processConnection();
|
|
}
|
|
|
|
else if (daemon) {
|
|
if (setuidMode)
|
|
throw Error("daemon cannot be started in setuid mode");
|
|
chdir("/");
|
|
daemonLoop();
|
|
}
|
|
|
|
else
|
|
throw Error("must be run in either --slave or --daemon mode");
|
|
}
|
|
|
|
|
|
#include "help.txt.hh"
|
|
|
|
void printHelp()
|
|
{
|
|
std::cout << string((char *) helpText, sizeof helpText);
|
|
}
|
|
|
|
|
|
string programId = "nix-worker";
|