Merge "gc: Find roots using libproc on Darwin" into main

This commit is contained in:
Artemis Tosini 2024-04-26 17:26:45 +00:00 committed by Gerrit Code Review
commit 789aa39576
12 changed files with 341 additions and 34 deletions

View file

@ -7,6 +7,8 @@ libstore_DIR := $(d)
libstore_SOURCES := $(wildcard $(d)/*.cc $(d)/builtins/*.cc $(d)/build/*.cc)
ifdef HOST_LINUX
libstore_SOURCES += $(d)/platform/linux.cc
else ifdef HOST_DARWIN
libstore_SOURCES += $(d)/platform/darwin.cc
else
libstore_SOURCES += $(d)/platform/fallback.cc
endif

View file

@ -162,6 +162,9 @@ libstore_headers = files(
if host_machine.system() == 'linux'
libstore_sources += files('platform/linux.cc')
libstore_headers += files('platform/linux.hh')
elif host_machine.system() == 'darwin'
libstore_sources += files('platform/darwin.cc')
libstore_headers += files('platform/darwin.hh')
else
libstore_sources += files('platform/fallback.cc')
libstore_headers += files('platform/fallback.hh')

View file

@ -2,6 +2,8 @@
#if __linux__
#include "platform/linux.hh"
#elif __APPLE__
#include "platform/darwin.hh"
#else
#include "platform/fallback.hh"
#endif
@ -11,6 +13,8 @@ std::shared_ptr<LocalStore> LocalStore::makeLocalStore(const Params & params)
{
#if __linux__
return std::shared_ptr<LocalStore>(new LinuxLocalStore(params));
#elif __APPLE__
return std::shared_ptr<LocalStore>(new DarwinLocalStore(params));
#else
return std::shared_ptr<LocalStore>(new FallbackLocalStore(params));
#endif

View file

@ -0,0 +1,223 @@
#include "gc-store.hh"
#include "signals.hh"
#include "platform/darwin.hh"
#include "regex.hh"
#include <sys/proc_info.h>
#include <sys/sysctl.h>
#include <libproc.h>
#include <regex>
namespace nix {
void DarwinLocalStore::findPlatformRoots(UncheckedRoots & unchecked)
{
auto storePathRegex = regex::storePathRegex(storeDir);
std::vector<int> pids;
int pidBufSize = 1;
while (pidBufSize > pids.size() * sizeof(int)) {
// Reserve some extra size so we don't fail too much
pids.resize((pidBufSize + pidBufSize / 8) / sizeof(int));
pidBufSize = proc_listpids(PROC_ALL_PIDS, 0, pids.data(), pids.size() * sizeof(int));
if (pidBufSize <= 0) {
throw SysError("Listing PIDs");
}
}
pids.resize(pidBufSize / sizeof(int));
for (auto pid : pids) {
// It doesn't make sense to ask about the kernel
if (pid == 0) {
continue;
}
try {
// Process cwd/root directory
struct proc_vnodepathinfo vnodeInfo;
if (proc_pidinfo(pid, PROC_PIDVNODEPATHINFO, 0, &vnodeInfo, sizeof(vnodeInfo)) <= 0) {
throw SysError("Getting pid %1% working directory", pid);
}
unchecked[std::string(vnodeInfo.pvi_cdir.vip_path)].emplace(fmt("{libproc/%d/cwd}", pid)
);
unchecked[std::string(vnodeInfo.pvi_rdir.vip_path)].emplace(
fmt("{libproc/%d/rootdir}", pid)
);
// File descriptors
std::vector<struct proc_fdinfo> fds;
int fdBufSize = 1;
while (fdBufSize > fds.size() * sizeof(struct proc_fdinfo)) {
// Reserve some extra size so we don't fail too much
fds.resize((fdBufSize + fdBufSize / 8) / sizeof(struct proc_fdinfo));
fdBufSize = proc_pidinfo(
pid, PROC_PIDLISTFDS, 0, fds.data(), fds.size() * sizeof(struct proc_fdinfo)
);
if (fdBufSize <= 0) {
throw SysError("Listing pid %1% file descriptors", pid);
}
}
fds.resize(fdBufSize / sizeof(struct proc_fdinfo));
for (auto fd : fds) {
// By definition, only a vnode is on the filesystem
if (fd.proc_fdtype != PROX_FDTYPE_VNODE) {
continue;
}
struct vnode_fdinfowithpath fdInfo;
if (proc_pidfdinfo(
pid, fd.proc_fd, PROC_PIDFDVNODEPATHINFO, &fdInfo, sizeof(fdInfo)
)
<= 0)
{
// They probably just closed this fd, no need to cancel looking at ranges and
// arguments
if (errno == EBADF) {
continue;
}
throw SysError("Getting pid %1% fd %2% path", pid, fd.proc_fd);
}
unchecked[std::string(fdInfo.pvip.vip_path)].emplace(
fmt("{libproc/%d/fd/%d}", pid, fd.proc_fd)
);
}
// Regions (e.g. mmapped files, executables, shared libraries)
uint64_t nextAddr = 0;
while (true) {
// Seriously, what are you doing XNU?
// There's 3 flavors of PROC_PIDREGIONPATHINFO:
// * PROC_PIDREGIONPATHINFO includes all regions
// * PROC_PIDREGIONPATHINFO2 includes regions backed by a vnode
// * PROC_PIDREGIONPATHINFO3 includes regions backed by a vnode on a specified
// filesystem Only PROC_PIDREGIONPATHINFO is documented. Unfortunately, using it
// would make finding gcroots take about 100x as long and tests would fail from
// timeout. According to the Frida source code, PROC_PIDREGIONPATHINFO2 has been
// available since XNU 2782.1.97 in OS X 10.10
//
// 22 means PROC_PIDREGIONPATHINFO2
struct proc_regionwithpathinfo regionInfo;
if (proc_pidinfo(pid, 22, nextAddr, &regionInfo, sizeof(regionInfo)) <= 0) {
// PROC_PIDREGIONPATHINFO signals we're done with an error,
// so we're expected to hit this once per process
if (errno == ESRCH || errno == EINVAL) {
break;
}
throw SysError("Getting pid %1% region path", pid);
}
unchecked[std::string(regionInfo.prp_vip.vip_path)].emplace(
fmt("{libproc/%d/region}", pid)
);
nextAddr = regionInfo.prp_prinfo.pri_address + regionInfo.prp_prinfo.pri_size;
}
// Arguments and environment variables
// We can't read environment variables of binaries with entitlements unless
// nix has the `com.apple.private.read-environment-variables` entitlement or SIP is off
// We can read arguments for all applications though.
// Yes, it's a sysctl, the proc_info and sysctl APIs are mostly similar,
// but both have exclusive capabilities
int sysctlName[3] = {CTL_KERN, KERN_PROCARGS2, pid};
size_t argsSize = 0;
if (sysctl(sysctlName, 3, nullptr, &argsSize, nullptr, 0) < 0) {
throw SysError("Reading pid %1% arguments", pid);
}
std::vector<char> args(argsSize);
if (sysctl(sysctlName, 3, args.data(), &argsSize, nullptr, 0) < 0) {
throw SysError("Reading pid %1% arguments", pid);
}
if (argsSize < args.size()) {
args.resize(argsSize);
}
// We have these perfectly nice arguments, but have to ignore them because
// otherwise we'd see arguments to nix-store commands and
// `nix-store --delete /nix/store/whatever` would always fail
// First 4 bytes are an int of argc.
if (args.size() < sizeof(int)) {
continue;
}
auto argc = reinterpret_cast<int *>(args.data())[0];
auto argsIter = args.begin();
std::advance(argsIter, sizeof(int));
// Executable then argc args, each separated by some number of null bytes
for (int i = 0; argsIter != args.end() && i < argc + 1; i++) {
argsIter = std::find(argsIter, args.end(), '\0');
argsIter = std::find_if(argsIter, args.end(), [](char ch) { return ch != '\0'; });
}
if (argsIter != args.end()) {
auto env_end = std::sregex_iterator{};
for (auto i = std::sregex_iterator{argsIter, args.end(), storePathRegex};
i != env_end;
++i)
{
unchecked[i->str()].emplace(fmt("{libproc/%d/environ}", pid));
}
};
// Per-thread working directories
struct proc_taskallinfo taskAllInfo;
if (proc_pidinfo(pid, PROC_PIDTASKALLINFO, 0, &taskAllInfo, sizeof(taskAllInfo)) <= 0) {
throw SysError("Reading pid %1% tasks", pid);
}
// If the process doesn't have the per-thread cwd flag then we already have the
// process-wide cwd from PROC_PIDVNODEPATHINFO
if (taskAllInfo.pbsd.pbi_flags & PROC_FLAG_THCWD) {
std::vector<uint64_t> tids(taskAllInfo.ptinfo.pti_threadnum);
int tidBufSize = proc_pidinfo(
pid, PROC_PIDLISTTHREADS, 0, tids.data(), tids.size() * sizeof(uint64_t)
);
if (tidBufSize <= 0) {
throw SysError("Listing pid %1% threads", pid);
}
for (auto tid : tids) {
struct proc_threadwithpathinfo threadPathInfo;
if (proc_pidinfo(
pid,
PROC_PIDTHREADPATHINFO,
tid,
&threadPathInfo,
sizeof(threadPathInfo)
)
<= 0)
{
throw SysError("Reading pid %1% thread %2% cwd", pid, tid);
}
unchecked[std::string(threadPathInfo.pvip.vip_path)].emplace(
fmt("{libproc/%d/thread/%d/cwd}", pid, tid)
);
}
}
} catch (SysError & e) {
// ENOENT/ESRCH: Process no longer exists (proc_info)
// EINVAL: Process no longer exists (sysctl)
// EACCESS/EPERM: We don't have permission to read this field (proc_info)
// EIO: Kernel failed to read from target process memory during KERN_PROCARGS2 (sysctl)
if (errno == ENOENT || errno == ESRCH || errno == EINVAL || errno == EACCES
|| errno == EPERM || errno == EIO)
{
continue;
}
throw;
}
}
}
}

View file

@ -0,0 +1,35 @@
#pragma once
///@file
#include "gc-store.hh"
#include "local-store.hh"
namespace nix {
/**
* Darwin-specific implementation of LocalStore
*/
class DarwinLocalStore : public LocalStore
{
public:
DarwinLocalStore(const Params & params)
: StoreConfig(params)
, LocalFSStoreConfig(params)
, LocalStoreConfig(params)
, Store(params)
, LocalFSStore(params)
, LocalStore(params)
{
}
DarwinLocalStore(const std::string scheme, std::string path, const Params & params)
: DarwinLocalStore(params)
{
throw UnimplementedError("DarwinLocalStore");
}
private:
void findPlatformRoots(UncheckedRoots & unchecked) override;
};
}

View file

@ -1,6 +1,7 @@
#include "gc-store.hh"
#include "signals.hh"
#include "platform/linux.hh"
#include "regex.hh"
#include <regex>
@ -26,12 +27,6 @@ static void readProcLink(const std::string & file, UncheckedRoots & roots)
}
}
static std::string quoteRegexChars(const std::string & raw)
{
static auto specialRegex = std::regex(R"([.^$\\*+?()\[\]{}|])");
return std::regex_replace(raw, specialRegex, R"(\$&)");
}
static void readFileRoots(const char * path, UncheckedRoots & roots)
{
try {
@ -50,8 +45,7 @@ void LinuxLocalStore::findPlatformRoots(UncheckedRoots & unchecked)
struct dirent * ent;
auto digitsRegex = std::regex(R"(^\d+$)");
auto mapRegex = std::regex(R"(^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(/\S+)\s*$)");
auto storePathRegex =
std::regex(quoteRegexChars(storeDir) + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)");
auto storePathRegex = regex::storePathRegex(storeDir);
while (errno = 0, ent = readdir(procDir.get())) {
checkInterrupt();
if (std::regex_match(ent->d_name, digitsRegex)) {

View file

@ -22,6 +22,7 @@ libutil_sources = files(
'position.cc',
'print-elided.cc',
'references.cc',
'regex.cc',
'serialise.cc',
'shlex.cc',
'signals.cc',
@ -77,6 +78,7 @@ libutil_headers = files(
'ref.hh',
'references.hh',
'regex-combinators.hh',
'regex.hh',
'repair-flag.hh',
'serialise.hh',
'shlex.hh',

16
src/libutil/regex.cc Normal file
View file

@ -0,0 +1,16 @@
#include <string>
#include <regex>
namespace nix::regex {
std::string quoteRegexChars(const std::string & raw)
{
static auto specialRegex = std::regex(R"([.^$\\*+?()\[\]{}|])");
return std::regex_replace(raw, specialRegex, R"(\$&)");
}
std::regex storePathRegex(const std::string & storeDir)
{
return std::regex(quoteRegexChars(storeDir) + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)");
}
}

11
src/libutil/regex.hh Normal file
View file

@ -0,0 +1,11 @@
#pragma once
///@file
#include <string>
#include <regex>
namespace nix::regex {
std::string quoteRegexChars(const std::string & raw);
std::regex storePathRegex(const std::string & storeDir);
}

View file

@ -24,7 +24,6 @@ if [[ -n $NIX_STORE ]]; then
export _NIX_TEST_NO_SANDBOX=1
fi
export _NIX_IN_TEST=$TEST_ROOT/shared
export _NIX_TEST_NO_LSOF=1
export NIX_REMOTE=${NIX_REMOTE_-}
unset NIX_PATH
export TEST_HOME=$TEST_ROOT/test-home

View file

@ -1,17 +1,29 @@
with import ./config.nix;
mkDerivation {
name = "gc-runtime";
builder =
# Test inline source file definitions.
builtins.toFile "builder.sh" ''
mkdir $out
{
environ = mkDerivation {
name = "gc-runtime-environ";
buildCommand = "mkdir $out; echo environ > $out/environ";
};
cat > $out/program <<EOF
#! ${shell}
sleep 10000
EOF
open = mkDerivation {
name = "gc-runtime-open";
buildCommand = "mkdir $out; echo open > $out/open";
};
chmod +x $out/program
'';
program = mkDerivation {
name = "gc-runtime-program";
builder =
# Test inline source file definitions.
builtins.toFile "builder.sh" ''
mkdir $out
cat > $out/program <<EOF
#! ${shell}
sleep 10000 < \$1
EOF
chmod +x $out/program
'';
};
}

View file

@ -1,38 +1,44 @@
source common.sh
case $system in
*linux*)
;;
*)
skipTest "Not running Linux";
esac
set -m # enable job control, needed for kill
profiles="$NIX_STATE_DIR"/profiles
rm -rf $profiles
nix-env -p $profiles/test -f ./gc-runtime.nix -i gc-runtime
nix-env -p $profiles/test -f ./gc-runtime.nix -i gc-runtime-{program,environ,open}
outPath=$(nix-env -p $profiles/test -q --no-name --out-path gc-runtime)
echo $outPath
programPath=$(nix-env -p $profiles/test -q --no-name --out-path gc-runtime-program)
environPath=$(nix-env -p $profiles/test -q --no-name --out-path gc-runtime-environ)
openPath=$(nix-env -p $profiles/test -q --no-name --out-path gc-runtime-open)
echo $programPath $environPath $openPath
echo "backgrounding program..."
$profiles/test/program &
export environPath
$profiles/test/program $openPath/open &
sleep 2 # hack - wait for the program to get started
child=$!
echo PID=$child
nix-env -p $profiles/test -e gc-runtime
nix-env -p $profiles/test -e gc-runtime-{program,environ,open}
nix-env -p $profiles/test --delete-generations old
nix-store --gc
kill -- -$child
if ! test -e $outPath; then
if ! test -e $programPath; then
echo "running program was garbage collected!"
exit 1
fi
if ! test -e $environPath; then
echo "file in environment variable was garbage collected!"
exit 1
fi
if ! test -e $openPath; then
echo "opened file was garbage collected!"
exit 1
fi
exit 0