gc: clean up error handling on Darwin

This commit is contained in:
Artemis Tosini 2024-03-30 22:21:37 -04:00
parent 1cce5759c7
commit 474ea179fb

View file

@ -330,6 +330,13 @@ Roots LocalStore::findRoots(bool censor)
typedef std::unordered_map<Path, std::unordered_set<std::string>> UncheckedRoots; typedef std::unordered_map<Path, std::unordered_set<std::string>> UncheckedRoots;
/* Return a copy of `raw` in which every regex metacharacter
   (. ^ $ \ * + ? ( ) [ ] { } |) is preceded by a backslash.
   All other characters are copied through unchanged. */
static std::string quoteRegexChars(const std::string & raw)
{
    // Compiled once on first call and reused afterwards.
    static const std::regex metaChars(R"([.^$\\*+?()\[\]{}|])");

    std::string quoted;
    // `$&` expands to the matched character; the leading backslash is emitted literally.
    std::regex_replace(std::back_inserter(quoted), raw.begin(), raw.end(), metaChars, R"(\$&)");
    return quoted;
}
#if __linux__
static void readProcLink(const std::string & file, UncheckedRoots & roots) static void readProcLink(const std::string & file, UncheckedRoots & roots)
{ {
constexpr auto bufsiz = PATH_MAX; constexpr auto bufsiz = PATH_MAX;
@ -348,13 +355,6 @@ static void readProcLink(const std::string & file, UncheckedRoots & roots)
.emplace(file); .emplace(file);
} }
/* Return a copy of `raw` in which every regex metacharacter
   (. ^ $ \ * + ? ( ) [ ] { } |) is preceded by a backslash.
   All other characters are copied through unchanged. */
static std::string quoteRegexChars(const std::string & raw)
{
    // Compiled once on first call and reused afterwards.
    static const std::regex metaChars(R"([.^$\\*+?()\[\]{}|])");

    std::string quoted;
    // `$&` expands to the matched character; the leading backslash is emitted literally.
    std::regex_replace(std::back_inserter(quoted), raw.begin(), raw.end(), metaChars, R"(\$&)");
    return quoted;
}
#if __linux__
static void readFileRoots(const char * path, UncheckedRoots & roots) static void readFileRoots(const char * path, UncheckedRoots & roots)
{ {
try { try {
@ -445,120 +445,122 @@ static void readLibprocRoots(const Path & storeDir, UncheckedRoots & unchecked)
if (pid == 0) if (pid == 0)
continue; continue;
// Process cwd/root directory try {
struct proc_vnodepathinfo vnodeInfo; // Process cwd/root directory
if (proc_pidinfo(pid, PROC_PIDVNODEPATHINFO, 0, &vnodeInfo, sizeof(vnodeInfo)) <= 0) { struct proc_vnodepathinfo vnodeInfo;
if ((errno == ESRCH) || (errno == EPERM)) continue; if (proc_pidinfo(pid, PROC_PIDVNODEPATHINFO, 0, &vnodeInfo, sizeof(vnodeInfo)) <= 0)
throw SysError("Getting pid %1% working directory", pid); throw SysError("Getting pid %1% working directory", pid);
};
unchecked[std::string(vnodeInfo.pvi_cdir.vip_path)].emplace("{libproc/cwd}");
unchecked[std::string(vnodeInfo.pvi_rdir.vip_path)].emplace("{libproc/rootdir}");
// File descriptors unchecked[std::string(vnodeInfo.pvi_cdir.vip_path)].emplace("{libproc/cwd}");
int fdBufSize = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, nullptr, 0); unchecked[std::string(vnodeInfo.pvi_rdir.vip_path)].emplace("{libproc/rootdir}");
if (fdBufSize <= 0) {
if ((errno == ESRCH) || (errno == EPERM)) continue;
throw SysError("Listing pid %1% file descriptors", pid);
}
std::vector<struct proc_fdinfo> fds(fdBufSize / sizeof(struct proc_fdinfo));
fdBufSize = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, fds.data(), fds.size() * sizeof(struct proc_fdinfo));
if (fdBufSize <= 0) {
if ((errno == ESRCH) || (errno == EPERM)) continue;
throw SysError("Listing pid %1% file descriptors", pid);
}
fds.resize(fdBufSize / sizeof(struct proc_fdinfo));
for (auto fd: fds) {
// While there are names for other FD types, only vnodes are in the filesystem
if (fd.proc_fdtype != PROX_FDTYPE_VNODE)
continue;
struct vnode_fdinfowithpath fdInfo;
if (proc_pidfdinfo(pid, fd.proc_fd, PROC_PIDFDVNODEPATHINFO, &fdInfo, sizeof(fdInfo)) <= 0) {
if ((errno == ESRCH) || (errno == EPERM) || (errno == EBADF)) continue;
throw SysError("Getting pid %1% fd %2% path", pid, fd.proc_fd);
}
unchecked[std::string(fdInfo.pvip.vip_path)].emplace("{libproc/fd}");
}
// Regions (e.g. mmapped files, executables, shared libraries)
struct proc_regionwithpathinfo regionInfo;
uint64_t nextAddr = 0;
while (true) {
// Seriously, what are you doing XNU?
// There's 3 flavors of PROC_PIDREGIONPATHINFO:
// * PROC_PIDREGIONPATHINFO includes all regions
// * PROC_PIDREGIONPATHINFO2 includes regions backed by a vnode
// * PROC_PIDREGIONPATHINFO3 includes regions backed by a vnode on a specified filesystem
// Only PROC_PIDREGIONPATHINFO is documented.
// Unfortunately, using it would make finding gcroots take about 50x as long
// and tests would fail from timeout.
// According to the Frida source code, PROC_PIDREGIONPATHINFO2 has been available
// since XNU 2782.1.97 in OS X 10.10
//
// This is PROC_PIDREGIONPATHINFO2
if (proc_pidinfo(pid, 22, nextAddr, &regionInfo, sizeof(regionInfo)) <= 0) {
if ((errno == ESRCH) || (errno == EINVAL) || (errno == EPERM)) break;
throw SysError("Getting pid %1% region path", pid);
}
unchecked[std::string(regionInfo.prp_vip.vip_path)].emplace("{libproc/region}");
nextAddr = regionInfo.prp_prinfo.pri_address + regionInfo.prp_prinfo.pri_size;
}
// Arguments and environment variables
// We can't read environment variables of binaries with entitlements unless
// nix has the `com.apple.private.read-environment-variables` entitlement or SIP is off
// We can read arguments for all applications though.
// Yes, it's a sysctl, the proc_info and sysctl APIs are mostly similar,
// but both have exclusive capabilities
// We don't care about what is args and what is environment so we could use
// KERN_PROCARGS, but KERN_PROCARGS2 saves about 20ms in my testing
int sysctlName[3] = {CTL_KERN, KERN_PROCARGS2, pid};
size_t argsSize = 0;
if (sysctl(sysctlName, 3, nullptr, &argsSize, nullptr, 0) < 0) {
throw SysError("Reading pid %1% arguments", pid);
}
std::vector<char> args(argsSize);
if (sysctl(sysctlName, 3, args.data(), &argsSize, nullptr, 0) < 0) {
throw SysError("Reading pid %1% arguments", pid);
}
if (argsSize < args.size())
args.resize(argsSize);
auto env_end = std::sregex_iterator{};
for (auto i = std::sregex_iterator{args.begin(), args.end(), storePathRegex}; i != env_end; ++i)
unchecked[i->str()].emplace("{libproc/args}");
// Per-thread working directories // File descriptors
struct proc_taskallinfo taskAllInfo; int fdBufSize = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, nullptr, 0);
if (proc_pidinfo(pid, PROC_PIDTASKALLINFO, 0, &taskAllInfo, sizeof(taskAllInfo)) <= 0) { if (fdBufSize <= 0)
if ((errno == ESRCH) || (errno == EPERM)) continue; throw SysError("Listing pid %1% file descriptors", pid);
throw SysError("Reading pid %1% tasks", pid);
}
// If the process doesn't have the per-thread cwd flag then we already have the std::vector<struct proc_fdinfo> fds(fdBufSize / sizeof(struct proc_fdinfo));
// process-wide cwd from PROC_PIDVNODEPATHINFO fdBufSize = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, fds.data(), fds.size() * sizeof(struct proc_fdinfo));
if (taskAllInfo.pbsd.pbi_flags & PROC_FLAG_THCWD) { if (fdBufSize <= 0)
std::vector<uint64_t> tids(taskAllInfo.ptinfo.pti_threadnum); throw SysError("Listing pid %1% file descriptors", pid);
int tidBufSize = proc_pidinfo(pid, PROC_PIDLISTTHREADS, 0, tids.data(), tids.size() * sizeof(uint64_t));
if (tidBufSize <= 0) { fds.resize(fdBufSize / sizeof(struct proc_fdinfo));
if ((errno == ESRCH) || (errno == EPERM)) continue;
throw SysError("Listing pid %1% threads", pid); for (auto fd: fds) {
// By definition, only a vnode is on the filesystem
if (fd.proc_fdtype != PROX_FDTYPE_VNODE)
continue;
struct vnode_fdinfowithpath fdInfo;
if (proc_pidfdinfo(pid, fd.proc_fd, PROC_PIDFDVNODEPATHINFO, &fdInfo, sizeof(fdInfo)) <= 0)
throw SysError("Getting pid %1% fd %2% path", pid, fd.proc_fd);
unchecked[std::string(fdInfo.pvip.vip_path)].emplace("{libproc/fd}");
} }
for (auto tid: tids) {
struct proc_threadwithpathinfo threadPathInfo; // Regions (e.g. mmapped files, executables, shared libraries)
if (proc_pidinfo(pid, PROC_PIDTHREADPATHINFO, tid, &threadPathInfo, sizeof(threadPathInfo)) <= 0) { uint64_t nextAddr = 0;
if ((errno == ESRCH) || (errno == EPERM)) continue; while (true) {
throw SysError("Reading pid %1% thread %2% cwd", pid, tid); // Seriously, what are you doing XNU?
// There's 3 flavors of PROC_PIDREGIONPATHINFO:
// * PROC_PIDREGIONPATHINFO includes all regions
// * PROC_PIDREGIONPATHINFO2 includes regions backed by a vnode
// * PROC_PIDREGIONPATHINFO3 includes regions backed by a vnode on a specified filesystem
// Only PROC_PIDREGIONPATHINFO is documented.
// Unfortunately, using it would make finding gcroots take about 100x as long
// and tests would fail from timeout.
// According to the Frida source code, PROC_PIDREGIONPATHINFO2 has been available
// since XNU 2782.1.97 in OS X 10.10
//
// 22 means PROC_PIDREGIONPATHINFO2
struct proc_regionwithpathinfo regionInfo;
if (proc_pidinfo(pid, 22, nextAddr, &regionInfo, sizeof(regionInfo)) <= 0) {
// PROC_PIDREGIONPATHINFO signals we're done with an error,
// so we're expected to hit this once per process
if (errno == ESRCH || errno == EINVAL) break;
throw SysError("Getting pid %1% region path", pid);
} }
unchecked[std::string(threadPathInfo.pvip.vip_path)].emplace("{libproc/threadcwd}");
unchecked[std::string(regionInfo.prp_vip.vip_path)].emplace("{libproc/region}");
nextAddr = regionInfo.prp_prinfo.pri_address + regionInfo.prp_prinfo.pri_size;
} }
// Arguments and environment variables
// We can't read environment variables of binaries with entitlements unless
// nix has the `com.apple.private.read-environment-variables` entitlement or SIP is off
// We can read arguments for all applications though.
// Yes, it's a sysctl, the proc_info and sysctl APIs are mostly similar,
// but both have exclusive capabilities
// We don't care about what is args and what is environment so we could use
// KERN_PROCARGS, but KERN_PROCARGS2 saves about 20ms in my testing
int sysctlName[3] = {CTL_KERN, KERN_PROCARGS2, pid};
size_t argsSize = 0;
if (sysctl(sysctlName, 3, nullptr, &argsSize, nullptr, 0) < 0) {
throw SysError("Reading pid %1% arguments", pid);
}
std::vector<char> args(argsSize);
if (sysctl(sysctlName, 3, args.data(), &argsSize, nullptr, 0) < 0) {
throw SysError("Reading pid %1% arguments", pid);
}
if (argsSize < args.size())
args.resize(argsSize);
auto env_end = std::sregex_iterator{};
for (auto i = std::sregex_iterator{args.begin(), args.end(), storePathRegex}; i != env_end; ++i)
unchecked[i->str()].emplace("{libproc/args}");
// Per-thread working directories
struct proc_taskallinfo taskAllInfo;
if (proc_pidinfo(pid, PROC_PIDTASKALLINFO, 0, &taskAllInfo, sizeof(taskAllInfo)) <= 0)
throw SysError("Reading pid %1% tasks", pid);
// If the process doesn't have the per-thread cwd flag then we already have the
// process-wide cwd from PROC_PIDVNODEPATHINFO
if (taskAllInfo.pbsd.pbi_flags & PROC_FLAG_THCWD) {
std::vector<uint64_t> tids(taskAllInfo.ptinfo.pti_threadnum);
int tidBufSize = proc_pidinfo(pid, PROC_PIDLISTTHREADS, 0, tids.data(), tids.size() * sizeof(uint64_t));
if (tidBufSize <= 0)
throw SysError("Listing pid %1% threads", pid);
for (auto tid: tids) {
struct proc_threadwithpathinfo threadPathInfo;
if (proc_pidinfo(pid, PROC_PIDTHREADPATHINFO, tid, &threadPathInfo, sizeof(threadPathInfo)) <= 0)
throw SysError("Reading pid %1% thread %2% cwd", pid, tid);
unchecked[std::string(threadPathInfo.pvip.vip_path)].emplace("{libproc/threadcwd}");
}
}
} catch (SysError & e) {
if (errno == ENOENT || errno == EACCES || errno == ESRCH)
continue;
throw;
} }
} }
} }