From 474ea179fb177c0e45745819395161fe3dfa92c2 Mon Sep 17 00:00:00 2001 From: Artemis Tosini Date: Sat, 30 Mar 2024 22:21:37 -0400 Subject: [PATCH] gc: clean up error handling on Darwin --- src/libstore/gc.cc | 228 +++++++++++++++++++++++---------------------- 1 file changed, 115 insertions(+), 113 deletions(-) diff --git a/src/libstore/gc.cc b/src/libstore/gc.cc index 0342965b3..e10156d8a 100644 --- a/src/libstore/gc.cc +++ b/src/libstore/gc.cc @@ -330,6 +330,13 @@ Roots LocalStore::findRoots(bool censor) typedef std::unordered_map<Path, std::unordered_set<std::string>> UncheckedRoots; +static std::string quoteRegexChars(const std::string & raw) +{ + static auto specialRegex = std::regex(R"([.^$\\*+?()\[\]{}|])"); + return std::regex_replace(raw, specialRegex, R"(\$&)"); +} + +#if __linux__ static void readProcLink(const std::string & file, UncheckedRoots & roots) { constexpr auto bufsiz = PATH_MAX; @@ -348,13 +355,6 @@ static void readProcLink(const std::string & file, UncheckedRoots & roots) .emplace(file); } -static std::string quoteRegexChars(const std::string & raw) -{ - static auto specialRegex = std::regex(R"([.^$\\*+?()\[\]{}|])"); - return std::regex_replace(raw, specialRegex, R"(\$&)"); -} - -#if __linux__ static void readFileRoots(const char * path, UncheckedRoots & roots) { try { @@ -445,120 +445,122 @@ static void readLibprocRoots(const Path & storeDir, UncheckedRoots & unchecked) if (pid == 0) continue; - // Process cwd/root directory - struct proc_vnodepathinfo vnodeInfo; - if (proc_pidinfo(pid, PROC_PIDVNODEPATHINFO, 0, &vnodeInfo, sizeof(vnodeInfo)) <= 0) { - if ((errno == ESRCH) || (errno == EPERM)) continue; - throw SysError("Getting pid %1% working directory", pid); - }; - unchecked[std::string(vnodeInfo.pvi_cdir.vip_path)].emplace("{libproc/cwd}"); - unchecked[std::string(vnodeInfo.pvi_rdir.vip_path)].emplace("{libproc/rootdir}"); + try { + // Process cwd/root directory + struct proc_vnodepathinfo vnodeInfo; + if (proc_pidinfo(pid, PROC_PIDVNODEPATHINFO, 0, &vnodeInfo, 
sizeof(vnodeInfo)) <= 0) + throw SysError("Getting pid %1% working directory", pid); - // File descriptors - int fdBufSize = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, nullptr, 0); - if (fdBufSize <= 0) { - if ((errno == ESRCH) || (errno == EPERM)) continue; - throw SysError("Listing pid %1% file descriptors", pid); - } - std::vector<struct proc_fdinfo> fds(fdBufSize / sizeof(struct proc_fdinfo)); - fdBufSize = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, fds.data(), fds.size() * sizeof(struct proc_fdinfo)); - if (fdBufSize <= 0) { - if ((errno == ESRCH) || (errno == EPERM)) continue; - throw SysError("Listing pid %1% file descriptors", pid); - } - fds.resize(fdBufSize / sizeof(struct proc_fdinfo)); - for (auto fd: fds) { - // While there are names for other FD types, only vnodes are in the filesystem - if (fd.proc_fdtype != PROX_FDTYPE_VNODE) - continue; - - struct vnode_fdinfowithpath fdInfo; - if (proc_pidfdinfo(pid, fd.proc_fd, PROC_PIDFDVNODEPATHINFO, &fdInfo, sizeof(fdInfo)) <= 0) { - if ((errno == ESRCH) || (errno == EPERM) || (errno == EBADF)) continue; - throw SysError("Getting pid %1% fd %2% path", pid, fd.proc_fd); - } - unchecked[std::string(fdInfo.pvip.vip_path)].emplace("{libproc/fd}"); - } - - // Regions (e.g. mmapped files, executables, shared libraries) - struct proc_regionwithpathinfo regionInfo; - uint64_t nextAddr = 0; - while (true) { - // Seriously, what are you doing XNU? - // There's 3 flavors of PROC_PIDREGIONPATHINFO: - // * PROC_PIDREGIONPATHINFO includes all regions - // * PROC_PIDREGIONPATHINFO2 includes regions backed by a vnode - // * PROC_PIDREGIONPATHINFO3 includes regions backed by a vnode on a specified filesystem - // Only PROC_PIDREGIONPATHINFO is documented. - // Unfortunately, using it would make finding gcroots take about 50x as long - // and tests would fail from timeout. 
- // According to the Frida source code, PROC_PIDREGIONPATHINFO2 has been available - // since XNU 2782.1.97 in OS X 10.10 - // - // This is PROC_PIDREGIONPATHINFO2 - if (proc_pidinfo(pid, 22, nextAddr, &regionInfo, sizeof(regionInfo)) <= 0) { - if ((errno == ESRCH) || (errno == EINVAL) || (errno == EPERM)) break; - throw SysError("Getting pid %1% region path", pid); - } - unchecked[std::string(regionInfo.prp_vip.vip_path)].emplace("{libproc/region}"); - - nextAddr = regionInfo.prp_prinfo.pri_address + regionInfo.prp_prinfo.pri_size; - } - - // Arguments and environment variables - // We can't read environment variables of binaries with with entitlements unless - // nix has the `com.apple.private.read-environment-variables` entitlement or SIP is off - // We can read arguments for all applications though. - - // Yes, it's a sysctl, the proc_info and sysctl APIs are mostly similar, - // but both have exclusive capabilities - // We don't care about what is args and what is environment so we could use - // KERN_PROCARGS, but KERN_PROCARGS2 saves about 20ms in my testing - int sysctlName[3] = {CTL_KERN, KERN_PROCARGS2, pid}; - size_t argsSize = 0; - if (sysctl(sysctlName, 3, nullptr, &argsSize, nullptr, 0) < 0) { - throw SysError("Reading pid %1% arguments", pid); - } - - std::vector<char> args(argsSize); - - if (sysctl(sysctlName, 3, args.data(), &argsSize, nullptr, 0) < 0) { - throw SysError("Reading pid %1% arguments", pid); - } - - if (argsSize < args.size()) - args.resize(argsSize); - - auto env_end = std::sregex_iterator{}; - for (auto i = std::sregex_iterator{args.begin(), args.end(), storePathRegex}; i != env_end; ++i) - unchecked[i->str()].emplace("{libproc/args}"); + unchecked[std::string(vnodeInfo.pvi_cdir.vip_path)].emplace("{libproc/cwd}"); + unchecked[std::string(vnodeInfo.pvi_rdir.vip_path)].emplace("{libproc/rootdir}"); - // Per-thread working directories - struct proc_taskallinfo taskAllInfo; - if (proc_pidinfo(pid, PROC_PIDTASKALLINFO, 0, &taskAllInfo, 
sizeof(taskAllInfo)) <= 0) { - if ((errno == ESRCH) || (errno == EPERM)) continue; - throw SysError("Reading pid %1% tasks", pid); - } + // File descriptors + int fdBufSize = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, nullptr, 0); + if (fdBufSize <= 0) + throw SysError("Listing pid %1% file descriptors", pid); - // If the process doesn't have the per-thread cwd flag then we already have the - // process-wide cwd from PROC_PIDVNODEPATHINFO - if (taskAllInfo.pbsd.pbi_flags & PROC_FLAG_THCWD) { - std::vector<uint64_t> tids(taskAllInfo.ptinfo.pti_threadnum); - int tidBufSize = proc_pidinfo(pid, PROC_PIDLISTTHREADS, 0, tids.data(), tids.size() * sizeof(uint64_t)); - if (tidBufSize <= 0) { - if ((errno == ESRCH) || (errno == EPERM)) continue; - throw SysError("Listing pid %1% threads", pid); + std::vector<struct proc_fdinfo> fds(fdBufSize / sizeof(struct proc_fdinfo)); + fdBufSize = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, fds.data(), fds.size() * sizeof(struct proc_fdinfo)); + if (fdBufSize <= 0) + throw SysError("Listing pid %1% file descriptors", pid); + + fds.resize(fdBufSize / sizeof(struct proc_fdinfo)); + + for (auto fd: fds) { + // By definition, only a vnode is on the filesystem + if (fd.proc_fdtype != PROX_FDTYPE_VNODE) + continue; + + struct vnode_fdinfowithpath fdInfo; + if (proc_pidfdinfo(pid, fd.proc_fd, PROC_PIDFDVNODEPATHINFO, &fdInfo, sizeof(fdInfo)) <= 0) + throw SysError("Getting pid %1% fd %2% path", pid, fd.proc_fd); + unchecked[std::string(fdInfo.pvip.vip_path)].emplace("{libproc/fd}"); } - for (auto tid: tids) { - struct proc_threadwithpathinfo threadPathInfo; - if (proc_pidinfo(pid, PROC_PIDTHREADPATHINFO, tid, &threadPathInfo, sizeof(threadPathInfo)) <= 0) { - if ((errno == ESRCH) || (errno == EPERM)) continue; - throw SysError("Reading pid %1% thread %2% cwd", pid, tid); + + // Regions (e.g. mmapped files, executables, shared libraries) + uint64_t nextAddr = 0; + while (true) { + // Seriously, what are you doing XNU? 
+ // There's 3 flavors of PROC_PIDREGIONPATHINFO: + // * PROC_PIDREGIONPATHINFO includes all regions + // * PROC_PIDREGIONPATHINFO2 includes regions backed by a vnode + // * PROC_PIDREGIONPATHINFO3 includes regions backed by a vnode on a specified filesystem + // Only PROC_PIDREGIONPATHINFO is documented. + // Unfortunately, using it would make finding gcroots take about 100x as long + // and tests would fail from timeout. + // According to the Frida source code, PROC_PIDREGIONPATHINFO2 has been available + // since XNU 2782.1.97 in OS X 10.10 + // + // 22 means PROC_PIDREGIONPATHINFO2 + struct proc_regionwithpathinfo regionInfo; + if (proc_pidinfo(pid, 22, nextAddr, &regionInfo, sizeof(regionInfo)) <= 0) { + // PROC_PIDREGIONPATHINFO signals we're done with an error, + // so we're expected to hit this once per process + if (errno == ESRCH || errno == EINVAL) break; + throw SysError("Getting pid %1% region path", pid); } - unchecked[std::string(threadPathInfo.pvip.vip_path)].emplace("{libproc/threadcwd}"); + + unchecked[std::string(regionInfo.prp_vip.vip_path)].emplace("{libproc/region}"); + + nextAddr = regionInfo.prp_prinfo.pri_address + regionInfo.prp_prinfo.pri_size; } + + + // Arguments and environment variables + // We can't read environment variables of binaries with entitlements unless + // nix has the `com.apple.private.read-environment-variables` entitlement or SIP is off + // We can read arguments for all applications though. 
+ + // Yes, it's a sysctl, the proc_info and sysctl APIs are mostly similar, + // but both have exclusive capabilities + // We don't care about what is args and what is environment so we could use + // KERN_PROCARGS, but KERN_PROCARGS2 saves about 20ms in my testing + int sysctlName[3] = {CTL_KERN, KERN_PROCARGS2, pid}; + size_t argsSize = 0; + if (sysctl(sysctlName, 3, nullptr, &argsSize, nullptr, 0) < 0) { + throw SysError("Reading pid %1% arguments", pid); + } + + std::vector<char> args(argsSize); + if (sysctl(sysctlName, 3, args.data(), &argsSize, nullptr, 0) < 0) { + throw SysError("Reading pid %1% arguments", pid); + } + + if (argsSize < args.size()) + args.resize(argsSize); + + auto env_end = std::sregex_iterator{}; + for (auto i = std::sregex_iterator{args.begin(), args.end(), storePathRegex}; i != env_end; ++i) + unchecked[i->str()].emplace("{libproc/args}"); + + + // Per-thread working directories + struct proc_taskallinfo taskAllInfo; + if (proc_pidinfo(pid, PROC_PIDTASKALLINFO, 0, &taskAllInfo, sizeof(taskAllInfo)) <= 0) + throw SysError("Reading pid %1% tasks", pid); + + // If the process doesn't have the per-thread cwd flag then we already have the + // process-wide cwd from PROC_PIDVNODEPATHINFO + if (taskAllInfo.pbsd.pbi_flags & PROC_FLAG_THCWD) { + std::vector<uint64_t> tids(taskAllInfo.ptinfo.pti_threadnum); + int tidBufSize = proc_pidinfo(pid, PROC_PIDLISTTHREADS, 0, tids.data(), tids.size() * sizeof(uint64_t)); + if (tidBufSize <= 0) + throw SysError("Listing pid %1% threads", pid); + + for (auto tid: tids) { + struct proc_threadwithpathinfo threadPathInfo; + if (proc_pidinfo(pid, PROC_PIDTHREADPATHINFO, tid, &threadPathInfo, sizeof(threadPathInfo)) <= 0) + throw SysError("Reading pid %1% thread %2% cwd", pid, tid); + + unchecked[std::string(threadPathInfo.pvip.vip_path)].emplace("{libproc/threadcwd}"); + } + } + } catch (SysError & e) { + if (errno == ENOENT || errno == EACCES || errno == ESRCH) + continue; + throw; } } }