From 435848cef12d065b209c82c96ce3a8bfa5e6a051 Mon Sep 17 00:00:00 2001 From: aszlig Date: Fri, 1 Apr 2022 09:23:43 -0700 Subject: [PATCH] libutil: Fix restoring mount namespace I regularly pass around simple scripts by using nix-shell as the script interpreter, e.g. like this: #!/usr/bin/env nix-shell #!nix-shell -p dd_rescue coreutils bash -i bash While this works most of the time, I recently had one occasion where it would not and the above would result in the following: $ sudo ./myscript.sh bash: ./myscript.sh: No such file or directory Note the "sudo" here, because this error only occurs if we're root. It only occurs as root because running Nix as root means that we access the store directly, and in that case Nix enters its own mount namespace in order to make the store writable. So when stracing the process, I stumbled on the following sequence: openat(AT_FDCWD, "/proc/self/ns/mnt", O_RDONLY) = 3 unshare(CLONE_NEWNS) = 0 ... later ... getcwd("/the/real/cwd", 4096) = 14 setns(3, CLONE_NEWNS) = 0 getcwd("/", 4096) = 2 In the whole strace output there are no calls to chdir() whatsoever, so I decided to look into the kernel source to see what else could change directories and found this[1]: /* Update the pwd and root */ set_fs_pwd(fs, &root); set_fs_root(fs, &root); The set_fs_pwd() call is roughly equivalent to a chdir() syscall and this is called when the setns() syscall is invoked[2]. To fix this, saveMountNamespace() now also records the current working directory, and restoreMountNamespace() changes back to it after restoring the mount namespace.
[1]: https://github.com/torvalds/linux/blob/b14ffae378aa1db993e62b01392e70d1e585fb23/fs/namespace.c#L4659 [2]: https://github.com/torvalds/linux/blob/b14ffae378aa1db993e62b01392e70d1e585fb23/kernel/nsproxy.c#L346 --- src/libutil/util.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 59e3aad6d..e62672717 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -1688,13 +1689,17 @@ void setStackSize(size_t stackSize) #endif } +#if __linux__ static AutoCloseFD fdSavedMountNamespace; +std::optional savedCwd; +#endif void saveMountNamespace() { #if __linux__ static std::once_flag done; std::call_once(done, []() { + savedCwd.emplace(std::filesystem::current_path()); AutoCloseFD fd = open("/proc/self/ns/mnt", O_RDONLY); if (!fd) throw SysError("saving parent mount namespace"); @@ -1712,6 +1717,12 @@ void restoreMountNamespace() } catch (Error & e) { debug(e.msg()); } + try { + if (savedCwd) + std::filesystem::current_path(*savedCwd); + } catch (std::filesystem::filesystem_error const &e) { + debug(e.what()); + } #endif }