From 127ee1a101e3f5ebab39ad98cbe58fefcd52eca5 Mon Sep 17 00:00:00 2001 From: Alois Wohlschlager Date: Sun, 26 May 2024 10:35:03 +0200 Subject: [PATCH] libstore/build: use an allowlist approach to syscall filtering Previously, system call filtering (to prevent builders from storing files with setuid/setgid permission bits or extended attributes) was performed using a blocklist. While this looks simple at first, it actually carries significant security and maintainability risks: after all, the kernel may add new syscalls to achieve the same functionality one is trying to block, and it can even be hard to actually add the syscall to the blocklist when building against a C library that doesn't know about it yet. For a recent demonstration of this happening in practice to Nix, see the introduction of fchmodat2 [0] [1]. The allowlist approach does not share the same drawback. While it does require a rather large list of harmless syscalls to be maintained in the codebase, failing to update this list (and roll out the update to all users) in time has rather benign effects; at worst, very recent programs that already rely on new syscalls will fail with an error the same way they would on a slightly older kernel that doesn't support them yet. Most importantly, no unintended new ways of performing dangerous operations will be silently allowed. Another possible drawback is reduced system call performance due to the larger filter created by the allowlist requiring more computation [2]. However, this issue has not convincingly been demonstrated yet in practice, for example in systemd or various browsers. To the contrary, it has been measured that the the actual filter constructed here has approximately the same overhead as a very simple filter blocking only one system call. This commit tries to keep the behavior as close to unchanged as possible. The system call list is in line with libseccomp 2.5.5 and glibc 2.39, which are the latest versions at the point of writing. Since libseccomp 2.5.5 is already a requirement and the distributions shipping this together with older versions of glibc are mostly not a thing any more, this should not lead to more build failures any more. [0] https://github.com/NixOS/nixpkgs/issues/300635 [1] https://github.com/NixOS/nix/issues/10424 [2] https://github.com/flatpak/flatpak/pull/4462#issuecomment-1061690607 Change-Id: I541be3ea9b249bcceddfed6a5a13ac10b11e16ad --- flake.nix | 1 + maintainers/check-syscalls.nix | 16 + maintainers/check-syscalls.sh | 7 + package.nix | 2 + src/libstore/build/local-derivation-goal.cc | 545 +++++++++++++++++++- src/libstore/linux/fchmodat2-compat.hh | 35 -- tests/nixos/setuid/fchmodat2-suid.c | 5 +- 7 files changed, 550 insertions(+), 61 deletions(-) create mode 100644 maintainers/check-syscalls.nix create mode 100755 maintainers/check-syscalls.sh delete mode 100644 src/libstore/linux/fchmodat2-compat.hh diff --git a/flake.nix b/flake.nix index 0609a8681..1fc02a590 100644 --- a/flake.nix +++ b/flake.nix @@ -164,6 +164,7 @@ nixUnstable = prev.nixUnstable; check-headers = final.buildPackages.callPackage ./maintainers/check-headers.nix { }; + check-syscalls = final.buildPackages.callPackage ./maintainers/check-syscalls.nix { }; default-busybox-sandbox-shell = final.busybox.override { useMusl = true; diff --git a/maintainers/check-syscalls.nix b/maintainers/check-syscalls.nix new file mode 100644 index 000000000..1a3de5c6d --- /dev/null +++ b/maintainers/check-syscalls.nix @@ -0,0 +1,16 @@ +{ + runCommandNoCC, + lib, + libseccomp, + writeShellScriptBin, +}: +let + syscalls-csv = runCommandNoCC "syscalls.csv" { } '' + echo ${lib.escapeShellArg libseccomp.src} + tar -xf ${lib.escapeShellArg libseccomp.src} --strip-components=2 ${libseccomp.name}/src/syscalls.csv + mv syscalls.csv "$out" + ''; +in +writeShellScriptBin "check-syscalls" '' + ${./check-syscalls.sh} ${syscalls-csv} +'' diff --git a/maintainers/check-syscalls.sh b/maintainers/check-syscalls.sh new file mode 100755 index 000000000..cd72ac23b --- /dev/null +++ b/maintainers/check-syscalls.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +set -e + +diff -u <(awk < src/libstore/build/local-derivation-goal.cc '/BEGIN extract-syscalls/ { extracting = 1; next } +match($0, /allowSyscall\(ctx, SCMP_SYS\(([^)]*)\)\);|\/\/ skip ([^ ]*)/, result) { print result[1] result[2] } +/END extract-syscalls/ { extracting = 0; next }') <(tail -n+2 "$1" | cut -d, -f 1) diff --git a/package.nix b/package.nix index 16477a6a2..c6b2e4451 100644 --- a/package.nix +++ b/package.nix @@ -403,6 +403,7 @@ stdenv.mkDerivation (finalAttrs: { # Lix specific packages pre-commit-checks, contribNotice, + check-syscalls, }: let glibcFix = lib.optionalAttrs (buildPlatform.isLinux && glibcLocales != null) { @@ -453,6 +454,7 @@ stdenv.mkDerivation (finalAttrs: { # `bash` from inside `nix develop`, say, because you are using it # via direnv, you will by default get bash (unusable edition). bashInteractive + check-syscalls pythonEnv # docker image tool skopeo diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 8e1f4e849..33cbfb29d 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -45,7 +45,6 @@ #include #include #if HAVE_SECCOMP -#include "linux/fchmodat2-compat.hh" #include #endif #define pivot_root(new_root, put_old) (syscall(SYS_pivot_root, new_root, put_old)) @@ -1363,6 +1362,20 @@ void LocalDerivationGoal::chownToBuilder(const Path & path) throw SysError("cannot change ownership of '%1%'", path); } +#if HAVE_SECCOMP + +static void allowSyscall(scmp_filter_ctx ctx, int syscall) { + if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscall, 0) != 0) + throw SysError("unable to add seccomp rule"); +} + +#define ALLOW_CHMOD_IF_SAFE(ctx, syscall, modePos) \ + if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscall, 1, SCMP_A##modePos(SCMP_CMP_MASKED_EQ, S_ISUID | S_ISGID, 0)) != 0 || \ + seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), syscall, 1, SCMP_A##modePos(SCMP_CMP_MASKED_EQ, S_ISUID, S_ISUID)) != 0 || \ + seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), syscall, 1, SCMP_A##modePos(SCMP_CMP_MASKED_EQ, S_ISGID, S_ISGID)) != 0) \ + throw SysError("unable to add seccomp rule"); + +#endif void setupSeccomp() { @@ -1370,7 +1383,9 @@ void setupSeccomp() #if HAVE_SECCOMP scmp_filter_ctx ctx; - if (!(ctx = seccomp_init(SCMP_ACT_ALLOW))) + // Pretend that syscalls we don't yet know about don't exist. + // This is the best option for compatibility: after all, they did in fact not exist not too long ago. + if (!(ctx = seccomp_init(SCMP_ACT_ERRNO(ENOSYS)))) throw SysError("unable to initialize seccomp mode 2"); Finally cleanup([&]() { @@ -1405,28 +1420,514 @@ void setupSeccomp() seccomp_arch_add(ctx, SCMP_ARCH_MIPSEL64N32) != 0) printError("unable to add mips64el-*abin32 seccomp architecture"); - /* Prevent builders from creating setuid/setgid binaries. */ - for (int perm : { S_ISUID, S_ISGID }) { - if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(chmod), 1, - SCMP_A1(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0) - throw SysError("unable to add seccomp rule"); + // This list is intended for machine consumption. + // Please keep its format, order and BEGIN/END markers. + // + // Currently, it is up to date with libseccomp 2.5.5 and glibc 2.39. + // Run check-syscalls to determine which new syscalls should be added. + // New syscalls must be audited and handled in a way that blocks the following dangerous operations: + // * Creation of non-empty setuid/setgid files + // * Creation of extended attributes (including ACLs) + // + // BEGIN extract-syscalls + allowSyscall(ctx, SCMP_SYS(accept)); + allowSyscall(ctx, SCMP_SYS(accept4)); + allowSyscall(ctx, SCMP_SYS(access)); + allowSyscall(ctx, SCMP_SYS(acct)); + allowSyscall(ctx, SCMP_SYS(add_key)); + allowSyscall(ctx, SCMP_SYS(adjtimex)); + allowSyscall(ctx, SCMP_SYS(afs_syscall)); + allowSyscall(ctx, SCMP_SYS(alarm)); + allowSyscall(ctx, SCMP_SYS(arch_prctl)); + allowSyscall(ctx, SCMP_SYS(arm_fadvise64_64)); + allowSyscall(ctx, SCMP_SYS(arm_sync_file_range)); + allowSyscall(ctx, SCMP_SYS(bdflush)); + allowSyscall(ctx, SCMP_SYS(bind)); + allowSyscall(ctx, SCMP_SYS(bpf)); + allowSyscall(ctx, SCMP_SYS(break)); + allowSyscall(ctx, SCMP_SYS(breakpoint)); + allowSyscall(ctx, SCMP_SYS(brk)); + allowSyscall(ctx, SCMP_SYS(cachectl)); + allowSyscall(ctx, SCMP_SYS(cacheflush)); + allowSyscall(ctx, SCMP_SYS(cachestat)); + allowSyscall(ctx, SCMP_SYS(capget)); + allowSyscall(ctx, SCMP_SYS(capset)); + allowSyscall(ctx, SCMP_SYS(chdir)); + // skip chmod (dangerous) + allowSyscall(ctx, SCMP_SYS(chown)); + allowSyscall(ctx, SCMP_SYS(chown32)); + allowSyscall(ctx, SCMP_SYS(chroot)); + allowSyscall(ctx, SCMP_SYS(clock_adjtime)); + allowSyscall(ctx, SCMP_SYS(clock_adjtime64)); + allowSyscall(ctx, SCMP_SYS(clock_getres)); + allowSyscall(ctx, SCMP_SYS(clock_getres_time64)); + allowSyscall(ctx, SCMP_SYS(clock_gettime)); + allowSyscall(ctx, SCMP_SYS(clock_gettime64)); + allowSyscall(ctx, SCMP_SYS(clock_nanosleep)); + allowSyscall(ctx, SCMP_SYS(clock_nanosleep_time64)); + allowSyscall(ctx, SCMP_SYS(clock_settime)); + allowSyscall(ctx, SCMP_SYS(clock_settime64)); + allowSyscall(ctx, SCMP_SYS(clone)); + allowSyscall(ctx, SCMP_SYS(clone3)); + allowSyscall(ctx, SCMP_SYS(close)); + allowSyscall(ctx, SCMP_SYS(close_range)); + allowSyscall(ctx, SCMP_SYS(connect)); + allowSyscall(ctx, SCMP_SYS(copy_file_range)); + allowSyscall(ctx, SCMP_SYS(creat)); + allowSyscall(ctx, SCMP_SYS(create_module)); + allowSyscall(ctx, SCMP_SYS(delete_module)); + allowSyscall(ctx, SCMP_SYS(dup)); + allowSyscall(ctx, SCMP_SYS(dup2)); + allowSyscall(ctx, SCMP_SYS(dup3)); + allowSyscall(ctx, SCMP_SYS(epoll_create)); + allowSyscall(ctx, SCMP_SYS(epoll_create1)); + allowSyscall(ctx, SCMP_SYS(epoll_ctl)); + allowSyscall(ctx, SCMP_SYS(epoll_ctl_old)); + allowSyscall(ctx, SCMP_SYS(epoll_pwait)); + allowSyscall(ctx, SCMP_SYS(epoll_pwait2)); + allowSyscall(ctx, SCMP_SYS(epoll_wait)); + allowSyscall(ctx, SCMP_SYS(epoll_wait_old)); + allowSyscall(ctx, SCMP_SYS(eventfd)); + allowSyscall(ctx, SCMP_SYS(eventfd2)); + allowSyscall(ctx, SCMP_SYS(execve)); + allowSyscall(ctx, SCMP_SYS(execveat)); + allowSyscall(ctx, SCMP_SYS(exit)); + allowSyscall(ctx, SCMP_SYS(exit_group)); + allowSyscall(ctx, SCMP_SYS(faccessat)); + allowSyscall(ctx, SCMP_SYS(faccessat2)); + allowSyscall(ctx, SCMP_SYS(fadvise64)); + allowSyscall(ctx, SCMP_SYS(fadvise64_64)); + allowSyscall(ctx, SCMP_SYS(fallocate)); + allowSyscall(ctx, SCMP_SYS(fanotify_init)); + allowSyscall(ctx, SCMP_SYS(fanotify_mark)); + allowSyscall(ctx, SCMP_SYS(fchdir)); + // skip fchmod (dangerous) + // skip fchmodat (dangerous) + // skip fchmodat2 (dangerous) + allowSyscall(ctx, SCMP_SYS(fchown)); + allowSyscall(ctx, SCMP_SYS(fchown32)); + allowSyscall(ctx, SCMP_SYS(fchownat)); + allowSyscall(ctx, SCMP_SYS(fcntl)); + allowSyscall(ctx, SCMP_SYS(fcntl64)); + allowSyscall(ctx, SCMP_SYS(fdatasync)); + allowSyscall(ctx, SCMP_SYS(fgetxattr)); + allowSyscall(ctx, SCMP_SYS(finit_module)); + allowSyscall(ctx, SCMP_SYS(flistxattr)); + allowSyscall(ctx, SCMP_SYS(flock)); + allowSyscall(ctx, SCMP_SYS(fork)); + allowSyscall(ctx, SCMP_SYS(fremovexattr)); + allowSyscall(ctx, SCMP_SYS(fsconfig)); + // skip fsetxattr (dangerous) + allowSyscall(ctx, SCMP_SYS(fsmount)); + allowSyscall(ctx, SCMP_SYS(fsopen)); + allowSyscall(ctx, SCMP_SYS(fspick)); + allowSyscall(ctx, SCMP_SYS(fstat)); + allowSyscall(ctx, SCMP_SYS(fstat64)); + allowSyscall(ctx, SCMP_SYS(fstatat64)); + allowSyscall(ctx, SCMP_SYS(fstatfs)); + allowSyscall(ctx, SCMP_SYS(fstatfs64)); + allowSyscall(ctx, SCMP_SYS(fsync)); + allowSyscall(ctx, SCMP_SYS(ftime)); + allowSyscall(ctx, SCMP_SYS(ftruncate)); + allowSyscall(ctx, SCMP_SYS(ftruncate64)); + allowSyscall(ctx, SCMP_SYS(futex)); + allowSyscall(ctx, SCMP_SYS(futex_requeue)); + allowSyscall(ctx, SCMP_SYS(futex_time64)); + allowSyscall(ctx, SCMP_SYS(futex_wait)); + allowSyscall(ctx, SCMP_SYS(futex_waitv)); + allowSyscall(ctx, SCMP_SYS(futex_wake)); + allowSyscall(ctx, SCMP_SYS(futimesat)); + allowSyscall(ctx, SCMP_SYS(getcpu)); + allowSyscall(ctx, SCMP_SYS(getcwd)); + allowSyscall(ctx, SCMP_SYS(getdents)); + allowSyscall(ctx, SCMP_SYS(getdents64)); + allowSyscall(ctx, SCMP_SYS(getegid)); + allowSyscall(ctx, SCMP_SYS(getegid32)); + allowSyscall(ctx, SCMP_SYS(geteuid)); + allowSyscall(ctx, SCMP_SYS(geteuid32)); + allowSyscall(ctx, SCMP_SYS(getgid)); + allowSyscall(ctx, SCMP_SYS(getgid32)); + allowSyscall(ctx, SCMP_SYS(getgroups)); + allowSyscall(ctx, SCMP_SYS(getgroups32)); + allowSyscall(ctx, SCMP_SYS(getitimer)); + allowSyscall(ctx, SCMP_SYS(get_kernel_syms)); + allowSyscall(ctx, SCMP_SYS(get_mempolicy)); + allowSyscall(ctx, SCMP_SYS(getpeername)); + allowSyscall(ctx, SCMP_SYS(getpgid)); + allowSyscall(ctx, SCMP_SYS(getpgrp)); + allowSyscall(ctx, SCMP_SYS(getpid)); + allowSyscall(ctx, SCMP_SYS(getpmsg)); + allowSyscall(ctx, SCMP_SYS(getppid)); + allowSyscall(ctx, SCMP_SYS(getpriority)); + allowSyscall(ctx, SCMP_SYS(getrandom)); + allowSyscall(ctx, SCMP_SYS(getresgid)); + allowSyscall(ctx, SCMP_SYS(getresgid32)); + allowSyscall(ctx, SCMP_SYS(getresuid)); + allowSyscall(ctx, SCMP_SYS(getresuid32)); + allowSyscall(ctx, SCMP_SYS(getrlimit)); + allowSyscall(ctx, SCMP_SYS(get_robust_list)); + allowSyscall(ctx, SCMP_SYS(getrusage)); + allowSyscall(ctx, SCMP_SYS(getsid)); + allowSyscall(ctx, SCMP_SYS(getsockname)); + allowSyscall(ctx, SCMP_SYS(getsockopt)); + allowSyscall(ctx, SCMP_SYS(get_thread_area)); + allowSyscall(ctx, SCMP_SYS(gettid)); + allowSyscall(ctx, SCMP_SYS(gettimeofday)); + allowSyscall(ctx, SCMP_SYS(get_tls)); + allowSyscall(ctx, SCMP_SYS(getuid)); + allowSyscall(ctx, SCMP_SYS(getuid32)); + allowSyscall(ctx, SCMP_SYS(getxattr)); + allowSyscall(ctx, SCMP_SYS(gtty)); + allowSyscall(ctx, SCMP_SYS(idle)); + allowSyscall(ctx, SCMP_SYS(init_module)); + allowSyscall(ctx, SCMP_SYS(inotify_add_watch)); + allowSyscall(ctx, SCMP_SYS(inotify_init)); + allowSyscall(ctx, SCMP_SYS(inotify_init1)); + allowSyscall(ctx, SCMP_SYS(inotify_rm_watch)); + allowSyscall(ctx, SCMP_SYS(io_cancel)); + allowSyscall(ctx, SCMP_SYS(ioctl)); + allowSyscall(ctx, SCMP_SYS(io_destroy)); + allowSyscall(ctx, SCMP_SYS(io_getevents)); + allowSyscall(ctx, SCMP_SYS(ioperm)); + allowSyscall(ctx, SCMP_SYS(io_pgetevents)); + allowSyscall(ctx, SCMP_SYS(io_pgetevents_time64)); + allowSyscall(ctx, SCMP_SYS(iopl)); + allowSyscall(ctx, SCMP_SYS(ioprio_get)); + allowSyscall(ctx, SCMP_SYS(ioprio_set)); + allowSyscall(ctx, SCMP_SYS(io_setup)); + allowSyscall(ctx, SCMP_SYS(io_submit)); + allowSyscall(ctx, SCMP_SYS(io_uring_enter)); + allowSyscall(ctx, SCMP_SYS(io_uring_register)); + allowSyscall(ctx, SCMP_SYS(io_uring_setup)); + allowSyscall(ctx, SCMP_SYS(ipc)); + allowSyscall(ctx, SCMP_SYS(kcmp)); + allowSyscall(ctx, SCMP_SYS(kexec_file_load)); + allowSyscall(ctx, SCMP_SYS(kexec_load)); + allowSyscall(ctx, SCMP_SYS(keyctl)); + allowSyscall(ctx, SCMP_SYS(kill)); + allowSyscall(ctx, SCMP_SYS(landlock_add_rule)); + allowSyscall(ctx, SCMP_SYS(landlock_create_ruleset)); + allowSyscall(ctx, SCMP_SYS(landlock_restrict_self)); + allowSyscall(ctx, SCMP_SYS(lchown)); + allowSyscall(ctx, SCMP_SYS(lchown32)); + allowSyscall(ctx, SCMP_SYS(lgetxattr)); + allowSyscall(ctx, SCMP_SYS(link)); + allowSyscall(ctx, SCMP_SYS(linkat)); + allowSyscall(ctx, SCMP_SYS(listen)); + allowSyscall(ctx, SCMP_SYS(listxattr)); + allowSyscall(ctx, SCMP_SYS(llistxattr)); + allowSyscall(ctx, SCMP_SYS(_llseek)); + allowSyscall(ctx, SCMP_SYS(lock)); + allowSyscall(ctx, SCMP_SYS(lookup_dcookie)); + allowSyscall(ctx, SCMP_SYS(lremovexattr)); + allowSyscall(ctx, SCMP_SYS(lseek)); + // skip lsetxattr (dangerous) + allowSyscall(ctx, SCMP_SYS(lstat)); + allowSyscall(ctx, SCMP_SYS(lstat64)); + allowSyscall(ctx, SCMP_SYS(madvise)); + allowSyscall(ctx, SCMP_SYS(map_shadow_stack)); + allowSyscall(ctx, SCMP_SYS(mbind)); + allowSyscall(ctx, SCMP_SYS(membarrier)); + allowSyscall(ctx, SCMP_SYS(memfd_create)); + allowSyscall(ctx, SCMP_SYS(memfd_secret)); + allowSyscall(ctx, SCMP_SYS(migrate_pages)); + allowSyscall(ctx, SCMP_SYS(mincore)); + allowSyscall(ctx, SCMP_SYS(mkdir)); + allowSyscall(ctx, SCMP_SYS(mkdirat)); + allowSyscall(ctx, SCMP_SYS(mknod)); + allowSyscall(ctx, SCMP_SYS(mknodat)); + allowSyscall(ctx, SCMP_SYS(mlock)); + allowSyscall(ctx, SCMP_SYS(mlock2)); + allowSyscall(ctx, SCMP_SYS(mlockall)); + allowSyscall(ctx, SCMP_SYS(mmap)); + allowSyscall(ctx, SCMP_SYS(mmap2)); + allowSyscall(ctx, SCMP_SYS(modify_ldt)); + allowSyscall(ctx, SCMP_SYS(mount)); + allowSyscall(ctx, SCMP_SYS(mount_setattr)); + allowSyscall(ctx, SCMP_SYS(move_mount)); + allowSyscall(ctx, SCMP_SYS(move_pages)); + allowSyscall(ctx, SCMP_SYS(mprotect)); + allowSyscall(ctx, SCMP_SYS(mpx)); + allowSyscall(ctx, SCMP_SYS(mq_getsetattr)); + allowSyscall(ctx, SCMP_SYS(mq_notify)); + allowSyscall(ctx, SCMP_SYS(mq_open)); + allowSyscall(ctx, SCMP_SYS(mq_timedreceive)); + allowSyscall(ctx, SCMP_SYS(mq_timedreceive_time64)); + allowSyscall(ctx, SCMP_SYS(mq_timedsend)); + allowSyscall(ctx, SCMP_SYS(mq_timedsend_time64)); + allowSyscall(ctx, SCMP_SYS(mq_unlink)); + allowSyscall(ctx, SCMP_SYS(mremap)); + allowSyscall(ctx, SCMP_SYS(msgctl)); + allowSyscall(ctx, SCMP_SYS(msgget)); + allowSyscall(ctx, SCMP_SYS(msgrcv)); + allowSyscall(ctx, SCMP_SYS(msgsnd)); + allowSyscall(ctx, SCMP_SYS(msync)); + allowSyscall(ctx, SCMP_SYS(multiplexer)); + allowSyscall(ctx, SCMP_SYS(munlock)); + allowSyscall(ctx, SCMP_SYS(munlockall)); + allowSyscall(ctx, SCMP_SYS(munmap)); + allowSyscall(ctx, SCMP_SYS(name_to_handle_at)); + allowSyscall(ctx, SCMP_SYS(nanosleep)); + allowSyscall(ctx, SCMP_SYS(newfstatat)); + allowSyscall(ctx, SCMP_SYS(_newselect)); + allowSyscall(ctx, SCMP_SYS(nfsservctl)); + allowSyscall(ctx, SCMP_SYS(nice)); + allowSyscall(ctx, SCMP_SYS(oldfstat)); + allowSyscall(ctx, SCMP_SYS(oldlstat)); + allowSyscall(ctx, SCMP_SYS(oldolduname)); + allowSyscall(ctx, SCMP_SYS(oldstat)); + allowSyscall(ctx, SCMP_SYS(olduname)); + allowSyscall(ctx, SCMP_SYS(open)); + allowSyscall(ctx, SCMP_SYS(openat)); + allowSyscall(ctx, SCMP_SYS(openat2)); + allowSyscall(ctx, SCMP_SYS(open_by_handle_at)); + allowSyscall(ctx, SCMP_SYS(open_tree)); + allowSyscall(ctx, SCMP_SYS(pause)); + allowSyscall(ctx, SCMP_SYS(pciconfig_iobase)); + allowSyscall(ctx, SCMP_SYS(pciconfig_read)); + allowSyscall(ctx, SCMP_SYS(pciconfig_write)); + allowSyscall(ctx, SCMP_SYS(perf_event_open)); + allowSyscall(ctx, SCMP_SYS(personality)); + allowSyscall(ctx, SCMP_SYS(pidfd_getfd)); + allowSyscall(ctx, SCMP_SYS(pidfd_open)); + allowSyscall(ctx, SCMP_SYS(pidfd_send_signal)); + allowSyscall(ctx, SCMP_SYS(pipe)); + allowSyscall(ctx, SCMP_SYS(pipe2)); + allowSyscall(ctx, SCMP_SYS(pivot_root)); + allowSyscall(ctx, SCMP_SYS(pkey_alloc)); + allowSyscall(ctx, SCMP_SYS(pkey_free)); + allowSyscall(ctx, SCMP_SYS(pkey_mprotect)); + allowSyscall(ctx, SCMP_SYS(poll)); + allowSyscall(ctx, SCMP_SYS(ppoll)); + allowSyscall(ctx, SCMP_SYS(ppoll_time64)); + allowSyscall(ctx, SCMP_SYS(prctl)); + allowSyscall(ctx, SCMP_SYS(pread64)); + allowSyscall(ctx, SCMP_SYS(preadv)); + allowSyscall(ctx, SCMP_SYS(preadv2)); + allowSyscall(ctx, SCMP_SYS(prlimit64)); + allowSyscall(ctx, SCMP_SYS(process_madvise)); + allowSyscall(ctx, SCMP_SYS(process_mrelease)); + allowSyscall(ctx, SCMP_SYS(process_vm_readv)); + allowSyscall(ctx, SCMP_SYS(process_vm_writev)); + allowSyscall(ctx, SCMP_SYS(prof)); + allowSyscall(ctx, SCMP_SYS(profil)); + allowSyscall(ctx, SCMP_SYS(pselect6)); + allowSyscall(ctx, SCMP_SYS(pselect6_time64)); + allowSyscall(ctx, SCMP_SYS(ptrace)); + allowSyscall(ctx, SCMP_SYS(putpmsg)); + allowSyscall(ctx, SCMP_SYS(pwrite64)); + allowSyscall(ctx, SCMP_SYS(pwritev)); + allowSyscall(ctx, SCMP_SYS(pwritev2)); + allowSyscall(ctx, SCMP_SYS(query_module)); + allowSyscall(ctx, SCMP_SYS(quotactl)); + allowSyscall(ctx, SCMP_SYS(quotactl_fd)); + allowSyscall(ctx, SCMP_SYS(read)); + allowSyscall(ctx, SCMP_SYS(readahead)); + allowSyscall(ctx, SCMP_SYS(readdir)); + allowSyscall(ctx, SCMP_SYS(readlink)); + allowSyscall(ctx, SCMP_SYS(readlinkat)); + allowSyscall(ctx, SCMP_SYS(readv)); + allowSyscall(ctx, SCMP_SYS(reboot)); + allowSyscall(ctx, SCMP_SYS(recv)); + allowSyscall(ctx, SCMP_SYS(recvfrom)); + allowSyscall(ctx, SCMP_SYS(recvmmsg)); + allowSyscall(ctx, SCMP_SYS(recvmmsg_time64)); + allowSyscall(ctx, SCMP_SYS(recvmsg)); + allowSyscall(ctx, SCMP_SYS(remap_file_pages)); + allowSyscall(ctx, SCMP_SYS(removexattr)); + allowSyscall(ctx, SCMP_SYS(rename)); + allowSyscall(ctx, SCMP_SYS(renameat)); + allowSyscall(ctx, SCMP_SYS(renameat2)); + allowSyscall(ctx, SCMP_SYS(request_key)); + allowSyscall(ctx, SCMP_SYS(restart_syscall)); + allowSyscall(ctx, SCMP_SYS(riscv_flush_icache)); + allowSyscall(ctx, SCMP_SYS(rmdir)); + allowSyscall(ctx, SCMP_SYS(rseq)); + allowSyscall(ctx, SCMP_SYS(rtas)); + allowSyscall(ctx, SCMP_SYS(rt_sigaction)); + allowSyscall(ctx, SCMP_SYS(rt_sigpending)); + allowSyscall(ctx, SCMP_SYS(rt_sigprocmask)); + allowSyscall(ctx, SCMP_SYS(rt_sigqueueinfo)); + allowSyscall(ctx, SCMP_SYS(rt_sigreturn)); + allowSyscall(ctx, SCMP_SYS(rt_sigsuspend)); + allowSyscall(ctx, SCMP_SYS(rt_sigtimedwait)); + allowSyscall(ctx, SCMP_SYS(rt_sigtimedwait_time64)); + allowSyscall(ctx, SCMP_SYS(rt_tgsigqueueinfo)); + allowSyscall(ctx, SCMP_SYS(s390_guarded_storage)); + allowSyscall(ctx, SCMP_SYS(s390_pci_mmio_read)); + allowSyscall(ctx, SCMP_SYS(s390_pci_mmio_write)); + allowSyscall(ctx, SCMP_SYS(s390_runtime_instr)); + allowSyscall(ctx, SCMP_SYS(s390_sthyi)); + allowSyscall(ctx, SCMP_SYS(sched_getaffinity)); + allowSyscall(ctx, SCMP_SYS(sched_getattr)); + allowSyscall(ctx, SCMP_SYS(sched_getparam)); + allowSyscall(ctx, SCMP_SYS(sched_get_priority_max)); + allowSyscall(ctx, SCMP_SYS(sched_get_priority_min)); + allowSyscall(ctx, SCMP_SYS(sched_getscheduler)); + allowSyscall(ctx, SCMP_SYS(sched_rr_get_interval)); + allowSyscall(ctx, SCMP_SYS(sched_rr_get_interval_time64)); + allowSyscall(ctx, SCMP_SYS(sched_setaffinity)); + allowSyscall(ctx, SCMP_SYS(sched_setattr)); + allowSyscall(ctx, SCMP_SYS(sched_setparam)); + allowSyscall(ctx, SCMP_SYS(sched_setscheduler)); + allowSyscall(ctx, SCMP_SYS(sched_yield)); + allowSyscall(ctx, SCMP_SYS(seccomp)); + allowSyscall(ctx, SCMP_SYS(security)); + allowSyscall(ctx, SCMP_SYS(select)); + allowSyscall(ctx, SCMP_SYS(semctl)); + allowSyscall(ctx, SCMP_SYS(semget)); + allowSyscall(ctx, SCMP_SYS(semop)); + allowSyscall(ctx, SCMP_SYS(semtimedop)); + allowSyscall(ctx, SCMP_SYS(semtimedop_time64)); + allowSyscall(ctx, SCMP_SYS(send)); + allowSyscall(ctx, SCMP_SYS(sendfile)); + allowSyscall(ctx, SCMP_SYS(sendfile64)); + allowSyscall(ctx, SCMP_SYS(sendmmsg)); + allowSyscall(ctx, SCMP_SYS(sendmsg)); + allowSyscall(ctx, SCMP_SYS(sendto)); + allowSyscall(ctx, SCMP_SYS(setdomainname)); + allowSyscall(ctx, SCMP_SYS(setfsgid)); + allowSyscall(ctx, SCMP_SYS(setfsgid32)); + allowSyscall(ctx, SCMP_SYS(setfsuid)); + allowSyscall(ctx, SCMP_SYS(setfsuid32)); + allowSyscall(ctx, SCMP_SYS(setgid)); + allowSyscall(ctx, SCMP_SYS(setgid32)); + allowSyscall(ctx, SCMP_SYS(setgroups)); + allowSyscall(ctx, SCMP_SYS(setgroups32)); + allowSyscall(ctx, SCMP_SYS(sethostname)); + allowSyscall(ctx, SCMP_SYS(setitimer)); + allowSyscall(ctx, SCMP_SYS(set_mempolicy)); + allowSyscall(ctx, SCMP_SYS(set_mempolicy_home_node)); + allowSyscall(ctx, SCMP_SYS(setns)); + allowSyscall(ctx, SCMP_SYS(setpgid)); + allowSyscall(ctx, SCMP_SYS(setpriority)); + allowSyscall(ctx, SCMP_SYS(setregid)); + allowSyscall(ctx, SCMP_SYS(setregid32)); + allowSyscall(ctx, SCMP_SYS(setresgid)); + allowSyscall(ctx, SCMP_SYS(setresgid32)); + allowSyscall(ctx, SCMP_SYS(setresuid)); + allowSyscall(ctx, SCMP_SYS(setresuid32)); + allowSyscall(ctx, SCMP_SYS(setreuid)); + allowSyscall(ctx, SCMP_SYS(setreuid32)); + allowSyscall(ctx, SCMP_SYS(setrlimit)); + allowSyscall(ctx, SCMP_SYS(set_robust_list)); + allowSyscall(ctx, SCMP_SYS(setsid)); + allowSyscall(ctx, SCMP_SYS(setsockopt)); + allowSyscall(ctx, SCMP_SYS(set_thread_area)); + allowSyscall(ctx, SCMP_SYS(set_tid_address)); + allowSyscall(ctx, SCMP_SYS(settimeofday)); + allowSyscall(ctx, SCMP_SYS(set_tls)); + allowSyscall(ctx, SCMP_SYS(setuid)); + allowSyscall(ctx, SCMP_SYS(setuid32)); + // skip setxattr (dangerous) + allowSyscall(ctx, SCMP_SYS(sgetmask)); + allowSyscall(ctx, SCMP_SYS(shmat)); + allowSyscall(ctx, SCMP_SYS(shmctl)); + allowSyscall(ctx, SCMP_SYS(shmdt)); + allowSyscall(ctx, SCMP_SYS(shmget)); + allowSyscall(ctx, SCMP_SYS(shutdown)); + allowSyscall(ctx, SCMP_SYS(sigaction)); + allowSyscall(ctx, SCMP_SYS(sigaltstack)); + allowSyscall(ctx, SCMP_SYS(signal)); + allowSyscall(ctx, SCMP_SYS(signalfd)); + allowSyscall(ctx, SCMP_SYS(signalfd4)); + allowSyscall(ctx, SCMP_SYS(sigpending)); + allowSyscall(ctx, SCMP_SYS(sigprocmask)); + allowSyscall(ctx, SCMP_SYS(sigreturn)); + allowSyscall(ctx, SCMP_SYS(sigsuspend)); + allowSyscall(ctx, SCMP_SYS(socket)); + allowSyscall(ctx, SCMP_SYS(socketcall)); + allowSyscall(ctx, SCMP_SYS(socketpair)); + allowSyscall(ctx, SCMP_SYS(splice)); + allowSyscall(ctx, SCMP_SYS(spu_create)); + allowSyscall(ctx, SCMP_SYS(spu_run)); + allowSyscall(ctx, SCMP_SYS(ssetmask)); + allowSyscall(ctx, SCMP_SYS(stat)); + allowSyscall(ctx, SCMP_SYS(stat64)); + allowSyscall(ctx, SCMP_SYS(statfs)); + allowSyscall(ctx, SCMP_SYS(statfs64)); + allowSyscall(ctx, SCMP_SYS(statx)); + allowSyscall(ctx, SCMP_SYS(stime)); + allowSyscall(ctx, SCMP_SYS(stty)); + allowSyscall(ctx, SCMP_SYS(subpage_prot)); + allowSyscall(ctx, SCMP_SYS(swapcontext)); + allowSyscall(ctx, SCMP_SYS(swapoff)); + allowSyscall(ctx, SCMP_SYS(swapon)); + allowSyscall(ctx, SCMP_SYS(switch_endian)); + allowSyscall(ctx, SCMP_SYS(symlink)); + allowSyscall(ctx, SCMP_SYS(symlinkat)); + allowSyscall(ctx, SCMP_SYS(sync)); + allowSyscall(ctx, SCMP_SYS(sync_file_range)); + allowSyscall(ctx, SCMP_SYS(sync_file_range2)); + allowSyscall(ctx, SCMP_SYS(syncfs)); + allowSyscall(ctx, SCMP_SYS(syscall)); + allowSyscall(ctx, SCMP_SYS(_sysctl)); + allowSyscall(ctx, SCMP_SYS(sys_debug_setcontext)); + allowSyscall(ctx, SCMP_SYS(sysfs)); + allowSyscall(ctx, SCMP_SYS(sysinfo)); + allowSyscall(ctx, SCMP_SYS(syslog)); + allowSyscall(ctx, SCMP_SYS(sysmips)); + allowSyscall(ctx, SCMP_SYS(tee)); + allowSyscall(ctx, SCMP_SYS(tgkill)); + allowSyscall(ctx, SCMP_SYS(time)); + allowSyscall(ctx, SCMP_SYS(timer_create)); + allowSyscall(ctx, SCMP_SYS(timer_delete)); + allowSyscall(ctx, SCMP_SYS(timerfd)); + allowSyscall(ctx, SCMP_SYS(timerfd_create)); + allowSyscall(ctx, SCMP_SYS(timerfd_gettime)); + allowSyscall(ctx, SCMP_SYS(timerfd_gettime64)); + allowSyscall(ctx, SCMP_SYS(timerfd_settime)); + allowSyscall(ctx, SCMP_SYS(timerfd_settime64)); + allowSyscall(ctx, SCMP_SYS(timer_getoverrun)); + allowSyscall(ctx, SCMP_SYS(timer_gettime)); + allowSyscall(ctx, SCMP_SYS(timer_gettime64)); + allowSyscall(ctx, SCMP_SYS(timer_settime)); + allowSyscall(ctx, SCMP_SYS(timer_settime64)); + allowSyscall(ctx, SCMP_SYS(times)); + allowSyscall(ctx, SCMP_SYS(tkill)); + allowSyscall(ctx, SCMP_SYS(truncate)); + allowSyscall(ctx, SCMP_SYS(truncate64)); + allowSyscall(ctx, SCMP_SYS(tuxcall)); + allowSyscall(ctx, SCMP_SYS(ugetrlimit)); + allowSyscall(ctx, SCMP_SYS(ulimit)); + allowSyscall(ctx, SCMP_SYS(umask)); + allowSyscall(ctx, SCMP_SYS(umount)); + allowSyscall(ctx, SCMP_SYS(umount2)); + allowSyscall(ctx, SCMP_SYS(uname)); + allowSyscall(ctx, SCMP_SYS(unlink)); + allowSyscall(ctx, SCMP_SYS(unlinkat)); + allowSyscall(ctx, SCMP_SYS(unshare)); + allowSyscall(ctx, SCMP_SYS(uselib)); + allowSyscall(ctx, SCMP_SYS(userfaultfd)); + allowSyscall(ctx, SCMP_SYS(usr26)); + allowSyscall(ctx, SCMP_SYS(usr32)); + allowSyscall(ctx, SCMP_SYS(ustat)); + allowSyscall(ctx, SCMP_SYS(utime)); + allowSyscall(ctx, SCMP_SYS(utimensat)); + allowSyscall(ctx, SCMP_SYS(utimensat_time64)); + allowSyscall(ctx, SCMP_SYS(utimes)); + allowSyscall(ctx, SCMP_SYS(vfork)); + allowSyscall(ctx, SCMP_SYS(vhangup)); + allowSyscall(ctx, SCMP_SYS(vm86)); + allowSyscall(ctx, SCMP_SYS(vm86old)); + allowSyscall(ctx, SCMP_SYS(vmsplice)); + allowSyscall(ctx, SCMP_SYS(vserver)); + allowSyscall(ctx, SCMP_SYS(wait4)); + allowSyscall(ctx, SCMP_SYS(waitid)); + allowSyscall(ctx, SCMP_SYS(waitpid)); + allowSyscall(ctx, SCMP_SYS(write)); + allowSyscall(ctx, SCMP_SYS(writev)); + // END extract-syscalls - if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmod), 1, - SCMP_A1(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0) - throw SysError("unable to add seccomp rule"); + // chmod family: prevent adding setuid/setgid bits to existing files. + // The Nix store does not support setuid/setgid, and even their temporary creation can weaken the security of the sandbox. + ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(chmod), 1); + ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(fchmod), 1); + ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(fchmodat), 2); + ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(fchmodat2), 2); - if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmodat), 1, - SCMP_A2(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0) - throw SysError("unable to add seccomp rule"); - - if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), NIX_SYSCALL_FCHMODAT2, 1, - SCMP_A2(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0) - throw SysError("unable to add seccomp rule"); - } - - /* Prevent builders from creating EAs or ACLs. Not all filesystems - support these, and they're not allowed in the Nix store because - they're not representable in the NAR serialisation. */ + // setxattr family: prevent creation of extended attributes or ACLs. + // Not all filesystems support them, and they're incompatible with the NAR format. if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(setxattr), 0) != 0 || seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(lsetxattr), 0) != 0 || seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(fsetxattr), 0) != 0) diff --git a/src/libstore/linux/fchmodat2-compat.hh b/src/libstore/linux/fchmodat2-compat.hh deleted file mode 100644 index d5ef81e22..000000000 --- a/src/libstore/linux/fchmodat2-compat.hh +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Determine the syscall number for `fchmodat2`. - * - * On most platforms this is 452. Exceptions can be found on - * a glibc git checkout via `rg --pcre2 'define __NR_fchmodat2 (?!452)'`. - * - * The problem is that glibc 2.39 and libseccomp 2.5.5 are needed to - * get the syscall number. However, a Lix built against nixpkgs 23.11 - * (glibc 2.38) should still have the issue fixed without depending - * on the build environment. - * - * To achieve that, the macros below try to determine the platform and - * set the syscall number which is platform-specific, but - * in most cases 452. - * - * TODO: remove this when 23.11 is EOL and the entire (supported) ecosystem - * is on glibc 2.39. - */ - -#pragma once -///@file - -#if defined(__alpha__) -# define NIX_SYSCALL_FCHMODAT2 562 -#elif defined(__x86_64__) && SIZE_MAX == 0xFFFFFFFF // x32 -# define NIX_SYSCALL_FCHMODAT2 1073742276 -#elif defined(__mips__) && defined(__mips64) && defined(_ABIN64) // mips64/n64 -# define NIX_SYSCALL_FCHMODAT2 5452 -#elif defined(__mips__) && defined(__mips64) && defined(_ABIN32) // mips64/n32 -# define NIX_SYSCALL_FCHMODAT2 6452 -#elif defined(__mips__) && defined(_ABIO32) // mips32 -# define NIX_SYSCALL_FCHMODAT2 4452 -#else -# define NIX_SYSCALL_FCHMODAT2 452 -#endif diff --git a/tests/nixos/setuid/fchmodat2-suid.c b/tests/nixos/setuid/fchmodat2-suid.c index 931489ad7..7280331d5 100644 --- a/tests/nixos/setuid/fchmodat2-suid.c +++ b/tests/nixos/setuid/fchmodat2-suid.c @@ -12,10 +12,7 @@ int main(void) { fprintf(fd, "henlo :3"); fclose(fd); - // FIXME use something nicer here that's less - // platform-dependent as soon as we go to 24.05 - // and the glibc is new enough to support fchmodat2 - long rs = syscall(452, NULL, name, S_ISUID, 0); + long rs = syscall(SYS_fchmodat2, NULL, name, S_ISUID, 0); assert(rs == -1); assert(errno == EPERM); }