libstore/build: use an allowlist approach to syscall filtering

Previously, system call filtering (to prevent builders from storing files with
setuid/setgid permission bits or extended attributes) was performed using a
blocklist. While this looks simple at first, it actually carries significant
security and maintainability risks: after all, the kernel may add new syscalls
to achieve the same functionality one is trying to block, and it can even be
hard to actually add the syscall to the blocklist when building against a C
library that doesn't know about it yet. For a recent demonstration of this
happening in practice to Nix, see the introduction of fchmodat2 [0] [1].

The allowlist approach does not share the same drawback. While it does require
a rather large list of harmless syscalls to be maintained in the codebase,
failing to update this list (and roll out the update to all users) in time has
rather benign effects; at worst, very recent programs that already rely on new
syscalls will fail with an error the same way they would on a slightly older
kernel that doesn't support them yet. Most importantly, no unintended new ways
of performing dangerous operations will be silently allowed.

Another possible drawback is reduced system call performance due to the larger
filter created by the allowlist requiring more computation [2]. However, this
issue has not convincingly been demonstrated yet in practice, for example in
systemd or various browsers.

This commit tries to keep the behavior as close to unchanged as possible. Only
newer syscalls that are not supported by glibc 2.38 (as found in NixOS 23.11)
are blocked. Since this includes fchmodat2, the compatibility code added for
handling this syscall can be removed too.

[0] https://github.com/NixOS/nixpkgs/issues/300635
[1] https://github.com/NixOS/nix/issues/10424
[2] https://github.com/flatpak/flatpak/pull/4462#issuecomment-1061690607

Change-Id: I541be3ea9b249bcceddfed6a5a13ac10b11e16ad
This commit is contained in:
alois31 2024-05-26 10:35:03 +02:00
parent 226f707486
commit 9a170bdf94
Signed by: alois31
GPG key ID: E0F59EA5E5216914
8 changed files with 547 additions and 102 deletions

View file

@ -164,6 +164,7 @@
nixUnstable = prev.nixUnstable; nixUnstable = prev.nixUnstable;
check-headers = final.buildPackages.callPackage ./maintainers/check-headers.nix { }; check-headers = final.buildPackages.callPackage ./maintainers/check-headers.nix { };
check-syscalls = final.buildPackages.callPackage ./maintainers/check-syscalls.nix { };
clangbuildanalyzer = final.buildPackages.callPackage ./misc/clangbuildanalyzer.nix { }; clangbuildanalyzer = final.buildPackages.callPackage ./misc/clangbuildanalyzer.nix { };
default-busybox-sandbox-shell = final.busybox.override { default-busybox-sandbox-shell = final.busybox.override {

View file

@ -0,0 +1,16 @@
{
runCommandNoCC,
lib,
libseccomp,
writeShellScriptBin,
}:
let
syscalls-csv = runCommandNoCC "syscalls.csv" { } ''
echo ${lib.escapeShellArg libseccomp.src}
tar -xf ${lib.escapeShellArg libseccomp.src} --strip-components=2 ${libseccomp.name}/src/syscalls.csv
mv syscalls.csv "$out"
'';
in
writeShellScriptBin "check-syscalls" ''
${./check-syscalls.sh} ${syscalls-csv}
''

7
maintainers/check-syscalls.sh Executable file
View file

@ -0,0 +1,7 @@
#!/usr/bin/env bash
set -e
diff -u <(awk < src/libstore/build/local-derivation-goal.cc '/BEGIN extract-syscalls/ { extracting = 1; next }
match($0, /allowSyscall\(ctx, SCMP_SYS\(([^)]*)\)\);|\/\/ skip ([^ ]*)/, result) { print result[1] result[2] }
/END extract-syscalls/ { extracting = 0; next }') <(tail -n+2 "$1" | cut -d, -f 1)

View file

@ -397,6 +397,7 @@ stdenv.mkDerivation (finalAttrs: {
llvmPackages, llvmPackages,
clangbuildanalyzer, clangbuildanalyzer,
contribNotice, contribNotice,
check-syscalls,
}: }:
let let
glibcFix = lib.optionalAttrs (buildPlatform.isLinux && glibcLocales != null) { glibcFix = lib.optionalAttrs (buildPlatform.isLinux && glibcLocales != null) {
@ -431,6 +432,7 @@ stdenv.mkDerivation (finalAttrs: {
packages = packages =
lib.optional (stdenv.cc.isClang && hostPlatform == buildPlatform) clang-tools_llvm lib.optional (stdenv.cc.isClang && hostPlatform == buildPlatform) clang-tools_llvm
++ [ ++ [
check-syscalls
just just
nixfmt nixfmt
# Load-bearing order. Must come before clang-unwrapped below, but after clang_tools above. # Load-bearing order. Must come before clang-unwrapped below, but after clang_tools above.

View file

@ -44,7 +44,6 @@
#include <sys/prctl.h> #include <sys/prctl.h>
#include <sys/syscall.h> #include <sys/syscall.h>
#if HAVE_SECCOMP #if HAVE_SECCOMP
#include "linux/fchmodat2-compat.hh"
#include <seccomp.h> #include <seccomp.h>
#endif #endif
#define pivot_root(new_root, put_old) (syscall(SYS_pivot_root, new_root, put_old)) #define pivot_root(new_root, put_old) (syscall(SYS_pivot_root, new_root, put_old))
@ -1614,6 +1613,12 @@ void LocalDerivationGoal::chownToBuilder(const Path & path)
throw SysError("cannot change ownership of '%1%'", path); throw SysError("cannot change ownership of '%1%'", path);
} }
#if HAVE_SECCOMP
void allowSyscall(scmp_filter_ctx ctx, int syscall) {
if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscall, 0) != 0)
throw SysError("unable to add seccomp rule");
}
#endif
void setupSeccomp() void setupSeccomp()
{ {
@ -1621,7 +1626,9 @@ void setupSeccomp()
#if HAVE_SECCOMP #if HAVE_SECCOMP
scmp_filter_ctx ctx; scmp_filter_ctx ctx;
if (!(ctx = seccomp_init(SCMP_ACT_ALLOW))) // Pretend that syscalls we don't yet know about don't exist.
// This is the best option for compatibility: after all, they did in fact not exist not too long ago.
if (!(ctx = seccomp_init(SCMP_ACT_ERRNO(ENOSYS))))
throw SysError("unable to initialize seccomp mode 2"); throw SysError("unable to initialize seccomp mode 2");
Finally cleanup([&]() { Finally cleanup([&]() {
@ -1656,28 +1663,520 @@ void setupSeccomp()
seccomp_arch_add(ctx, SCMP_ARCH_MIPSEL64N32) != 0) seccomp_arch_add(ctx, SCMP_ARCH_MIPSEL64N32) != 0)
printError("unable to add mips64el-*abin32 seccomp architecture"); printError("unable to add mips64el-*abin32 seccomp architecture");
/* Prevent builders from creating setuid/setgid binaries. */ // This list is intended for machine consumption.
for (int perm : { S_ISUID, S_ISGID }) { // Please keep its format, order and BEGIN/END markers.
if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(chmod), 1, //
SCMP_A1(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0) // Currently, it is up to date with libseccomp 2.5.5 and glibc 2.38.
throw SysError("unable to add seccomp rule"); // Run check-syscalls to determine which new syscalls should be added.
// New syscalls must be audited and handled in a way that blocks the following dangerous operations:
// * Creation of non-empty setuid/setgid files
// * Creation of extended attributes (including ACLs)
//
// BEGIN extract-syscalls
allowSyscall(ctx, SCMP_SYS(accept));
allowSyscall(ctx, SCMP_SYS(accept4));
allowSyscall(ctx, SCMP_SYS(access));
allowSyscall(ctx, SCMP_SYS(acct));
allowSyscall(ctx, SCMP_SYS(add_key));
allowSyscall(ctx, SCMP_SYS(adjtimex));
allowSyscall(ctx, SCMP_SYS(afs_syscall));
allowSyscall(ctx, SCMP_SYS(alarm));
allowSyscall(ctx, SCMP_SYS(arch_prctl));
allowSyscall(ctx, SCMP_SYS(arm_fadvise64_64));
allowSyscall(ctx, SCMP_SYS(arm_sync_file_range));
allowSyscall(ctx, SCMP_SYS(bdflush));
allowSyscall(ctx, SCMP_SYS(bind));
allowSyscall(ctx, SCMP_SYS(bpf));
allowSyscall(ctx, SCMP_SYS(break));
allowSyscall(ctx, SCMP_SYS(breakpoint));
allowSyscall(ctx, SCMP_SYS(brk));
allowSyscall(ctx, SCMP_SYS(cachectl));
allowSyscall(ctx, SCMP_SYS(cacheflush));
allowSyscall(ctx, SCMP_SYS(cachestat));
allowSyscall(ctx, SCMP_SYS(capget));
allowSyscall(ctx, SCMP_SYS(capset));
allowSyscall(ctx, SCMP_SYS(chdir));
// skip chmod (dangerous)
allowSyscall(ctx, SCMP_SYS(chown));
allowSyscall(ctx, SCMP_SYS(chown32));
allowSyscall(ctx, SCMP_SYS(chroot));
allowSyscall(ctx, SCMP_SYS(clock_adjtime));
allowSyscall(ctx, SCMP_SYS(clock_adjtime64));
allowSyscall(ctx, SCMP_SYS(clock_getres));
allowSyscall(ctx, SCMP_SYS(clock_getres_time64));
allowSyscall(ctx, SCMP_SYS(clock_gettime));
allowSyscall(ctx, SCMP_SYS(clock_gettime64));
allowSyscall(ctx, SCMP_SYS(clock_nanosleep));
allowSyscall(ctx, SCMP_SYS(clock_nanosleep_time64));
allowSyscall(ctx, SCMP_SYS(clock_settime));
allowSyscall(ctx, SCMP_SYS(clock_settime64));
allowSyscall(ctx, SCMP_SYS(clone));
allowSyscall(ctx, SCMP_SYS(clone3));
allowSyscall(ctx, SCMP_SYS(close));
allowSyscall(ctx, SCMP_SYS(close_range));
allowSyscall(ctx, SCMP_SYS(connect));
allowSyscall(ctx, SCMP_SYS(copy_file_range));
allowSyscall(ctx, SCMP_SYS(creat));
allowSyscall(ctx, SCMP_SYS(create_module));
allowSyscall(ctx, SCMP_SYS(delete_module));
allowSyscall(ctx, SCMP_SYS(dup));
allowSyscall(ctx, SCMP_SYS(dup2));
allowSyscall(ctx, SCMP_SYS(dup3));
allowSyscall(ctx, SCMP_SYS(epoll_create));
allowSyscall(ctx, SCMP_SYS(epoll_create1));
allowSyscall(ctx, SCMP_SYS(epoll_ctl));
allowSyscall(ctx, SCMP_SYS(epoll_ctl_old));
allowSyscall(ctx, SCMP_SYS(epoll_pwait));
allowSyscall(ctx, SCMP_SYS(epoll_pwait2));
allowSyscall(ctx, SCMP_SYS(epoll_wait));
allowSyscall(ctx, SCMP_SYS(epoll_wait_old));
allowSyscall(ctx, SCMP_SYS(eventfd));
allowSyscall(ctx, SCMP_SYS(eventfd2));
allowSyscall(ctx, SCMP_SYS(execve));
allowSyscall(ctx, SCMP_SYS(execveat));
allowSyscall(ctx, SCMP_SYS(exit));
allowSyscall(ctx, SCMP_SYS(exit_group));
allowSyscall(ctx, SCMP_SYS(faccessat));
allowSyscall(ctx, SCMP_SYS(faccessat2));
allowSyscall(ctx, SCMP_SYS(fadvise64));
allowSyscall(ctx, SCMP_SYS(fadvise64_64));
allowSyscall(ctx, SCMP_SYS(fallocate));
allowSyscall(ctx, SCMP_SYS(fanotify_init));
allowSyscall(ctx, SCMP_SYS(fanotify_mark));
allowSyscall(ctx, SCMP_SYS(fchdir));
// skip fchmod (dangerous)
// skip fchmodat (dangerous)
// skip fchmodat2 (requires glibc 2.39, dangerous)
allowSyscall(ctx, SCMP_SYS(fchown));
allowSyscall(ctx, SCMP_SYS(fchown32));
allowSyscall(ctx, SCMP_SYS(fchownat));
allowSyscall(ctx, SCMP_SYS(fcntl));
allowSyscall(ctx, SCMP_SYS(fcntl64));
allowSyscall(ctx, SCMP_SYS(fdatasync));
allowSyscall(ctx, SCMP_SYS(fgetxattr));
allowSyscall(ctx, SCMP_SYS(finit_module));
allowSyscall(ctx, SCMP_SYS(flistxattr));
allowSyscall(ctx, SCMP_SYS(flock));
allowSyscall(ctx, SCMP_SYS(fork));
allowSyscall(ctx, SCMP_SYS(fremovexattr));
allowSyscall(ctx, SCMP_SYS(fsconfig));
// skip fsetxattr (dangerous)
allowSyscall(ctx, SCMP_SYS(fsmount));
allowSyscall(ctx, SCMP_SYS(fsopen));
allowSyscall(ctx, SCMP_SYS(fspick));
allowSyscall(ctx, SCMP_SYS(fstat));
allowSyscall(ctx, SCMP_SYS(fstat64));
allowSyscall(ctx, SCMP_SYS(fstatat64));
allowSyscall(ctx, SCMP_SYS(fstatfs));
allowSyscall(ctx, SCMP_SYS(fstatfs64));
allowSyscall(ctx, SCMP_SYS(fsync));
allowSyscall(ctx, SCMP_SYS(ftime));
allowSyscall(ctx, SCMP_SYS(ftruncate));
allowSyscall(ctx, SCMP_SYS(ftruncate64));
allowSyscall(ctx, SCMP_SYS(futex));
// skip futex_requeue (requires glibc 2.39)
allowSyscall(ctx, SCMP_SYS(futex_time64));
// skip futex_wait (requires glibc 2.39)
allowSyscall(ctx, SCMP_SYS(futex_waitv));
// skip futex_wake (requires glibc 2.39)
allowSyscall(ctx, SCMP_SYS(futimesat));
allowSyscall(ctx, SCMP_SYS(getcpu));
allowSyscall(ctx, SCMP_SYS(getcwd));
allowSyscall(ctx, SCMP_SYS(getdents));
allowSyscall(ctx, SCMP_SYS(getdents64));
allowSyscall(ctx, SCMP_SYS(getegid));
allowSyscall(ctx, SCMP_SYS(getegid32));
allowSyscall(ctx, SCMP_SYS(geteuid));
allowSyscall(ctx, SCMP_SYS(geteuid32));
allowSyscall(ctx, SCMP_SYS(getgid));
allowSyscall(ctx, SCMP_SYS(getgid32));
allowSyscall(ctx, SCMP_SYS(getgroups));
allowSyscall(ctx, SCMP_SYS(getgroups32));
allowSyscall(ctx, SCMP_SYS(getitimer));
allowSyscall(ctx, SCMP_SYS(get_kernel_syms));
allowSyscall(ctx, SCMP_SYS(get_mempolicy));
allowSyscall(ctx, SCMP_SYS(getpeername));
allowSyscall(ctx, SCMP_SYS(getpgid));
allowSyscall(ctx, SCMP_SYS(getpgrp));
allowSyscall(ctx, SCMP_SYS(getpid));
allowSyscall(ctx, SCMP_SYS(getpmsg));
allowSyscall(ctx, SCMP_SYS(getppid));
allowSyscall(ctx, SCMP_SYS(getpriority));
allowSyscall(ctx, SCMP_SYS(getrandom));
allowSyscall(ctx, SCMP_SYS(getresgid));
allowSyscall(ctx, SCMP_SYS(getresgid32));
allowSyscall(ctx, SCMP_SYS(getresuid));
allowSyscall(ctx, SCMP_SYS(getresuid32));
allowSyscall(ctx, SCMP_SYS(getrlimit));
allowSyscall(ctx, SCMP_SYS(get_robust_list));
allowSyscall(ctx, SCMP_SYS(getrusage));
allowSyscall(ctx, SCMP_SYS(getsid));
allowSyscall(ctx, SCMP_SYS(getsockname));
allowSyscall(ctx, SCMP_SYS(getsockopt));
allowSyscall(ctx, SCMP_SYS(get_thread_area));
allowSyscall(ctx, SCMP_SYS(gettid));
allowSyscall(ctx, SCMP_SYS(gettimeofday));
allowSyscall(ctx, SCMP_SYS(get_tls));
allowSyscall(ctx, SCMP_SYS(getuid));
allowSyscall(ctx, SCMP_SYS(getuid32));
allowSyscall(ctx, SCMP_SYS(getxattr));
allowSyscall(ctx, SCMP_SYS(gtty));
allowSyscall(ctx, SCMP_SYS(idle));
allowSyscall(ctx, SCMP_SYS(init_module));
allowSyscall(ctx, SCMP_SYS(inotify_add_watch));
allowSyscall(ctx, SCMP_SYS(inotify_init));
allowSyscall(ctx, SCMP_SYS(inotify_init1));
allowSyscall(ctx, SCMP_SYS(inotify_rm_watch));
allowSyscall(ctx, SCMP_SYS(io_cancel));
allowSyscall(ctx, SCMP_SYS(ioctl));
allowSyscall(ctx, SCMP_SYS(io_destroy));
allowSyscall(ctx, SCMP_SYS(io_getevents));
allowSyscall(ctx, SCMP_SYS(ioperm));
allowSyscall(ctx, SCMP_SYS(io_pgetevents));
allowSyscall(ctx, SCMP_SYS(io_pgetevents_time64));
allowSyscall(ctx, SCMP_SYS(iopl));
allowSyscall(ctx, SCMP_SYS(ioprio_get));
allowSyscall(ctx, SCMP_SYS(ioprio_set));
allowSyscall(ctx, SCMP_SYS(io_setup));
allowSyscall(ctx, SCMP_SYS(io_submit));
allowSyscall(ctx, SCMP_SYS(io_uring_enter));
allowSyscall(ctx, SCMP_SYS(io_uring_register));
allowSyscall(ctx, SCMP_SYS(io_uring_setup));
allowSyscall(ctx, SCMP_SYS(ipc));
allowSyscall(ctx, SCMP_SYS(kcmp));
allowSyscall(ctx, SCMP_SYS(kexec_file_load));
allowSyscall(ctx, SCMP_SYS(kexec_load));
allowSyscall(ctx, SCMP_SYS(keyctl));
allowSyscall(ctx, SCMP_SYS(kill));
allowSyscall(ctx, SCMP_SYS(landlock_add_rule));
allowSyscall(ctx, SCMP_SYS(landlock_create_ruleset));
allowSyscall(ctx, SCMP_SYS(landlock_restrict_self));
allowSyscall(ctx, SCMP_SYS(lchown));
allowSyscall(ctx, SCMP_SYS(lchown32));
allowSyscall(ctx, SCMP_SYS(lgetxattr));
allowSyscall(ctx, SCMP_SYS(link));
allowSyscall(ctx, SCMP_SYS(linkat));
allowSyscall(ctx, SCMP_SYS(listen));
allowSyscall(ctx, SCMP_SYS(listxattr));
allowSyscall(ctx, SCMP_SYS(llistxattr));
allowSyscall(ctx, SCMP_SYS(_llseek));
allowSyscall(ctx, SCMP_SYS(lock));
allowSyscall(ctx, SCMP_SYS(lookup_dcookie));
allowSyscall(ctx, SCMP_SYS(lremovexattr));
allowSyscall(ctx, SCMP_SYS(lseek));
// skip lsetxattr (dangerous)
allowSyscall(ctx, SCMP_SYS(lstat));
allowSyscall(ctx, SCMP_SYS(lstat64));
allowSyscall(ctx, SCMP_SYS(madvise));
// skip map_shadow_stack (requires glibc 2.39)
allowSyscall(ctx, SCMP_SYS(mbind));
allowSyscall(ctx, SCMP_SYS(membarrier));
allowSyscall(ctx, SCMP_SYS(memfd_create));
allowSyscall(ctx, SCMP_SYS(memfd_secret));
allowSyscall(ctx, SCMP_SYS(migrate_pages));
allowSyscall(ctx, SCMP_SYS(mincore));
allowSyscall(ctx, SCMP_SYS(mkdir));
allowSyscall(ctx, SCMP_SYS(mkdirat));
allowSyscall(ctx, SCMP_SYS(mknod));
allowSyscall(ctx, SCMP_SYS(mknodat));
allowSyscall(ctx, SCMP_SYS(mlock));
allowSyscall(ctx, SCMP_SYS(mlock2));
allowSyscall(ctx, SCMP_SYS(mlockall));
allowSyscall(ctx, SCMP_SYS(mmap));
allowSyscall(ctx, SCMP_SYS(mmap2));
allowSyscall(ctx, SCMP_SYS(modify_ldt));
allowSyscall(ctx, SCMP_SYS(mount));
allowSyscall(ctx, SCMP_SYS(mount_setattr));
allowSyscall(ctx, SCMP_SYS(move_mount));
allowSyscall(ctx, SCMP_SYS(move_pages));
allowSyscall(ctx, SCMP_SYS(mprotect));
allowSyscall(ctx, SCMP_SYS(mpx));
allowSyscall(ctx, SCMP_SYS(mq_getsetattr));
allowSyscall(ctx, SCMP_SYS(mq_notify));
allowSyscall(ctx, SCMP_SYS(mq_open));
allowSyscall(ctx, SCMP_SYS(mq_timedreceive));
allowSyscall(ctx, SCMP_SYS(mq_timedreceive_time64));
allowSyscall(ctx, SCMP_SYS(mq_timedsend));
allowSyscall(ctx, SCMP_SYS(mq_timedsend_time64));
allowSyscall(ctx, SCMP_SYS(mq_unlink));
allowSyscall(ctx, SCMP_SYS(mremap));
allowSyscall(ctx, SCMP_SYS(msgctl));
allowSyscall(ctx, SCMP_SYS(msgget));
allowSyscall(ctx, SCMP_SYS(msgrcv));
allowSyscall(ctx, SCMP_SYS(msgsnd));
allowSyscall(ctx, SCMP_SYS(msync));
allowSyscall(ctx, SCMP_SYS(multiplexer));
allowSyscall(ctx, SCMP_SYS(munlock));
allowSyscall(ctx, SCMP_SYS(munlockall));
allowSyscall(ctx, SCMP_SYS(munmap));
allowSyscall(ctx, SCMP_SYS(name_to_handle_at));
allowSyscall(ctx, SCMP_SYS(nanosleep));
allowSyscall(ctx, SCMP_SYS(newfstatat));
allowSyscall(ctx, SCMP_SYS(_newselect));
allowSyscall(ctx, SCMP_SYS(nfsservctl));
allowSyscall(ctx, SCMP_SYS(nice));
allowSyscall(ctx, SCMP_SYS(oldfstat));
allowSyscall(ctx, SCMP_SYS(oldlstat));
allowSyscall(ctx, SCMP_SYS(oldolduname));
allowSyscall(ctx, SCMP_SYS(oldstat));
allowSyscall(ctx, SCMP_SYS(olduname));
allowSyscall(ctx, SCMP_SYS(open));
allowSyscall(ctx, SCMP_SYS(openat));
allowSyscall(ctx, SCMP_SYS(openat2));
allowSyscall(ctx, SCMP_SYS(open_by_handle_at));
allowSyscall(ctx, SCMP_SYS(open_tree));
allowSyscall(ctx, SCMP_SYS(pause));
allowSyscall(ctx, SCMP_SYS(pciconfig_iobase));
allowSyscall(ctx, SCMP_SYS(pciconfig_read));
allowSyscall(ctx, SCMP_SYS(pciconfig_write));
allowSyscall(ctx, SCMP_SYS(perf_event_open));
allowSyscall(ctx, SCMP_SYS(personality));
allowSyscall(ctx, SCMP_SYS(pidfd_getfd));
allowSyscall(ctx, SCMP_SYS(pidfd_open));
allowSyscall(ctx, SCMP_SYS(pidfd_send_signal));
allowSyscall(ctx, SCMP_SYS(pipe));
allowSyscall(ctx, SCMP_SYS(pipe2));
allowSyscall(ctx, SCMP_SYS(pivot_root));
allowSyscall(ctx, SCMP_SYS(pkey_alloc));
allowSyscall(ctx, SCMP_SYS(pkey_free));
allowSyscall(ctx, SCMP_SYS(pkey_mprotect));
allowSyscall(ctx, SCMP_SYS(poll));
allowSyscall(ctx, SCMP_SYS(ppoll));
allowSyscall(ctx, SCMP_SYS(ppoll_time64));
allowSyscall(ctx, SCMP_SYS(prctl));
allowSyscall(ctx, SCMP_SYS(pread64));
allowSyscall(ctx, SCMP_SYS(preadv));
allowSyscall(ctx, SCMP_SYS(preadv2));
allowSyscall(ctx, SCMP_SYS(prlimit64));
allowSyscall(ctx, SCMP_SYS(process_madvise));
allowSyscall(ctx, SCMP_SYS(process_mrelease));
allowSyscall(ctx, SCMP_SYS(process_vm_readv));
allowSyscall(ctx, SCMP_SYS(process_vm_writev));
allowSyscall(ctx, SCMP_SYS(prof));
allowSyscall(ctx, SCMP_SYS(profil));
allowSyscall(ctx, SCMP_SYS(pselect6));
allowSyscall(ctx, SCMP_SYS(pselect6_time64));
allowSyscall(ctx, SCMP_SYS(ptrace));
allowSyscall(ctx, SCMP_SYS(putpmsg));
allowSyscall(ctx, SCMP_SYS(pwrite64));
allowSyscall(ctx, SCMP_SYS(pwritev));
allowSyscall(ctx, SCMP_SYS(pwritev2));
allowSyscall(ctx, SCMP_SYS(query_module));
allowSyscall(ctx, SCMP_SYS(quotactl));
allowSyscall(ctx, SCMP_SYS(quotactl_fd));
allowSyscall(ctx, SCMP_SYS(read));
allowSyscall(ctx, SCMP_SYS(readahead));
allowSyscall(ctx, SCMP_SYS(readdir));
allowSyscall(ctx, SCMP_SYS(readlink));
allowSyscall(ctx, SCMP_SYS(readlinkat));
allowSyscall(ctx, SCMP_SYS(readv));
allowSyscall(ctx, SCMP_SYS(reboot));
allowSyscall(ctx, SCMP_SYS(recv));
allowSyscall(ctx, SCMP_SYS(recvfrom));
allowSyscall(ctx, SCMP_SYS(recvmmsg));
allowSyscall(ctx, SCMP_SYS(recvmmsg_time64));
allowSyscall(ctx, SCMP_SYS(recvmsg));
allowSyscall(ctx, SCMP_SYS(remap_file_pages));
allowSyscall(ctx, SCMP_SYS(removexattr));
allowSyscall(ctx, SCMP_SYS(rename));
allowSyscall(ctx, SCMP_SYS(renameat));
allowSyscall(ctx, SCMP_SYS(renameat2));
allowSyscall(ctx, SCMP_SYS(request_key));
allowSyscall(ctx, SCMP_SYS(restart_syscall));
allowSyscall(ctx, SCMP_SYS(riscv_flush_icache));
allowSyscall(ctx, SCMP_SYS(rmdir));
allowSyscall(ctx, SCMP_SYS(rseq));
allowSyscall(ctx, SCMP_SYS(rtas));
allowSyscall(ctx, SCMP_SYS(rt_sigaction));
allowSyscall(ctx, SCMP_SYS(rt_sigpending));
allowSyscall(ctx, SCMP_SYS(rt_sigprocmask));
allowSyscall(ctx, SCMP_SYS(rt_sigqueueinfo));
allowSyscall(ctx, SCMP_SYS(rt_sigreturn));
allowSyscall(ctx, SCMP_SYS(rt_sigsuspend));
allowSyscall(ctx, SCMP_SYS(rt_sigtimedwait));
allowSyscall(ctx, SCMP_SYS(rt_sigtimedwait_time64));
allowSyscall(ctx, SCMP_SYS(rt_tgsigqueueinfo));
allowSyscall(ctx, SCMP_SYS(s390_guarded_storage));
allowSyscall(ctx, SCMP_SYS(s390_pci_mmio_read));
allowSyscall(ctx, SCMP_SYS(s390_pci_mmio_write));
allowSyscall(ctx, SCMP_SYS(s390_runtime_instr));
allowSyscall(ctx, SCMP_SYS(s390_sthyi));
allowSyscall(ctx, SCMP_SYS(sched_getaffinity));
allowSyscall(ctx, SCMP_SYS(sched_getattr));
allowSyscall(ctx, SCMP_SYS(sched_getparam));
allowSyscall(ctx, SCMP_SYS(sched_get_priority_max));
allowSyscall(ctx, SCMP_SYS(sched_get_priority_min));
allowSyscall(ctx, SCMP_SYS(sched_getscheduler));
allowSyscall(ctx, SCMP_SYS(sched_rr_get_interval));
allowSyscall(ctx, SCMP_SYS(sched_rr_get_interval_time64));
allowSyscall(ctx, SCMP_SYS(sched_setaffinity));
allowSyscall(ctx, SCMP_SYS(sched_setattr));
allowSyscall(ctx, SCMP_SYS(sched_setparam));
allowSyscall(ctx, SCMP_SYS(sched_setscheduler));
allowSyscall(ctx, SCMP_SYS(sched_yield));
allowSyscall(ctx, SCMP_SYS(seccomp));
allowSyscall(ctx, SCMP_SYS(security));
allowSyscall(ctx, SCMP_SYS(select));
allowSyscall(ctx, SCMP_SYS(semctl));
allowSyscall(ctx, SCMP_SYS(semget));
allowSyscall(ctx, SCMP_SYS(semop));
allowSyscall(ctx, SCMP_SYS(semtimedop));
allowSyscall(ctx, SCMP_SYS(semtimedop_time64));
allowSyscall(ctx, SCMP_SYS(send));
allowSyscall(ctx, SCMP_SYS(sendfile));
allowSyscall(ctx, SCMP_SYS(sendfile64));
allowSyscall(ctx, SCMP_SYS(sendmmsg));
allowSyscall(ctx, SCMP_SYS(sendmsg));
allowSyscall(ctx, SCMP_SYS(sendto));
allowSyscall(ctx, SCMP_SYS(setdomainname));
allowSyscall(ctx, SCMP_SYS(setfsgid));
allowSyscall(ctx, SCMP_SYS(setfsgid32));
allowSyscall(ctx, SCMP_SYS(setfsuid));
allowSyscall(ctx, SCMP_SYS(setfsuid32));
allowSyscall(ctx, SCMP_SYS(setgid));
allowSyscall(ctx, SCMP_SYS(setgid32));
allowSyscall(ctx, SCMP_SYS(setgroups));
allowSyscall(ctx, SCMP_SYS(setgroups32));
allowSyscall(ctx, SCMP_SYS(sethostname));
allowSyscall(ctx, SCMP_SYS(setitimer));
allowSyscall(ctx, SCMP_SYS(set_mempolicy));
allowSyscall(ctx, SCMP_SYS(set_mempolicy_home_node));
allowSyscall(ctx, SCMP_SYS(setns));
allowSyscall(ctx, SCMP_SYS(setpgid));
allowSyscall(ctx, SCMP_SYS(setpriority));
allowSyscall(ctx, SCMP_SYS(setregid));
allowSyscall(ctx, SCMP_SYS(setregid32));
allowSyscall(ctx, SCMP_SYS(setresgid));
allowSyscall(ctx, SCMP_SYS(setresgid32));
allowSyscall(ctx, SCMP_SYS(setresuid));
allowSyscall(ctx, SCMP_SYS(setresuid32));
allowSyscall(ctx, SCMP_SYS(setreuid));
allowSyscall(ctx, SCMP_SYS(setreuid32));
allowSyscall(ctx, SCMP_SYS(setrlimit));
allowSyscall(ctx, SCMP_SYS(set_robust_list));
allowSyscall(ctx, SCMP_SYS(setsid));
allowSyscall(ctx, SCMP_SYS(setsockopt));
allowSyscall(ctx, SCMP_SYS(set_thread_area));
allowSyscall(ctx, SCMP_SYS(set_tid_address));
allowSyscall(ctx, SCMP_SYS(settimeofday));
allowSyscall(ctx, SCMP_SYS(set_tls));
allowSyscall(ctx, SCMP_SYS(setuid));
allowSyscall(ctx, SCMP_SYS(setuid32));
// skip setxattr (dangerous)
allowSyscall(ctx, SCMP_SYS(sgetmask));
allowSyscall(ctx, SCMP_SYS(shmat));
allowSyscall(ctx, SCMP_SYS(shmctl));
allowSyscall(ctx, SCMP_SYS(shmdt));
allowSyscall(ctx, SCMP_SYS(shmget));
allowSyscall(ctx, SCMP_SYS(shutdown));
allowSyscall(ctx, SCMP_SYS(sigaction));
allowSyscall(ctx, SCMP_SYS(sigaltstack));
allowSyscall(ctx, SCMP_SYS(signal));
allowSyscall(ctx, SCMP_SYS(signalfd));
allowSyscall(ctx, SCMP_SYS(signalfd4));
allowSyscall(ctx, SCMP_SYS(sigpending));
allowSyscall(ctx, SCMP_SYS(sigprocmask));
allowSyscall(ctx, SCMP_SYS(sigreturn));
allowSyscall(ctx, SCMP_SYS(sigsuspend));
allowSyscall(ctx, SCMP_SYS(socket));
allowSyscall(ctx, SCMP_SYS(socketcall));
allowSyscall(ctx, SCMP_SYS(socketpair));
allowSyscall(ctx, SCMP_SYS(splice));
allowSyscall(ctx, SCMP_SYS(spu_create));
allowSyscall(ctx, SCMP_SYS(spu_run));
allowSyscall(ctx, SCMP_SYS(ssetmask));
allowSyscall(ctx, SCMP_SYS(stat));
allowSyscall(ctx, SCMP_SYS(stat64));
allowSyscall(ctx, SCMP_SYS(statfs));
allowSyscall(ctx, SCMP_SYS(statfs64));
allowSyscall(ctx, SCMP_SYS(statx));
allowSyscall(ctx, SCMP_SYS(stime));
allowSyscall(ctx, SCMP_SYS(stty));
allowSyscall(ctx, SCMP_SYS(subpage_prot));
allowSyscall(ctx, SCMP_SYS(swapcontext));
allowSyscall(ctx, SCMP_SYS(swapoff));
allowSyscall(ctx, SCMP_SYS(swapon));
allowSyscall(ctx, SCMP_SYS(switch_endian));
allowSyscall(ctx, SCMP_SYS(symlink));
allowSyscall(ctx, SCMP_SYS(symlinkat));
allowSyscall(ctx, SCMP_SYS(sync));
allowSyscall(ctx, SCMP_SYS(sync_file_range));
allowSyscall(ctx, SCMP_SYS(sync_file_range2));
allowSyscall(ctx, SCMP_SYS(syncfs));
allowSyscall(ctx, SCMP_SYS(syscall));
allowSyscall(ctx, SCMP_SYS(_sysctl));
allowSyscall(ctx, SCMP_SYS(sys_debug_setcontext));
allowSyscall(ctx, SCMP_SYS(sysfs));
allowSyscall(ctx, SCMP_SYS(sysinfo));
allowSyscall(ctx, SCMP_SYS(syslog));
allowSyscall(ctx, SCMP_SYS(sysmips));
allowSyscall(ctx, SCMP_SYS(tee));
allowSyscall(ctx, SCMP_SYS(tgkill));
allowSyscall(ctx, SCMP_SYS(time));
allowSyscall(ctx, SCMP_SYS(timer_create));
allowSyscall(ctx, SCMP_SYS(timer_delete));
allowSyscall(ctx, SCMP_SYS(timerfd));
allowSyscall(ctx, SCMP_SYS(timerfd_create));
allowSyscall(ctx, SCMP_SYS(timerfd_gettime));
allowSyscall(ctx, SCMP_SYS(timerfd_gettime64));
allowSyscall(ctx, SCMP_SYS(timerfd_settime));
allowSyscall(ctx, SCMP_SYS(timerfd_settime64));
allowSyscall(ctx, SCMP_SYS(timer_getoverrun));
allowSyscall(ctx, SCMP_SYS(timer_gettime));
allowSyscall(ctx, SCMP_SYS(timer_gettime64));
allowSyscall(ctx, SCMP_SYS(timer_settime));
allowSyscall(ctx, SCMP_SYS(timer_settime64));
allowSyscall(ctx, SCMP_SYS(times));
allowSyscall(ctx, SCMP_SYS(tkill));
allowSyscall(ctx, SCMP_SYS(truncate));
allowSyscall(ctx, SCMP_SYS(truncate64));
allowSyscall(ctx, SCMP_SYS(tuxcall));
allowSyscall(ctx, SCMP_SYS(ugetrlimit));
allowSyscall(ctx, SCMP_SYS(ulimit));
allowSyscall(ctx, SCMP_SYS(umask));
allowSyscall(ctx, SCMP_SYS(umount));
allowSyscall(ctx, SCMP_SYS(umount2));
allowSyscall(ctx, SCMP_SYS(uname));
allowSyscall(ctx, SCMP_SYS(unlink));
allowSyscall(ctx, SCMP_SYS(unlinkat));
allowSyscall(ctx, SCMP_SYS(unshare));
allowSyscall(ctx, SCMP_SYS(uselib));
allowSyscall(ctx, SCMP_SYS(userfaultfd));
allowSyscall(ctx, SCMP_SYS(usr26));
allowSyscall(ctx, SCMP_SYS(usr32));
allowSyscall(ctx, SCMP_SYS(ustat));
allowSyscall(ctx, SCMP_SYS(utime));
allowSyscall(ctx, SCMP_SYS(utimensat));
allowSyscall(ctx, SCMP_SYS(utimensat_time64));
allowSyscall(ctx, SCMP_SYS(utimes));
allowSyscall(ctx, SCMP_SYS(vfork));
allowSyscall(ctx, SCMP_SYS(vhangup));
allowSyscall(ctx, SCMP_SYS(vm86));
allowSyscall(ctx, SCMP_SYS(vm86old));
allowSyscall(ctx, SCMP_SYS(vmsplice));
allowSyscall(ctx, SCMP_SYS(vserver));
allowSyscall(ctx, SCMP_SYS(wait4));
allowSyscall(ctx, SCMP_SYS(waitid));
allowSyscall(ctx, SCMP_SYS(waitpid));
allowSyscall(ctx, SCMP_SYS(write));
allowSyscall(ctx, SCMP_SYS(writev));
// END extract-syscalls
if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmod), 1, // chmod family: prevent adding setuid/setgid bits to existing files.
SCMP_A1(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0) // The Nix store does not support setuid/setgid, and even their temporary creation can weaken the security of the sandbox.
throw SysError("unable to add seccomp rule"); if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(chmod), 1, SCMP_A1(SCMP_CMP_MASKED_EQ, S_ISUID | S_ISGID, 0)) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(chmod), 1, SCMP_A1(SCMP_CMP_MASKED_EQ, S_ISUID, S_ISUID)) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(chmod), 1, SCMP_A1(SCMP_CMP_MASKED_EQ, S_ISGID, S_ISGID)) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(fchmod), 1, SCMP_A1(SCMP_CMP_MASKED_EQ, S_ISUID | S_ISGID, 0)) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmod), 1, SCMP_A1(SCMP_CMP_MASKED_EQ, S_ISUID, S_ISUID)) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmod), 1, SCMP_A1(SCMP_CMP_MASKED_EQ, S_ISGID, S_ISGID)) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(fchmodat), 1, SCMP_A2(SCMP_CMP_MASKED_EQ, S_ISUID | S_ISGID, 0)) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmodat), 1, SCMP_A2(SCMP_CMP_MASKED_EQ, S_ISUID, S_ISUID)) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmodat), 1, SCMP_A2(SCMP_CMP_MASKED_EQ, S_ISGID, S_ISGID)) != 0)
throw SysError("unable to add seccomp rule");
if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmodat), 1, // setxattr family: prevent creation of extended attributes or ACLs.
SCMP_A2(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0) // Not all filesystems support them, and they're incompatible with the NAR format.
throw SysError("unable to add seccomp rule");
if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), NIX_SYSCALL_FCHMODAT2, 1,
SCMP_A2(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0)
throw SysError("unable to add seccomp rule");
}
/* Prevent builders from creating EAs or ACLs. Not all filesystems
support these, and they're not allowed in the Nix store because
they're not representable in the NAR serialisation. */
if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(setxattr), 0) != 0 || if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(setxattr), 0) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(lsetxattr), 0) != 0 || seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(lsetxattr), 0) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(fsetxattr), 0) != 0) seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(fsetxattr), 0) != 0)

View file

@ -1,35 +0,0 @@
/*
* Determine the syscall number for `fchmodat2`.
*
* On most platforms this is 452. Exceptions can be found on
* a glibc git checkout via `rg --pcre2 'define __NR_fchmodat2 (?!452)'`.
*
* The problem is that glibc 2.39 and libseccomp 2.5.5 are needed to
* get the syscall number. However, a Nix built against nixpkgs 23.11
* (glibc 2.38) should still have the issue fixed without depending
* on the build environment.
*
* To achieve that, the macros below try to determine the platform and
* set the syscall number which is platform-specific, but
* in most cases 452.
*
* TODO: remove this when 23.11 is EOL and the entire (supported) ecosystem
* is on glibc 2.39.
*/
#pragma once
///@file
#if defined(__alpha__)
# define NIX_SYSCALL_FCHMODAT2 562
#elif defined(__x86_64__) && SIZE_MAX == 0xFFFFFFFF // x32
# define NIX_SYSCALL_FCHMODAT2 1073742276
#elif defined(__mips__) && defined(__mips64) && defined(_ABIN64) // mips64/n64
# define NIX_SYSCALL_FCHMODAT2 5452
#elif defined(__mips__) && defined(__mips64) && defined(_ABIN32) // mips64/n32
# define NIX_SYSCALL_FCHMODAT2 6452
#elif defined(__mips__) && defined(_ABIO32) // mips32
# define NIX_SYSCALL_FCHMODAT2 4452
#else
# define NIX_SYSCALL_FCHMODAT2 452
#endif

View file

@ -1,21 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <errno.h>
#include <unistd.h>
#include <assert.h>
int main(void) {
char *name = getenv("out");
FILE *fd = fopen(name, "w");
fprintf(fd, "henlo :3");
fclose(fd);
// FIXME use something nicer here that's less
// platform-dependent as soon as we go to 24.05
// and the glibc is new enough to support fchmodat2
long rs = syscall(452, NULL, name, S_ISUID, 0);
assert(rs == -1);
assert(errno == EPERM);
}

View file

@ -4,17 +4,6 @@
let let
pkgs = config.nodes.machine.nixpkgs.pkgs; pkgs = config.nodes.machine.nixpkgs.pkgs;
fchmodat2-builder = pkgs.runCommandCC "fchmodat2-suid" {
passAsFile = [ "code" ];
code = builtins.readFile ./fchmodat2-suid.c;
# Doesn't work with -O0, shuts up the warning about that.
hardeningDisable = [ "fortify" ];
} ''
mkdir -p $out/bin/
$CC -x c "$codePath" -O0 -g -o $out/bin/fchmodat2-suid
'';
in in
{ {
name = "setuid"; name = "setuid";
@ -27,26 +16,13 @@ in
virtualisation.additionalPaths = [ virtualisation.additionalPaths = [
pkgs.stdenvNoCC pkgs.stdenvNoCC
pkgs.pkgsi686Linux.stdenvNoCC pkgs.pkgsi686Linux.stdenvNoCC
fchmodat2-builder
]; ];
# need at least 6.6 to test for fchmodat2
boot.kernelPackages = pkgs.linuxKernel.packages.linux_6_6;
}; };
testScript = { nodes }: '' testScript = { nodes }: ''
# fmt: off # fmt: off
start_all() start_all()
with subtest("fchmodat2 suid regression test"):
machine.succeed("""
nix-build -E '(with import <nixpkgs> {}; runCommand "fchmodat2-suid" {
BUILDER = builtins.storePath ${fchmodat2-builder};
} "
exec \\"$BUILDER\\"/bin/fchmodat2-suid
")'
""")
# Copying to /tmp should succeed. # Copying to /tmp should succeed.
machine.succeed(r""" machine.succeed(r"""
nix-build --no-sandbox -E '(with import <nixpkgs> {}; runCommand "foo" {} " nix-build --no-sandbox -E '(with import <nixpkgs> {}; runCommand "foo" {} "