From 35eec921af1043fc6322edc0ad88c872d41623b8 Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Sat, 4 May 2024 12:55:10 +0200 Subject: [PATCH 01/13] libfetchers: make attribute / URL query handling consistent The original idea was to fix lix#174, but for a user friendly solution, I figured that we'd need more consistency: * Invalid query params will cause an error, just like invalid attributes. This has the following two consequences: * The `?dir=`-param from flakes will be removed before the URL to be fetched is passed to libfetchers. * The tarball fetcher doesn't allow URLs with custom query params anymore. I think this was questionable anyways given that an arbitrary set of query params was silently removed from the URL you wanted to fetch. The correct way is to use an attribute-set with a key `url` that contains the tarball URL to fetch. * Same for the git & mercurial fetchers: in that case it doesn't even matter though: both fetchers added unused query params to the URL that's passed from the input scheme to the fetcher (`url2` in the code). It turns out that this was never used since the query parameters were erased again in `getActualUrl`. * Validation happens for both attributes and URLs. Previously, a lot of fetchers validated e.g. refs/revs only when specified in a URL and the validity of attribute names only in `inputFromAttrs`. Now, all the validation is done in `inputFromAttrs` and `inputFromURL` constructs attributes that will be passed to `inputFromAttrs`. * Accept all attributes as URL query parameters. That also includes lesser used ones such as `narHash`. And "output" attributes like `lastModified`: these could be declared already when declaring inputs as attribute rather than URL. Now the behavior is at least consistent. Personally, I think we should differentiate in the future between "fetched input" (basically the attr-set that ends up in the lock-file) and "unfetched input" earlier: both inputFrom{Attrs,URL} entrypoints are probably OK for unfetched inputs, but for locked/fetched inputs a custom entrypoint should be used. Then, the current entrypoints wouldn't have to allow these attributes anymore. Change-Id: I1be1992249f7af8287cfc37891ab505ddaa2e8cd --- src/libexpr/flake/flakeref.cc | 8 ++- src/libfetchers/fetchers.hh | 31 +++++++++ src/libfetchers/git.cc | 17 ++--- src/libfetchers/github.cc | 114 ++++++++++++++++++++------------- src/libfetchers/indirect.cc | 34 ++++++---- src/libfetchers/mercurial.cc | 7 +- src/libfetchers/tarball.cc | 26 ++------ tests/functional/fetchers.sh | 91 ++++++++++++++++++++++++++ tests/functional/meson.build | 1 + tests/nixos/tarball-flakes.nix | 2 +- 10 files changed, 235 insertions(+), 96 deletions(-) create mode 100644 tests/functional/fetchers.sh diff --git a/src/libexpr/flake/flakeref.cc b/src/libexpr/flake/flakeref.cc index 1c90bfc43..8668961fe 100644 --- a/src/libexpr/flake/flakeref.cc +++ b/src/libexpr/flake/flakeref.cc @@ -204,7 +204,13 @@ std::pair parseFlakeRefWithFragment( std::string fragment; std::swap(fragment, parsedURL.fragment); - auto input = Input::fromURL(parsedURL, isFlake); + // This has a special meaning for flakes and must not be passed to libfetchers. + // Of course this means that libfetchers cannot have fetchers + // expecting an argument `dir` 🫠 + ParsedURL urlForFetchers(parsedURL); + urlForFetchers.query.erase("dir"); + + auto input = Input::fromURL(urlForFetchers, isFlake); input.parent = baseDir; return std::make_pair( diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index 2bb4248be..40f2b6294 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -159,6 +159,37 @@ struct InputScheme std::optional commitMsg) const; virtual std::pair fetch(ref store, const Input & input) = 0; + +protected: + void emplaceURLQueryIntoAttrs( + const ParsedURL & parsedURL, + Attrs & attrs, + const StringSet & numericParams, + const StringSet & booleanParams) const + { + for (auto &[name, value] : parsedURL.query) { + if (name == "url") { + throw BadURL( + "URL '%s' must not override url via query param!", + parsedURL.to_string() + ); + } else if (numericParams.count(name) != 0) { + if (auto n = string2Int(value)) { + attrs.insert_or_assign(name, *n); + } else { + throw BadURL( + "URL '%s' has non-numeric parameter '%s'", + parsedURL.to_string(), + name + ); + } + } else if (booleanParams.count(name) != 0) { + attrs.emplace(name, Explicit { value == "1" }); + } else { + attrs.emplace(name, value); + } + } + } }; void registerInputScheme(std::shared_ptr && fetcher); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 2817fde23..0cb826075 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -273,18 +273,15 @@ struct GitInputScheme : InputScheme Attrs attrs; attrs.emplace("type", "git"); - - for (auto & [name, value] : url.query) { - if (name == "rev" || name == "ref") - attrs.emplace(name, value); - else if (name == "shallow" || name == "submodules" || name == "allRefs") - attrs.emplace(name, Explicit { value == "1" }); - else - url2.query.emplace(name, value); - } - attrs.emplace("url", url2.to_string()); + emplaceURLQueryIntoAttrs( + url, + attrs, + {"lastModified", "revCount"}, + {"shallow", "submodules", "allRefs"} + ); + return inputFromAttrs(attrs); } diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 60fefd1f3..b971781ae 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -1,3 +1,4 @@ +#include "attrs.hh" #include "filetransfer.hh" #include "cache.hh" #include "globals.hh" @@ -36,18 +37,11 @@ struct GitArchiveInputScheme : InputScheme auto path = tokenizeString>(url.path, "/"); - std::optional rev; - std::optional ref; - std::optional host_url; + std::optional refOrRev; auto size = path.size(); if (size == 3) { - if (std::regex_match(path[2], revRegex)) - rev = Hash::parseAny(path[2], htSHA1); - else if (std::regex_match(path[2], refRegex)) - ref = path[2]; - else - throw BadURL("in URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]); + refOrRev = path[2]; } else if (size > 3) { std::string rs; for (auto i = std::next(path.begin(), 2); i != path.end(); i++) { @@ -58,61 +52,91 @@ struct GitArchiveInputScheme : InputScheme } if (std::regex_match(rs, refRegex)) { - ref = rs; + refOrRev = rs; } else { throw BadURL("in URL '%s', '%s' is not a branch/tag name", url.url, rs); } } else if (size < 2) throw BadURL("URL '%s' is invalid", url.url); + Attrs attrs; + attrs.emplace("type", type()); + attrs.emplace("owner", path[0]); + attrs.emplace("repo", path[1]); + for (auto &[name, value] : url.query) { - if (name == "rev") { - if (rev) - throw BadURL("URL '%s' contains multiple commit hashes", url.url); - rev = Hash::parseAny(value, htSHA1); + if (name == "rev" || name == "ref") { + if (refOrRev) { + throw BadURL("URL '%s' already contains a ref or rev", url.url); + } else { + refOrRev = value; + } + } else if (name == "lastModified") { + if (auto n = string2Int(value)) { + attrs.emplace(name, *n); + } else { + throw Error( + "Attribute 'lastModified' in URL '%s' must be an integer", + url.to_string() + ); + } + } else { + attrs.emplace(name, value); } - else if (name == "ref") { - if (!std::regex_match(value, refRegex)) - throw BadURL("URL '%s' contains an invalid branch/tag name", url.url); - if (ref) - throw BadURL("URL '%s' contains multiple branch/tag names", url.url); - ref = value; - } - else if (name == "host") { - if (!std::regex_match(value, hostRegex)) - throw BadURL("URL '%s' contains an invalid instance host", url.url); - host_url = value; - } - // FIXME: barf on unsupported attributes } - if (ref && rev) - throw BadURL("URL '%s' contains both a commit hash and a branch/tag name %s %s", url.url, *ref, rev->gitRev()); + if (refOrRev) attrs.emplace("refOrRev", *refOrRev); - Input input; - input.attrs.insert_or_assign("type", type()); - input.attrs.insert_or_assign("owner", path[0]); - input.attrs.insert_or_assign("repo", path[1]); - if (rev) input.attrs.insert_or_assign("rev", rev->gitRev()); - if (ref) input.attrs.insert_or_assign("ref", *ref); - if (host_url) input.attrs.insert_or_assign("host", *host_url); - - return input; + return inputFromAttrs(attrs); } std::optional inputFromAttrs(const Attrs & attrs) const override { - if (maybeGetStrAttr(attrs, "type") != type()) return {}; + // Attributes can contain refOrRev and it needs to be figured out + // which one it is (see inputFromURL for when that may happen). + // The correct one (ref or rev) will be written into finalAttrs and + // it needs to be mutable for that. + Attrs finalAttrs(attrs); + auto type_ = maybeGetStrAttr(finalAttrs, "type"); + if (type_ != type()) return {}; - for (auto & [name, value] : attrs) - if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") + auto owner = getStrAttr(finalAttrs, "owner"); + auto repo = getStrAttr(finalAttrs, "repo"); + + auto url = fmt("%s:%s/%s", *type_, owner, repo); + if (auto host = maybeGetStrAttr(finalAttrs, "host")) { + if (!std::regex_match(*host, hostRegex)) { + throw BadURL("URL '%s' contains an invalid instance host", url); + } + } + + if (auto refOrRev = maybeGetStrAttr(finalAttrs, "refOrRev")) { + finalAttrs.erase("refOrRev"); + if (std::regex_match(*refOrRev, revRegex)) { + finalAttrs.emplace("rev", *refOrRev); + } else if (std::regex_match(*refOrRev, refRegex)) { + finalAttrs.emplace("ref", *refOrRev); + } else { + throw Error( + "in URL '%s', '%s' is not a commit hash or a branch/tag name", + url, + *refOrRev + ); + } + } else if (auto ref = maybeGetStrAttr(finalAttrs, "ref")) { + if (!std::regex_match(*ref, refRegex)) { + throw BadURL("URL '%s' contains an invalid branch/tag name", url); + } + } + + for (auto & [name, value] : finalAttrs) { + if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") { throw Error("unsupported input attribute '%s'", name); - - getStrAttr(attrs, "owner"); - getStrAttr(attrs, "repo"); + } + } Input input; - input.attrs = attrs; + input.attrs = finalAttrs; return input; } diff --git a/src/libfetchers/indirect.cc b/src/libfetchers/indirect.cc index c73505b31..8c0176e84 100644 --- a/src/libfetchers/indirect.cc +++ b/src/libfetchers/indirect.cc @@ -17,6 +17,8 @@ struct IndirectInputScheme : InputScheme std::optional rev; std::optional ref; + Attrs attrs; + if (path.size() == 1) { } else if (path.size() == 2) { if (std::regex_match(path[1], revRegex)) @@ -26,29 +28,21 @@ struct IndirectInputScheme : InputScheme else throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]); } else if (path.size() == 3) { - if (!std::regex_match(path[1], refRegex)) - throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url.url, path[1]); ref = path[1]; - if (!std::regex_match(path[2], revRegex)) - throw BadURL("in flake URL '%s', '%s' is not a commit hash", url.url, path[2]); rev = Hash::parseAny(path[2], htSHA1); } else throw BadURL("GitHub URL '%s' is invalid", url.url); std::string id = path[0]; - if (!std::regex_match(id, flakeRegex)) - throw BadURL("'%s' is not a valid flake ID", id); - // FIXME: forbid query params? + attrs.emplace("type", "indirect"); + attrs.emplace("id", id); + if (rev) attrs.emplace("rev", rev->gitRev()); + if (ref) attrs.emplace("ref", *ref); - Input input; - input.direct = false; - input.attrs.insert_or_assign("type", "indirect"); - input.attrs.insert_or_assign("id", id); - if (rev) input.attrs.insert_or_assign("rev", rev->gitRev()); - if (ref) input.attrs.insert_or_assign("ref", *ref); + emplaceURLQueryIntoAttrs(url, attrs, {}, {}); - return input; + return inputFromAttrs(attrs); } std::optional inputFromAttrs(const Attrs & attrs) const override @@ -63,6 +57,18 @@ struct IndirectInputScheme : InputScheme if (!std::regex_match(id, flakeRegex)) throw BadURL("'%s' is not a valid flake ID", id); + // TODO come up with a nicer error message for those two. + if (auto rev = maybeGetStrAttr(attrs, "rev")) { + if (!std::regex_match(*rev, revRegex)) { + throw BadURL("in flake '%s', '%s' is not a commit hash", id, *rev); + } + } + if (auto ref = maybeGetStrAttr(attrs, "ref")) { + if (!std::regex_match(*ref, refRegex)) { + throw BadURL("in flake '%s', '%s' is not a valid branch/tag name", id, *ref); + } + } + Input input; input.direct = false; input.attrs = attrs; diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc index 4fffa71d3..6015f8ec7 100644 --- a/src/libfetchers/mercurial.cc +++ b/src/libfetchers/mercurial.cc @@ -57,12 +57,7 @@ struct MercurialInputScheme : InputScheme Attrs attrs; attrs.emplace("type", "hg"); - for (auto &[name, value] : url.query) { - if (name == "rev" || name == "ref") - attrs.emplace(name, value); - else - url2.query.emplace(name, value); - } + emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {}); attrs.emplace("url", url2.to_string()); diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index c903895e2..8dfdecda8 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -201,29 +201,17 @@ struct CurlInputScheme : InputScheme if (!isValidURL(_url, requireTree)) return std::nullopt; - Input input; - auto url = _url; + Attrs attrs; + attrs.emplace("type", inputType()); + url.scheme = parseUrlScheme(url.scheme).transport; - auto narHash = url.query.find("narHash"); - if (narHash != url.query.end()) - input.attrs.insert_or_assign("narHash", narHash->second); + emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {}); - if (auto i = get(url.query, "rev")) - input.attrs.insert_or_assign("rev", *i); - - if (auto i = get(url.query, "revCount")) - if (auto n = string2Int(*i)) - input.attrs.insert_or_assign("revCount", *n); - - url.query.erase("rev"); - url.query.erase("revCount"); - - input.attrs.insert_or_assign("type", inputType()); - input.attrs.insert_or_assign("url", url.to_string()); - return input; + attrs.emplace("url", url.to_string()); + return inputFromAttrs(attrs); } std::optional inputFromAttrs(const Attrs & attrs) const override @@ -235,7 +223,7 @@ struct CurlInputScheme : InputScheme std::set allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount", "lastModified"}; for (auto & [name, value] : attrs) if (!allowedNames.count(name)) - throw Error("unsupported %s input attribute '%s'", *type, name); + throw Error("unsupported %s input attribute '%s'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }'", *type, name); Input input; input.attrs = attrs; diff --git a/tests/functional/fetchers.sh b/tests/functional/fetchers.sh new file mode 100644 index 000000000..0f888dc33 --- /dev/null +++ b/tests/functional/fetchers.sh @@ -0,0 +1,91 @@ +source common.sh + +requireGit + +clearStore + +testFetchTreeError() { + rawFetchTreeArg="${1?fetchTree arg missing}" + messageSubstring="${2?messageSubstring missing}" + + output="$(nix eval --impure --raw --expr "(builtins.fetchTree $rawFetchTreeArg).outPath" 2>&1)" && status=0 || status=$? + grepQuiet "$messageSubstring" <<<"$output" + test "$status" -ne 0 +} + +# github/gitlab/sourcehut fetcher input validation +for provider in github gitlab sourcehut; do + # ref/rev validation + testFetchTreeError \ + "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; ref = \",\"; }" \ + "URL '$provider:foo/bar' contains an invalid branch/tag name" + + testFetchTreeError \ + "\"$provider://host/foo/bar/,\"" \ + "URL '$provider:foo/bar', ',' is not a commit hash or a branch/tag name" + + testFetchTreeError \ + "\"$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc\"" \ + "URL '$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc' already contains a ref or rev" + + testFetchTreeError \ + "\"$provider://host/foo/bar/ref?ref=ref2\"" \ + "URL '$provider://host/foo/bar/ref?ref=ref2' already contains a ref or rev" + + # host validation + testFetchTreeError \ + "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; host = \"git_hub.com\"; }" \ + "URL '$provider:foo/bar' contains an invalid instance host" + + testFetchTreeError \ + "\"$provider://host/foo/bar/ref?host=git_hub.com\"" \ + "URL '$provider:foo/bar' contains an invalid instance host" + + # invalid attributes + testFetchTreeError \ + "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; wrong = true; }" \ + "unsupported input attribute 'wrong'" + + testFetchTreeError \ + "\"$provider://host/foo/bar/ref?wrong=1\"" \ + "unsupported input attribute 'wrong'" +done + +# unsupported attributes w/ tarball fetcher +testFetchTreeError \ + "\"https://host/foo?wrong=1\"" \ + "unsupported tarball input attribute 'wrong'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }" + +# test for unsupported attributes / validation in git fetcher +testFetchTreeError \ + "\"git+https://github.com/owner/repo?invalid=1\"" \ + "unsupported Git input attribute 'invalid'" + +testFetchTreeError \ + "\"git+https://github.com/owner/repo?url=foo\"" \ + "URL 'git+https://github.com/owner/repo?url=foo' must not override url via query param!" + +testFetchTreeError \ + "\"git+https://github.com/owner/repo?ref=foo.lock\"" \ + "invalid Git branch/tag name 'foo.lock'" + +testFetchTreeError \ + "{ type = \"git\"; url =\"https://github.com/owner/repo\"; ref = \"foo.lock\"; }" \ + "invalid Git branch/tag name 'foo.lock'" + +# same for mercurial +testFetchTreeError \ + "\"hg+https://forge.tld/owner/repo?invalid=1\"" \ + "unsupported Mercurial input attribute 'invalid'" + +testFetchTreeError \ + "{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; invalid = 1; }" \ + "unsupported Mercurial input attribute 'invalid'" + +testFetchTreeError \ + "\"hg+https://forge.tld/owner/repo?ref=,\"" \ + "invalid Mercurial branch/tag name ','" + +testFetchTreeError \ + "{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; ref = \",\"; }" \ + "invalid Mercurial branch/tag name ','" diff --git a/tests/functional/meson.build b/tests/functional/meson.build index a13dee001..d99f9bbd3 100644 --- a/tests/functional/meson.build +++ b/tests/functional/meson.build @@ -93,6 +93,7 @@ functional_tests_scripts = [ 'fetchGitRefs.sh', 'gc-runtime.sh', 'tarball.sh', + 'fetchers.sh', 'fetchGit.sh', 'fetchurl.sh', 'fetchPath.sh', diff --git a/tests/nixos/tarball-flakes.nix b/tests/nixos/tarball-flakes.nix index ca7627bd1..5deba4a12 100644 --- a/tests/nixos/tarball-flakes.nix +++ b/tests/nixos/tarball-flakes.nix @@ -69,7 +69,7 @@ in # Check that we got redirected to the immutable URL. locked_url = info["locked"]["url"] - assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz" + assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz?rev=${nixpkgs.rev}&revCount=1234", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz" # Check that we got the rev and revCount attributes. revision = info["revision"] From fc6a1451af673cd332cc3210d018c98be5265e2f Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Sat, 22 Jun 2024 17:38:25 +0200 Subject: [PATCH 02/13] libstore: reduce loglevel of `waiting for a machine to build` This comes quite often when the available job slots on all remote builders are exhausted and this is pretty spammy. This isn't really an issue, but expected behavior. A better way to display this is a nom-like approach where all scheduled builds are shown in a tree and pending builds are being marked as such IMHO. Change-Id: I6bc14e6054f84e3eb0768127b490e263d8cdcf89 --- src/libstore/build/derivation-goal.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libstore/build/derivation-goal.cc b/src/libstore/build/derivation-goal.cc index 46399b0a8..c0cd523e6 100644 --- a/src/libstore/build/derivation-goal.cc +++ b/src/libstore/build/derivation-goal.cc @@ -760,7 +760,7 @@ void DerivationGoal::tryToBuild() /* Not now; wait until at least one child finishes or the wake-up timeout expires. */ if (!actLock) - actLock = std::make_unique(*logger, lvlWarn, actBuildWaiting, + actLock = std::make_unique(*logger, lvlTalkative, actBuildWaiting, fmt("waiting for a machine to build '%s'", Magenta(worker.store.printStorePath(drvPath)))); worker.waitForAWhile(shared_from_this()); outputLocks.unlock(); From 12f5d27363316df8e04af1e2e376c39588e12057 Mon Sep 17 00:00:00 2001 From: Artemis Tosini Date: Sun, 12 May 2024 21:09:26 +0000 Subject: [PATCH 03/13] libstore: Start creating LocalDerivationGoal subclasses LocalDerivationGoal includes a large number of low-level sandboxing primitives for Darwin and Linux, intermingled with ifdefs. Start creating platform-specific classes to make it easier to add new platforms and review platform-specific code. This change only creates support infrastructure and moves two function, more functions will be moved in future changes. Change-Id: I9fc29fa2a7345107d4fc96c46fa90b4eabf6bb89 --- src/libstore/build/local-derivation-goal.cc | 49 ++++----------------- src/libstore/build/local-derivation-goal.hh | 35 ++++++++++++++- src/libstore/build/worker.cc | 8 ++-- src/libstore/platform.cc | 40 +++++++++++++++++ src/libstore/platform/darwin.cc | 26 +++++++++++ src/libstore/platform/darwin.hh | 16 +++++++ src/libstore/platform/fallback.hh | 11 +++++ src/libstore/platform/linux.cc | 14 ++++++ src/libstore/platform/linux.hh | 13 ++++++ 9 files changed, 165 insertions(+), 47 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index bae821266..4e616c838 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -51,9 +51,6 @@ #endif #if __APPLE__ -#include -#include - /* This definition is undocumented but depended upon by all major browsers. */ extern "C" int sandbox_init_with_parameters(const char *profile, uint64_t flags, const char *const parameters[], char **errorbuf); #endif @@ -161,19 +158,7 @@ void LocalDerivationGoal::killChild() void LocalDerivationGoal::killSandbox(bool getStats) { - if (cgroup) { - #if __linux__ - auto stats = destroyCgroup(*cgroup); - if (getStats) { - buildResult.cpuUser = stats.cpuUser; - buildResult.cpuSystem = stats.cpuSystem; - } - #else - abort(); - #endif - } - - else if (buildUser) { + if (buildUser) { auto uid = buildUser->getUID(); assert(uid != 0); killUser(uid); @@ -2177,31 +2162,8 @@ void LocalDerivationGoal::runChild() } } -#if __APPLE__ - posix_spawnattr_t attrp; - - if (posix_spawnattr_init(&attrp)) - throw SysError("failed to initialize builder"); - - if (posix_spawnattr_setflags(&attrp, POSIX_SPAWN_SETEXEC)) - throw SysError("failed to initialize builder"); - - if (drv->platform == "aarch64-darwin") { - // Unset kern.curproc_arch_affinity so we can escape Rosetta - int affinity = 0; - sysctlbyname("kern.curproc_arch_affinity", NULL, NULL, &affinity, sizeof(affinity)); - - cpu_type_t cpu = CPU_TYPE_ARM64; - posix_spawnattr_setbinpref_np(&attrp, 1, &cpu, NULL); - } else if (drv->platform == "x86_64-darwin") { - cpu_type_t cpu = CPU_TYPE_X86_64; - posix_spawnattr_setbinpref_np(&attrp, 1, &cpu, NULL); - } - - posix_spawn(NULL, drv->builder.c_str(), NULL, &attrp, stringsToCharPtrs(args).data(), stringsToCharPtrs(envStrs).data()); -#else - execve(drv->builder.c_str(), stringsToCharPtrs(args).data(), stringsToCharPtrs(envStrs).data()); -#endif + execBuilder(drv->builder, args, envStrs); + // execBuilder should not return throw SysError("executing '%1%'", drv->builder); @@ -2217,6 +2179,11 @@ void LocalDerivationGoal::runChild() } } +void LocalDerivationGoal::execBuilder(std::string builder, Strings args, Strings envStrs) +{ + execve(builder.c_str(), stringsToCharPtrs(args).data(), stringsToCharPtrs(envStrs).data()); +} + SingleDrvOutputs LocalDerivationGoal::registerOutputs() { diff --git a/src/libstore/build/local-derivation-goal.hh b/src/libstore/build/local-derivation-goal.hh index f3a83d42f..91329ca35 100644 --- a/src/libstore/build/local-derivation-goal.hh +++ b/src/libstore/build/local-derivation-goal.hh @@ -178,7 +178,28 @@ struct LocalDerivationGoal : public DerivationGoal friend struct RestrictedStore; - using DerivationGoal::DerivationGoal; + /** + * Create a LocalDerivationGoal without an on-disk .drv file, + * possibly a platform-specific subclass + */ + static std::shared_ptr makeLocalDerivationGoal( + const StorePath & drvPath, + const OutputsSpec & wantedOutputs, + Worker & worker, + BuildMode buildMode + ); + + /** + * Create a LocalDerivationGoal for an on-disk .drv file, + * possibly a platform-specific subclass + */ + static std::shared_ptr makeLocalDerivationGoal( + const StorePath & drvPath, + const BasicDerivation & drv, + const OutputsSpec & wantedOutputs, + Worker & worker, + BuildMode buildMode + ); virtual ~LocalDerivationGoal() noexcept(false) override; @@ -282,7 +303,7 @@ struct LocalDerivationGoal : public DerivationGoal * Kill any processes running under the build user UID or in the * cgroup of the build. */ - void killSandbox(bool getStats); + virtual void killSandbox(bool getStats); /** * Create alternative path calculated from but distinct from the @@ -299,6 +320,16 @@ struct LocalDerivationGoal : public DerivationGoal * rewrites caught everything */ StorePath makeFallbackPath(OutputNameView outputName); + +protected: + using DerivationGoal::DerivationGoal; + + /** + * Execute the builder, replacing the current process. + * Generally this means an `execve` call. + */ + virtual void execBuilder(std::string builder, Strings args, Strings envStrs); + }; } diff --git a/src/libstore/build/worker.cc b/src/libstore/build/worker.cc index 04be0da99..a7a298c34 100644 --- a/src/libstore/build/worker.cc +++ b/src/libstore/build/worker.cc @@ -65,8 +65,8 @@ std::shared_ptr Worker::makeDerivationGoal(const StorePath & drv { return makeDerivationGoalCommon(drvPath, wantedOutputs, [&]() -> std::shared_ptr { return !dynamic_cast(&store) - ? std::make_shared(drvPath, wantedOutputs, *this, buildMode) - : std::make_shared(drvPath, wantedOutputs, *this, buildMode); + ? std::make_shared(drvPath, wantedOutputs, *this, buildMode) + : LocalDerivationGoal::makeLocalDerivationGoal(drvPath, wantedOutputs, *this, buildMode); }); } @@ -76,8 +76,8 @@ std::shared_ptr Worker::makeBasicDerivationGoal(const StorePath { return makeDerivationGoalCommon(drvPath, wantedOutputs, [&]() -> std::shared_ptr { return !dynamic_cast(&store) - ? std::make_shared(drvPath, drv, wantedOutputs, *this, buildMode) - : std::make_shared(drvPath, drv, wantedOutputs, *this, buildMode); + ? std::make_shared(drvPath, drv, wantedOutputs, *this, buildMode) + : LocalDerivationGoal::makeLocalDerivationGoal(drvPath, drv, wantedOutputs, *this, buildMode); }); } diff --git a/src/libstore/platform.cc b/src/libstore/platform.cc index acdedab99..d10d33f0e 100644 --- a/src/libstore/platform.cc +++ b/src/libstore/platform.cc @@ -1,4 +1,5 @@ #include "local-store.hh" +#include "build/local-derivation-goal.hh" #if __linux__ #include "platform/linux.hh" @@ -19,4 +20,43 @@ std::shared_ptr LocalStore::makeLocalStore(const Params & params) return std::shared_ptr(new FallbackLocalStore(params)); #endif } + +std::shared_ptr LocalDerivationGoal::makeLocalDerivationGoal( + const StorePath & drvPath, + const OutputsSpec & wantedOutputs, + Worker & worker, + BuildMode buildMode +) +{ +#if __linux__ + return std::make_shared(drvPath, wantedOutputs, worker, buildMode); +#elif __APPLE__ + return std::make_shared(drvPath, wantedOutputs, worker, buildMode); +#else + return std::make_shared(drvPath, wantedOutputs, worker, buildMode); +#endif +} + +std::shared_ptr LocalDerivationGoal::makeLocalDerivationGoal( + const StorePath & drvPath, + const BasicDerivation & drv, + const OutputsSpec & wantedOutputs, + Worker & worker, + BuildMode buildMode +) +{ +#if __linux__ + return std::make_shared( + drvPath, drv, wantedOutputs, worker, buildMode + ); +#elif __APPLE__ + return std::make_shared( + drvPath, drv, wantedOutputs, worker, buildMode + ); +#else + return std::make_shared( + drvPath, drv, wantedOutputs, worker, buildMode + ); +#endif +} } diff --git a/src/libstore/platform/darwin.cc b/src/libstore/platform/darwin.cc index bbb81784c..83b4b4183 100644 --- a/src/libstore/platform/darwin.cc +++ b/src/libstore/platform/darwin.cc @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -220,4 +221,29 @@ void DarwinLocalStore::findPlatformRoots(UncheckedRoots & unchecked) } } } + +void DarwinLocalDerivationGoal::execBuilder(std::string builder, Strings args, Strings envStrs) +{ + posix_spawnattr_t attrp; + + if (posix_spawnattr_init(&attrp)) + throw SysError("failed to initialize builder"); + + if (posix_spawnattr_setflags(&attrp, POSIX_SPAWN_SETEXEC)) + throw SysError("failed to initialize builder"); + + if (drv->platform == "aarch64-darwin") { + // Unset kern.curproc_arch_affinity so we can escape Rosetta + int affinity = 0; + sysctlbyname("kern.curproc_arch_affinity", NULL, NULL, &affinity, sizeof(affinity)); + + cpu_type_t cpu = CPU_TYPE_ARM64; + posix_spawnattr_setbinpref_np(&attrp, 1, &cpu, NULL); + } else if (drv->platform == "x86_64-darwin") { + cpu_type_t cpu = CPU_TYPE_X86_64; + posix_spawnattr_setbinpref_np(&attrp, 1, &cpu, NULL); + } + + posix_spawn(NULL, builder.c_str(), NULL, &attrp, stringsToCharPtrs(args).data(), stringsToCharPtrs(envStrs).data()); +} } diff --git a/src/libstore/platform/darwin.hh b/src/libstore/platform/darwin.hh index b7170aa05..0ac7077fb 100644 --- a/src/libstore/platform/darwin.hh +++ b/src/libstore/platform/darwin.hh @@ -1,6 +1,7 @@ #pragma once ///@file +#include "build/local-derivation-goal.hh" #include "gc-store.hh" #include "local-store.hh" @@ -32,4 +33,19 @@ private: void findPlatformRoots(UncheckedRoots & unchecked) override; }; +/** + * Darwin-specific implementation of LocalDerivationGoal + */ +class DarwinLocalDerivationGoal : public LocalDerivationGoal +{ +public: + using LocalDerivationGoal::LocalDerivationGoal; + +private: + /** + * Set process flags to enter or leave rosetta, then execute the builder + */ + void execBuilder(std::string builder, Strings args, Strings envStrs) override; +}; + } diff --git a/src/libstore/platform/fallback.hh b/src/libstore/platform/fallback.hh index fd27edbe6..3b77536bb 100644 --- a/src/libstore/platform/fallback.hh +++ b/src/libstore/platform/fallback.hh @@ -1,6 +1,7 @@ #pragma once ///@file +#include "build/local-derivation-goal.hh" #include "local-store.hh" namespace nix { @@ -28,4 +29,14 @@ public: } }; +/** + * Fallback platform implementation of LocalDerivationGoal + * Exists so we can make LocalDerivationGoal constructor protected + */ +class FallbackLocalDerivationGoal : public LocalDerivationGoal +{ +public: + using LocalDerivationGoal::LocalDerivationGoal; +}; + } diff --git a/src/libstore/platform/linux.cc b/src/libstore/platform/linux.cc index a34608894..6b94c01cc 100644 --- a/src/libstore/platform/linux.cc +++ b/src/libstore/platform/linux.cc @@ -1,3 +1,4 @@ +#include "cgroup.hh" #include "gc-store.hh" #include "signals.hh" #include "platform/linux.hh" @@ -114,4 +115,17 @@ void LinuxLocalStore::findPlatformRoots(UncheckedRoots & unchecked) readFileRoots("/proc/sys/kernel/fbsplash", unchecked); readFileRoots("/proc/sys/kernel/poweroff_cmd", unchecked); } + +void LinuxLocalDerivationGoal::killSandbox(bool getStats) +{ + if (cgroup) { + auto stats = destroyCgroup(*cgroup); + if (getStats) { + buildResult.cpuUser = stats.cpuUser; + buildResult.cpuSystem = stats.cpuSystem; + } + } else { + LocalDerivationGoal::killSandbox(getStats); + } +} } diff --git a/src/libstore/platform/linux.hh b/src/libstore/platform/linux.hh index 8b97e17c5..2cad001ea 100644 --- a/src/libstore/platform/linux.hh +++ b/src/libstore/platform/linux.hh @@ -1,6 +1,7 @@ #pragma once ///@file +#include "build/local-derivation-goal.hh" #include "gc-store.hh" #include "local-store.hh" @@ -32,4 +33,16 @@ private: void findPlatformRoots(UncheckedRoots & unchecked) override; }; +/** + * Linux-specific implementation of LocalDerivationGoal + */ +class LinuxLocalDerivationGoal : public LocalDerivationGoal +{ +public: + using LocalDerivationGoal::LocalDerivationGoal; + +private: + void killSandbox(bool getStatus) override; +}; + } From 39a1e248c9e7adea312c5b7e1743a8e55da63e6c Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sat, 6 Apr 2024 22:08:58 +0200 Subject: [PATCH 04/13] libutil: remove sinkToSource eof callback this is only used in one place, and only to set a nicer error message on EndOfFile. the only caller that actually *catches* this exception should provide an error message in that catch block rather than forcing support for setting error message so deep into the stack. copyStorePath is never called outside of PathSubstitutionGoal anyway, which catches everything. Change-Id: Ifbae8706d781c388737706faf4c8a8b7917ca278 --- src/libstore/build/substitution-goal.cc | 12 ++++++-- src/libstore/store-api.cc | 2 -- src/libutil/serialise.cc | 15 +++++----- src/libutil/serialise.hh | 6 +--- tests/functional/meson.build | 1 + tests/functional/substitute-truncated-nar.sh | 29 ++++++++++++++++++++ 6 files changed, 48 insertions(+), 17 deletions(-) create mode 100644 tests/functional/substitute-truncated-nar.sh diff --git a/src/libstore/build/substitution-goal.cc b/src/libstore/build/substitution-goal.cc index d8d9ba283..cc4cb3c8c 100644 --- a/src/libstore/build/substitution-goal.cc +++ b/src/libstore/build/substitution-goal.cc @@ -217,6 +217,7 @@ void PathSubstitutionGoal::tryToRun() promise = std::promise(); thr = std::thread([this]() { + auto & fetchPath = subPath ? *subPath : storePath; try { ReceiveInterrupts receiveInterrupts; @@ -226,10 +227,17 @@ void PathSubstitutionGoal::tryToRun() Activity act(*logger, actSubstitute, Logger::Fields{worker.store.printStorePath(storePath), sub->getUri()}); PushActivity pact(act.id); - copyStorePath(*sub, worker.store, - subPath ? *subPath : storePath, repair, sub->isTrusted ? NoCheckSigs : CheckSigs); + copyStorePath( + *sub, worker.store, fetchPath, repair, sub->isTrusted ? NoCheckSigs : CheckSigs + ); promise.set_value(); + } catch (const EndOfFile &) { + promise.set_exception(std::make_exception_ptr(EndOfFile( + "NAR for '%s' fetched from '%s' is incomplete", + sub->printStorePath(fetchPath), + sub->getUri() + ))); } catch (...) { promise.set_exception(std::current_exception()); } diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 244ecf256..7c0902978 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -1072,8 +1072,6 @@ void copyStorePath( }); TeeSink tee { sink, progressSink }; srcStore.narFromPath(storePath, tee); - }, [&]() { - throw EndOfFile("NAR for '%s' fetched from '%s' is incomplete", srcStore.printStorePath(storePath), srcStore.getUri()); }); dstStore.addToStore(*info, *source, repair, checkSigs); diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 3a8a01f16..80b111f08 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -266,20 +266,17 @@ std::unique_ptr sourceToSink(std::function fun) } -std::unique_ptr sinkToSource( - std::function fun, - std::function eof) +std::unique_ptr sinkToSource(std::function fun) { struct SinkToSource : Source { typedef boost::coroutines2::coroutine coro_t; std::function fun; - std::function eof; std::optional coro; - SinkToSource(std::function fun, std::function eof) - : fun(fun), eof(eof) + SinkToSource(std::function fun) + : fun(fun) { } @@ -298,7 +295,9 @@ std::unique_ptr sinkToSource( }); } - if (!*coro) { eof(); abort(); } + if (!*coro) { + throw EndOfFile("coroutine has finished"); + } if (pos == cur.size()) { if (!cur.empty()) { @@ -317,7 +316,7 @@ std::unique_ptr sinkToSource( } }; - return std::make_unique(fun, eof); + return std::make_unique(fun); } diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index c9294ba2d..0632e3109 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -338,11 +338,7 @@ std::unique_ptr sourceToSink(std::function fun); * Convert a function that feeds data into a Sink into a Source. The * Source executes the function as a coroutine. */ -std::unique_ptr sinkToSource( - std::function fun, - std::function eof = []() { - throw EndOfFile("coroutine has finished"); - }); +std::unique_ptr sinkToSource(std::function fun); void writePadding(size_t len, Sink & sink); diff --git a/tests/functional/meson.build b/tests/functional/meson.build index a13dee001..eede1834c 100644 --- a/tests/functional/meson.build +++ b/tests/functional/meson.build @@ -182,6 +182,7 @@ functional_tests_scripts = [ 'debugger.sh', 'test-libstoreconsumer.sh', 'extra-sandbox-profile.sh', + 'substitute-truncated-nar.sh', ] # Plugin tests require shared libraries support. diff --git a/tests/functional/substitute-truncated-nar.sh b/tests/functional/substitute-truncated-nar.sh new file mode 100644 index 000000000..1ac7efaf6 --- /dev/null +++ b/tests/functional/substitute-truncated-nar.sh @@ -0,0 +1,29 @@ +source common.sh + +BINARY_CACHE=file://$cacheDir + +build() { + nix-build --no-out-link "$@" --expr 'derivation { + name = "text"; + system = builtins.currentSystem; + builder = "/bin/sh"; + args = [ "-c" "echo some text to make the nar less empty > $out" ]; + }' +} + +path=$(build) +nix copy --to "$BINARY_CACHE" "$path" +nix-collect-garbage >/dev/null 2>&1 + +nar=0bylmx35yjy2b1b4k7gjsl7i4vc03cpmryb41grfb1mp40n3hifl.nar.xz + +[ -e $cacheDir/nar/$nar ] || fail "long nar missing?" + +xzcat $cacheDir/nar/$nar > $TEST_HOME/tmp +truncate -s $(( $(stat -c %s $TEST_HOME/tmp) - 10 )) $TEST_HOME/tmp +xz < $TEST_HOME/tmp > $cacheDir/nar/$nar + +# Copying back '$path' from the binary cache. This should fail as it is truncated +if build --option substituters "$BINARY_CACHE" --option require-sigs false -j0; then + fail "Importing a truncated nar should fail" +fi From a9949f4760fea60ee790c27629e9620bf3ab5e3f Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Thu, 21 Mar 2024 15:54:41 +0100 Subject: [PATCH 05/13] libutil: add some serialize.hh serializer tests Change-Id: I0116265a18bc44bba16c07bf419af70d5195f07d --- tests/unit/libutil/serialise.cc | 166 ++++++++++++++++++++++++++++++++ tests/unit/meson.build | 1 + 2 files changed, 167 insertions(+) create mode 100644 tests/unit/libutil/serialise.cc diff --git a/tests/unit/libutil/serialise.cc b/tests/unit/libutil/serialise.cc new file mode 100644 index 000000000..95ae43115 --- /dev/null +++ b/tests/unit/libutil/serialise.cc @@ -0,0 +1,166 @@ +#include "serialise.hh" +#include "error.hh" +#include "fmt.hh" +#include "pos-table.hh" +#include "ref.hh" +#include "types.hh" + +#include +#include + +#include + +namespace nix { + +TEST(Sink, uint64_t) +{ + StringSink s; + s << 42; + ASSERT_EQ(s.s, std::string({42, 0, 0, 0, 0, 0, 0, 0})); +} + +TEST(Sink, string_view) +{ + StringSink s; + s << ""; + // clang-format off + ASSERT_EQ( + s.s, + std::string({ + // length + 0, 0, 0, 0, 0, 0, 0, 0, + // data (omitted) + }) + ); + // clang-format on + + s = {}; + s << "test"; + // clang-format off + ASSERT_EQ( + s.s, + std::string({ + // length + 4, 0, 0, 0, 0, 0, 0, 0, + // data + 't', 'e', 's', 't', + // padding + 0, 0, 0, 0, + }) + ); + // clang-format on + + s = {}; + s << "longer string"; + // clang-format off + ASSERT_EQ( + s.s, + std::string({ + // length + 13, 0, 0, 0, 0, 0, 0, 0, + // data + 'l', 'o', 'n', 'g', 'e', 'r', ' ', 's', 't', 'r', 'i', 'n', 'g', + // padding + 0, 0, 0, + }) + ); + // clang-format on +} + +TEST(Sink, StringSet) +{ + StringSink s; + s << StringSet{}; + // clang-format off + ASSERT_EQ( + s.s, + std::string({ + // length + 0, 0, 0, 0, 0, 0, 0, 0, + // data (omitted) + }) + ); + // clang-format on + + s = {}; + s << StringSet{"a", ""}; + // clang-format off + ASSERT_EQ( + s.s, + std::string({ + // length + 2, 0, 0, 0, 0, 0, 0, 0, + // data "" + 0, 0, 0, 0, 0, 0, 0, 0, + // data "a" + 1, 0, 0, 0, 0, 0, 0, 0, 'a', 0, 0, 0, 0, 0, 0, 0, + }) + ); + // clang-format on +} + +TEST(Sink, Strings) +{ + StringSink s; + s << Strings{}; + // clang-format off + ASSERT_EQ( + s.s, + std::string({ + // length + 0, 0, 0, 0, 0, 0, 0, 0, + // data (omitted) + }) + ); + // clang-format on + + s = {}; + s << Strings{"a", ""}; + // clang-format off + ASSERT_EQ( + s.s, + std::string({ + // length + 2, 0, 0, 0, 0, 0, 0, 0, + // data "a" + 1, 0, 0, 0, 0, 0, 0, 0, 'a', 0, 0, 0, 0, 0, 0, 0, + // data "" + 0, 0, 0, 0, 0, 0, 0, 0, + }) + ); + // clang-format on +} + +TEST(Sink, Error) +{ + PosTable pt; + auto o = pt.addOrigin(Pos::String{make_ref("test")}, 4); + + StringSink s; + s << Error{ErrorInfo{ + .level = lvlInfo, + .msg = HintFmt("foo"), + .pos = pt[pt.add(o, 1)], + .traces = {{.pos = pt[pt.add(o, 2)], .hint = HintFmt("b %1%", "foo")}}, + }}; + // NOTE position of the error and all traces are ignored + // by the wire format + // clang-format off + ASSERT_EQ( + s.s, + std::string({ + 5, 0, 0, 0, 0, 0, 0, 0, 'E', 'r', 'r', 'o', 'r', 0, 0, 0, + 3, 0, 0, 0, 0, 0, 0, 0, + 5, 0, 0, 0, 0, 0, 0, 0, 'E', 'r', 'r', 'o', 'r', 0, 0, 0, + 3, 0, 0, 0, 0, 0, 0, 0, 'f', 'o', 'o', 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 16, 0, 0, 0, 0, 0, 0, 0, + 'b', ' ', '\x1b', '[', '3', '5', ';', '1', 'm', 'f', 'o', 'o', '\x1b', '[', '0', 'm', + }) + ); + // clang-format on +} + +} diff --git a/tests/unit/meson.build b/tests/unit/meson.build index 9f19e5fd9..2b5471526 100644 --- a/tests/unit/meson.build +++ b/tests/unit/meson.build @@ -49,6 +49,7 @@ libutil_tests_sources = files( 'libutil/paths-setting.cc', 'libutil/pool.cc', 'libutil/references.cc', + 'libutil/serialise.cc', 'libutil/suggestions.cc', 'libutil/tests.cc', 'libutil/url.cc', From b43a2e84c4b2fa7cb1167693652702e6dac95f53 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Fri, 5 Apr 2024 20:35:13 +0200 Subject: [PATCH 06/13] libutil: make Pid -> pid_t operations explicit Change-Id: I3137cc140590001fe7ba542844e735944a0a9255 --- src/libmain/shared.cc | 2 +- src/libstore/build/hook-instance.cc | 2 +- src/libstore/build/local-derivation-goal.cc | 16 ++++++++-------- src/libstore/ssh.cc | 2 +- src/libutil/processes.cc | 6 ------ src/libutil/processes.hh | 3 ++- 6 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/libmain/shared.cc b/src/libmain/shared.cc index f99777a20..29538a9ca 100644 --- a/src/libmain/shared.cc +++ b/src/libmain/shared.cc @@ -378,7 +378,7 @@ RunPager::RunPager() RunPager::~RunPager() { try { - if (pid != -1) { + if (pid) { std::cout.flush(); dup2(std_out, STDOUT_FILENO); pid.wait(); diff --git a/src/libstore/build/hook-instance.cc b/src/libstore/build/hook-instance.cc index 86f72486e..9a93646f4 100644 --- a/src/libstore/build/hook-instance.cc +++ b/src/libstore/build/hook-instance.cc @@ -79,7 +79,7 @@ HookInstance::~HookInstance() { try { toHook.writeSide.reset(); - if (pid != -1) pid.kill(); + if (pid) pid.kill(); } catch (...) { ignoreException(); } diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 4e616c838..eb6229c56 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -137,7 +137,7 @@ LocalStore & LocalDerivationGoal::getLocalStore() void LocalDerivationGoal::killChild() { - if (pid != -1) { + if (pid) { worker.childTerminated(this); /* If we're using a build user, then there is a tricky race @@ -145,7 +145,7 @@ void LocalDerivationGoal::killChild() done its setuid() to the build user uid, then it won't be killed, and we'll potentially lock up in pid.wait(). So also send a conventional kill to the child. */ - ::kill(-pid, SIGKILL); /* ignore the result */ + ::kill(-pid.get(), SIGKILL); /* ignore the result */ killSandbox(true); @@ -961,13 +961,13 @@ void LocalDerivationGoal::startBuilder() uid_t hostGid = buildUser ? buildUser->getGID() : getgid(); uid_t nrIds = buildUser ? buildUser->getUIDCount() : 1; - writeFile("/proc/" + std::to_string(pid) + "/uid_map", + writeFile("/proc/" + std::to_string(pid.get()) + "/uid_map", fmt("%d %d %d", sandboxUid(), hostUid, nrIds)); if (!buildUser || buildUser->getUIDCount() == 1) - writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); + writeFile("/proc/" + std::to_string(pid.get()) + "/setgroups", "deny"); - writeFile("/proc/" + std::to_string(pid) + "/gid_map", + writeFile("/proc/" + std::to_string(pid.get()) + "/gid_map", fmt("%d %d %d", sandboxGid(), hostGid, nrIds)); } else { debug("note: not using a user namespace"); @@ -990,19 +990,19 @@ void LocalDerivationGoal::startBuilder() /* Save the mount- and user namespace of the child. We have to do this *before* the child does a chroot. */ - sandboxMountNamespace = AutoCloseFD{open(fmt("/proc/%d/ns/mnt", (pid_t) pid).c_str(), O_RDONLY)}; + sandboxMountNamespace = AutoCloseFD{open(fmt("/proc/%d/ns/mnt", pid.get()).c_str(), O_RDONLY)}; if (sandboxMountNamespace.get() == -1) throw SysError("getting sandbox mount namespace"); if (usingUserNamespace) { - sandboxUserNamespace = AutoCloseFD{open(fmt("/proc/%d/ns/user", (pid_t) pid).c_str(), O_RDONLY)}; + sandboxUserNamespace = AutoCloseFD{open(fmt("/proc/%d/ns/user", pid.get()).c_str(), O_RDONLY)}; if (sandboxUserNamespace.get() == -1) throw SysError("getting sandbox user namespace"); } /* Move the child into its own cgroup. */ if (cgroup) - writeFile(*cgroup + "/cgroup.procs", fmt("%d", (pid_t) pid)); + writeFile(*cgroup + "/cgroup.procs", fmt("%d", pid.get())); /* Signal the builder that we've updated its user namespace. */ writeFull(userNamespaceSync.writeSide.get(), "1"); diff --git a/src/libstore/ssh.cc b/src/libstore/ssh.cc index 932ebaa52..0d7bfa01d 100644 --- a/src/libstore/ssh.cc +++ b/src/libstore/ssh.cc @@ -131,7 +131,7 @@ Path SSHMaster::startMaster() auto state(state_.lock()); - if (state->sshMaster != -1) return state->socketPath; + if (state->sshMaster) return state->socketPath; state->socketPath = (Path) *state->tmpDir + "/ssh.sock"; diff --git a/src/libutil/processes.cc b/src/libutil/processes.cc index 8639a77f8..3bb419e45 100644 --- a/src/libutil/processes.cc +++ b/src/libutil/processes.cc @@ -55,12 +55,6 @@ void Pid::operator =(pid_t pid) } -Pid::operator pid_t() -{ - return pid; -} - - int Pid::kill() { assert(pid != -1); diff --git a/src/libutil/processes.hh b/src/libutil/processes.hh index b84fc7c4b..2079e1b36 100644 --- a/src/libutil/processes.hh +++ b/src/libutil/processes.hh @@ -29,13 +29,14 @@ public: Pid(pid_t pid); ~Pid() noexcept(false); void operator =(pid_t pid); - operator pid_t(); + explicit operator bool() const { return pid != -1; } int kill(); int wait(); void setSeparatePG(bool separatePG); void setKillSignal(int signal); pid_t release(); + pid_t get() const { return pid; } }; /** From 3d155fc509e19354ba3798b1cc1b9cbcdb789c85 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Fri, 5 Apr 2024 21:02:04 +0200 Subject: [PATCH 07/13] libutil: give Pid proper resource semantics copy-constructing or assigning from pid_t can easily lead to duplicate Pid instances for the same process if a pid_t was used carelessly, and Pid itself was copy-constructible. both could cause surprising results such as killing processes twice (which could become very problemantic, but luckily modern systems don't reuse PIDs all that quickly), or more than one piece of the code believing it owns a process when neither do Change-Id: Ifea7445f84200b34c1a1d0acc2cdffe0f01e20c6 --- src/libmain/shared.cc | 4 +-- src/libstore/build/hook-instance.cc | 4 +-- src/libstore/build/local-derivation-goal.cc | 10 +++---- src/libstore/ssh.cc | 8 +++--- src/libutil/namespaces.cc | 12 +++------ src/libutil/processes.cc | 30 +++++++++++---------- src/libutil/processes.hh | 5 ++-- src/libutil/unix-domain-socket.cc | 4 +-- 8 files changed, 37 insertions(+), 40 deletions(-) diff --git a/src/libmain/shared.cc b/src/libmain/shared.cc index 29538a9ca..5dabd0c8e 100644 --- a/src/libmain/shared.cc +++ b/src/libmain/shared.cc @@ -354,7 +354,7 @@ RunPager::RunPager() Pipe toPager; toPager.create(); - pid = startProcess([&]() { + pid = Pid{startProcess([&]() { if (dup2(toPager.readSide.get(), STDIN_FILENO) == -1) throw SysError("dupping stdin"); if (!getenv("LESS")) @@ -366,7 +366,7 @@ RunPager::RunPager() execlp("less", "less", nullptr); execlp("more", "more", nullptr); throw SysError("executing '%1%'", pager); - }); + })}; pid.setKillSignal(SIGINT); std_out = fcntl(STDOUT_FILENO, F_DUPFD_CLOEXEC, 0); diff --git a/src/libstore/build/hook-instance.cc b/src/libstore/build/hook-instance.cc index 9a93646f4..b8d8fe7a4 100644 --- a/src/libstore/build/hook-instance.cc +++ b/src/libstore/build/hook-instance.cc @@ -35,7 +35,7 @@ HookInstance::HookInstance() builderOut.create(); /* Fork the hook. */ - pid = startProcess([&]() { + pid = Pid{startProcess([&]() { if (dup2(fromHook.writeSide.get(), STDERR_FILENO) == -1) throw SysError("cannot pipe standard error into log file"); @@ -60,7 +60,7 @@ HookInstance::HookInstance() execv(buildHook.c_str(), stringsToCharPtrs(args).data()); throw SysError("executing '%s'", buildHook); - }); + })}; pid.setSeparatePG(true); fromHook.writeSide.reset(); diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index eb6229c56..a2ff495bf 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -906,7 +906,7 @@ void LocalDerivationGoal::startBuilder() Pipe sendPid; sendPid.create(); - Pid helper = startProcess([&]() { + Pid helper{startProcess([&]() { sendPid.readSide.close(); /* We need to open the slave early, before @@ -934,7 +934,7 @@ void LocalDerivationGoal::startBuilder() writeFull(sendPid.writeSide.get(), fmt("%d\n", child)); _exit(0); - }); + })}; sendPid.writeSide.close(); @@ -951,7 +951,7 @@ void LocalDerivationGoal::startBuilder() auto ss = tokenizeString>(readLine(sendPid.readSide.get())); assert(ss.size() == 1); - pid = string2Int(ss[0]).value(); + pid = Pid{string2Int(ss[0]).value()}; if (usingUserNamespace) { /* Set the UID/GID mapping of the builder's user namespace @@ -1010,10 +1010,10 @@ void LocalDerivationGoal::startBuilder() } else #endif { - pid = startProcess([&]() { + pid = Pid{startProcess([&]() { openSlave(); runChild(); - }); + })}; } /* parent */ diff --git a/src/libstore/ssh.cc b/src/libstore/ssh.cc index 0d7bfa01d..c40eba894 100644 --- a/src/libstore/ssh.cc +++ b/src/libstore/ssh.cc @@ -70,7 +70,7 @@ std::unique_ptr SSHMaster::startCommand(const std::string } Finally cleanup = [&]() { logger->resume(); }; - conn->sshPid = startProcess([&]() { + conn->sshPid = Pid{startProcess([&]() { restoreProcessContext(); close(in.writeSide.get()); @@ -99,7 +99,7 @@ std::unique_ptr SSHMaster::startCommand(const std::string // could not exec ssh/bash throw SysError("unable to execute '%s'", args.front()); - }, options); + }, options)}; in.readSide.reset(); @@ -147,7 +147,7 @@ Path SSHMaster::startMaster() if (isMasterRunning()) return state->socketPath; - state->sshMaster = startProcess([&]() { + state->sshMaster = Pid{startProcess([&]() { restoreProcessContext(); close(out.readSide.get()); @@ -160,7 +160,7 @@ Path SSHMaster::startMaster() execvp(args.begin()->c_str(), stringsToCharPtrs(args).data()); throw SysError("unable to execute '%s'", args.front()); - }, options); + }, options)}; out.writeSide.reset(); diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc index 98d3cd306..4b7a1b577 100644 --- a/src/libutil/namespaces.cc +++ b/src/libutil/namespaces.cc @@ -94,12 +94,7 @@ bool userNamespacesSupported() static auto res = [&]() -> bool { try { - Pid pid = startProcess([&]() - { - _exit(0); - }, { - .cloneFlags = CLONE_NEWUSER - }); + Pid pid{startProcess([&]() { _exit(0); }, {.cloneFlags = CLONE_NEWUSER})}; auto r = pid.wait(); assert(!r); @@ -120,8 +115,7 @@ bool mountAndPidNamespacesSupported() { try { - Pid pid = startProcess([&]() - { + Pid pid{startProcess([&]() { /* Make sure we don't remount the parent's /proc. */ if (mount(0, "/", 0, MS_PRIVATE | MS_REC, 0) == -1) _exit(1); @@ -136,7 +130,7 @@ bool mountAndPidNamespacesSupported() _exit(0); }, { .cloneFlags = CLONE_NEWNS | CLONE_NEWPID | (userNamespacesSupported() ? CLONE_NEWUSER : 0) - }); + })}; if (pid.wait()) { debug("PID namespaces do not work on this system: cannot remount /proc"); diff --git a/src/libutil/processes.cc b/src/libutil/processes.cc index 3bb419e45..9c99f047c 100644 --- a/src/libutil/processes.cc +++ b/src/libutil/processes.cc @@ -35,9 +35,19 @@ Pid::Pid() } -Pid::Pid(pid_t pid) - : pid(pid) +Pid::Pid(Pid && other) : pid(other.pid), separatePG(other.separatePG), killSignal(other.killSignal) { + other.pid = -1; +} + + +Pid & Pid::operator=(Pid && other) +{ + Pid tmp(std::move(other)); + std::swap(pid, tmp.pid); + std::swap(separatePG, tmp.separatePG); + std::swap(killSignal, tmp.killSignal); + return *this; } @@ -47,14 +57,6 @@ Pid::~Pid() noexcept(false) } -void Pid::operator =(pid_t pid) -{ - if (this->pid != -1 && this->pid != pid) kill(); - this->pid = pid; - killSignal = SIGKILL; // reset signal to default -} - - int Pid::kill() { assert(pid != -1); @@ -125,7 +127,7 @@ void killUser(uid_t uid) users to which the current process can send signals. So we fork a process, switch to uid, and send a mass kill. */ - Pid pid = startProcess([&]() { + Pid pid{startProcess([&]() { if (setuid(uid) == -1) throw SysError("setting uid"); @@ -147,7 +149,7 @@ void killUser(uid_t uid) } _exit(0); - }); + })}; int status = pid.wait(); if (status != 0) @@ -288,7 +290,7 @@ void runProgram2(const RunOptions & options) } /* Fork. */ - Pid pid = startProcess([&]() { + Pid pid{startProcess([&]() { if (options.environment) replaceEnv(*options.environment); if (options.standardOut && dup2(out.writeSide.get(), STDOUT_FILENO) == -1) @@ -322,7 +324,7 @@ void runProgram2(const RunOptions & options) execv(options.program.c_str(), stringsToCharPtrs(args_).data()); throw SysError("executing '%1%'", options.program); - }, processOptions); + }, processOptions)}; out.writeSide.close(); diff --git a/src/libutil/processes.hh b/src/libutil/processes.hh index 2079e1b36..8fc241ab1 100644 --- a/src/libutil/processes.hh +++ b/src/libutil/processes.hh @@ -26,9 +26,10 @@ class Pid int killSignal = SIGKILL; public: Pid(); - Pid(pid_t pid); + explicit Pid(pid_t pid): pid(pid) {} + Pid(Pid && other); + Pid & operator=(Pid && other); ~Pid() noexcept(false); - void operator =(pid_t pid); explicit operator bool() const { return pid != -1; } int kill(); int wait(); diff --git a/src/libutil/unix-domain-socket.cc b/src/libutil/unix-domain-socket.cc index a6e46ca50..d4fc37fab 100644 --- a/src/libutil/unix-domain-socket.cc +++ b/src/libutil/unix-domain-socket.cc @@ -65,7 +65,7 @@ static void bindConnectProcHelper( if (path.size() + 1 >= sizeof(addr.sun_path)) { Pipe pipe; pipe.create(); - Pid pid = startProcess([&] { + Pid pid{startProcess([&] { try { pipe.readSide.close(); Path dir = dirOf(path); @@ -83,7 +83,7 @@ static void bindConnectProcHelper( } catch (...) { writeFull(pipe.writeSide.get(), "-1\n"); } - }); + })}; pipe.writeSide.close(); auto errNo = string2Int(chomp(drainFD(pipe.readSide.get()))); if (!errNo || *errNo == -1) From ce6cb14995e869cfea395570ccb300b0369c72dc Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Fri, 5 Apr 2024 21:15:22 +0200 Subject: [PATCH 08/13] libutil: return Pid from startProcess, not pid_t Change-Id: Icc8a15090c77f54ea7d9220aadedcd4a19922814 --- src/libmain/shared.cc | 4 ++-- src/libstore/build/hook-instance.cc | 4 ++-- src/libstore/build/local-derivation-goal.cc | 14 +++++++------- src/libstore/ssh.cc | 8 ++++---- src/libutil/namespaces.cc | 6 +++--- src/libutil/processes.cc | 4 ++-- src/libutil/processes.hh | 3 ++- src/nix/daemon.cc | 2 +- .../repl_characterization/test-session.cc | 4 ++-- .../repl_characterization/test-session.hh | 3 ++- 10 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/libmain/shared.cc b/src/libmain/shared.cc index 5dabd0c8e..29538a9ca 100644 --- a/src/libmain/shared.cc +++ b/src/libmain/shared.cc @@ -354,7 +354,7 @@ RunPager::RunPager() Pipe toPager; toPager.create(); - pid = Pid{startProcess([&]() { + pid = startProcess([&]() { if (dup2(toPager.readSide.get(), STDIN_FILENO) == -1) throw SysError("dupping stdin"); if (!getenv("LESS")) @@ -366,7 +366,7 @@ RunPager::RunPager() execlp("less", "less", nullptr); execlp("more", "more", nullptr); throw SysError("executing '%1%'", pager); - })}; + }); pid.setKillSignal(SIGINT); std_out = fcntl(STDOUT_FILENO, F_DUPFD_CLOEXEC, 0); diff --git a/src/libstore/build/hook-instance.cc b/src/libstore/build/hook-instance.cc index b8d8fe7a4..9a93646f4 100644 --- a/src/libstore/build/hook-instance.cc +++ b/src/libstore/build/hook-instance.cc @@ -35,7 +35,7 @@ HookInstance::HookInstance() builderOut.create(); /* Fork the hook. */ - pid = Pid{startProcess([&]() { + pid = startProcess([&]() { if (dup2(fromHook.writeSide.get(), STDERR_FILENO) == -1) throw SysError("cannot pipe standard error into log file"); @@ -60,7 +60,7 @@ HookInstance::HookInstance() execv(buildHook.c_str(), stringsToCharPtrs(args).data()); throw SysError("executing '%s'", buildHook); - })}; + }); pid.setSeparatePG(true); fromHook.writeSide.reset(); diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index a2ff495bf..efba648a4 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -906,7 +906,7 @@ void LocalDerivationGoal::startBuilder() Pipe sendPid; sendPid.create(); - Pid helper{startProcess([&]() { + Pid helper = startProcess([&]() { sendPid.readSide.close(); /* We need to open the slave early, before @@ -930,11 +930,11 @@ void LocalDerivationGoal::startBuilder() if (usingUserNamespace) options.cloneFlags |= CLONE_NEWUSER; - pid_t child = startProcess([&]() { runChild(); }, options); + pid_t child = startProcess([&]() { runChild(); }, options).release(); writeFull(sendPid.writeSide.get(), fmt("%d\n", child)); _exit(0); - })}; + }); sendPid.writeSide.close(); @@ -1010,10 +1010,10 @@ void LocalDerivationGoal::startBuilder() } else #endif { - pid = Pid{startProcess([&]() { + pid = startProcess([&]() { openSlave(); runChild(); - })}; + }); } /* parent */ @@ -1570,7 +1570,7 @@ void LocalDerivationGoal::addDependency(const StorePath & path) entering its mount namespace, which is not possible in multithreaded programs. So we do this in a child process.*/ - Pid child(startProcess([&]() { + Pid child = startProcess([&]() { if (usingUserNamespace && (setns(sandboxUserNamespace.get(), 0) == -1)) throw SysError("entering sandbox user namespace"); @@ -1581,7 +1581,7 @@ void LocalDerivationGoal::addDependency(const StorePath & path) doBind(source, target); _exit(0); - })); + }); int status = child.wait(); if (status != 0) diff --git a/src/libstore/ssh.cc b/src/libstore/ssh.cc index c40eba894..0d7bfa01d 100644 --- a/src/libstore/ssh.cc +++ b/src/libstore/ssh.cc @@ -70,7 +70,7 @@ std::unique_ptr SSHMaster::startCommand(const std::string } Finally cleanup = [&]() { logger->resume(); }; - conn->sshPid = Pid{startProcess([&]() { + conn->sshPid = startProcess([&]() { restoreProcessContext(); close(in.writeSide.get()); @@ -99,7 +99,7 @@ std::unique_ptr SSHMaster::startCommand(const std::string // could not exec ssh/bash throw SysError("unable to execute '%s'", args.front()); - }, options)}; + }, options); in.readSide.reset(); @@ -147,7 +147,7 @@ Path SSHMaster::startMaster() if (isMasterRunning()) return state->socketPath; - state->sshMaster = Pid{startProcess([&]() { + state->sshMaster = startProcess([&]() { restoreProcessContext(); close(out.readSide.get()); @@ -160,7 +160,7 @@ Path SSHMaster::startMaster() execvp(args.begin()->c_str(), stringsToCharPtrs(args).data()); throw SysError("unable to execute '%s'", args.front()); - }, options)}; + }, options); out.writeSide.reset(); diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc index 4b7a1b577..247fba2c4 100644 --- a/src/libutil/namespaces.cc +++ b/src/libutil/namespaces.cc @@ -94,7 +94,7 @@ bool userNamespacesSupported() static auto res = [&]() -> bool { try { - Pid pid{startProcess([&]() { _exit(0); }, {.cloneFlags = CLONE_NEWUSER})}; + Pid pid = startProcess([&]() { _exit(0); }, {.cloneFlags = CLONE_NEWUSER}); auto r = pid.wait(); assert(!r); @@ -115,7 +115,7 @@ bool mountAndPidNamespacesSupported() { try { - Pid pid{startProcess([&]() { + Pid pid = startProcess([&]() { /* Make sure we don't remount the parent's /proc. */ if (mount(0, "/", 0, MS_PRIVATE | MS_REC, 0) == -1) _exit(1); @@ -130,7 +130,7 @@ bool mountAndPidNamespacesSupported() _exit(0); }, { .cloneFlags = CLONE_NEWNS | CLONE_NEWPID | (userNamespacesSupported() ? CLONE_NEWUSER : 0) - })}; + }); if (pid.wait()) { debug("PID namespaces do not work on this system: cannot remount /proc"); diff --git a/src/libutil/processes.cc b/src/libutil/processes.cc index 9c99f047c..e8af12fbd 100644 --- a/src/libutil/processes.cc +++ b/src/libutil/processes.cc @@ -183,7 +183,7 @@ static int childEntry(void * arg) #endif -pid_t startProcess(std::function fun, const ProcessOptions & options) +Pid startProcess(std::function fun, const ProcessOptions & options) { std::function wrapper = [&]() { logger = makeSimpleLogger(); @@ -227,7 +227,7 @@ pid_t startProcess(std::function fun, const ProcessOptions & options) if (pid == -1) throw SysError("unable to fork"); - return pid; + return Pid{pid}; } std::string runProgram(Path program, bool searchPath, const Strings & args, diff --git a/src/libutil/processes.hh b/src/libutil/processes.hh index 8fc241ab1..001e1fa12 100644 --- a/src/libutil/processes.hh +++ b/src/libutil/processes.hh @@ -62,7 +62,8 @@ struct ProcessOptions int cloneFlags = 0; }; -pid_t startProcess(std::function fun, const ProcessOptions & options = ProcessOptions()); +[[nodiscard]] +Pid startProcess(std::function fun, const ProcessOptions & options = ProcessOptions()); /** diff --git a/src/nix/daemon.cc b/src/nix/daemon.cc index f1cc1ee9c..a9211d64a 100644 --- a/src/nix/daemon.cc +++ b/src/nix/daemon.cc @@ -369,7 +369,7 @@ static void daemonLoop(std::optional forceTrustClientOpt) processConnection(openUncachedStore(), from, to, trusted, NotRecursive); exit(0); - }, options); + }, options).release(); } catch (Interrupted & e) { return; diff --git a/tests/functional/repl_characterization/test-session.cc b/tests/functional/repl_characterization/test-session.cc index e59064fc5..96f101cdd 100644 --- a/tests/functional/repl_characterization/test-session.cc +++ b/tests/functional/repl_characterization/test-session.cc @@ -22,7 +22,7 @@ RunningProcess RunningProcess::start(std::string executable, Strings args) procStdout.create(); // This is separate from runProgram2 because we have different IO requirements - pid_t pid = startProcess([&]() { + auto pid = startProcess([&]() { if (dup2(procStdout.writeSide.get(), STDOUT_FILENO) == -1) { throw SysError("dupping stdout"); } @@ -42,7 +42,7 @@ RunningProcess RunningProcess::start(std::string executable, Strings args) procStdin.readSide.close(); return RunningProcess{ - .pid = pid, + .pid = std::move(pid), .procStdin = std::move(procStdin), .procStdout = std::move(procStdout), }; diff --git a/tests/functional/repl_characterization/test-session.hh b/tests/functional/repl_characterization/test-session.hh index c77cce6d5..a9ba9cf0c 100644 --- a/tests/functional/repl_characterization/test-session.hh +++ b/tests/functional/repl_characterization/test-session.hh @@ -7,13 +7,14 @@ #include #include "file-descriptor.hh" +#include "processes.hh" #include "tests/terminal-code-eater.hh" namespace nix { struct RunningProcess { - pid_t pid; + Pid pid; Pipe procStdin; Pipe procStdout; From 44b0d74370768cdfeabc003183be90c9bc1eeffa Mon Sep 17 00:00:00 2001 From: Qyriad Date: Mon, 24 Jun 2024 11:45:16 -0600 Subject: [PATCH 09/13] add Expr::kind() -> string method, purely for debugging --- src/libexpr/nixexpr.cc | 140 +++++++++++++++++++++++++++++++++++++++++ src/libexpr/nixexpr.hh | 10 ++- 2 files changed, 148 insertions(+), 2 deletions(-) diff --git a/src/libexpr/nixexpr.cc b/src/libexpr/nixexpr.cc index bc53ca053..2d36f21b9 100644 --- a/src/libexpr/nixexpr.cc +++ b/src/libexpr/nixexpr.cc @@ -581,6 +581,146 @@ std::string ExprLambda::showNamePos(const EvalState & state) const } +std::string_view Expr::kind() const +{ + return "abstract"; +} + +std::string_view ExprInt::kind() const +{ + return "int"; +} + +std::string_view ExprFloat::kind() const +{ + return "float"; +} + +std::string_view ExprString::kind() const +{ + return "string"; +} + +std::string_view ExprPath::kind() const +{ + return "path"; +} + +std::string_view ExprVar::kind() const +{ + return "var"; +} + +std::string_view ExprInheritFrom::kind() const +{ + return "inherit from"; +} + +std::string_view ExprSelect::kind() const +{ + return "select"; +} + +std::string_view ExprOpHasAttr::kind() const +{ + return "hasattr operator"; +} + +std::string_view ExprAttrs::kind() const +{ + return "attrs"; +} + +std::string_view ExprList::kind() const +{ + return "list"; +} + +std::string_view ExprLambda::kind() const +{ + return "lambda"; +} + +std::string_view ExprCall::kind() const +{ + return "call"; +} + +std::string_view ExprLet::kind() const +{ + return "let"; +} + +std::string_view ExprWith::kind() const +{ + return "with"; +} + +std::string_view ExprIf::kind() const +{ + return "if"; +} + +std::string_view ExprAssert::kind() const +{ + return "assert"; +} + +std::string_view ExprOpNot::kind() const +{ + return "not operator"; +} + +std::string_view ExprOpEq::kind() const +{ + return "equality operator"; +} + +std::string_view ExprOpNEq::kind() const +{ + return "inequality operator"; +} + +std::string_view ExprOpAnd::kind() const +{ + return "logical AND operator"; +} + +std::string_view ExprOpOr::kind() const +{ + return "logical OR operator"; +} + +std::string_view ExprOpImpl::kind() const +{ + return "logical implication operator"; +} + +std::string_view ExprOpUpdate::kind() const +{ + return "update operator"; +} + +std::string_view ExprOpConcatLists::kind() const +{ + return "list concatenation operator"; +} + +std::string_view ExprConcatStrings::kind() const +{ + return "string concatenation"; +} + +std::string_view ExprPos::kind() const +{ + return "position"; +} + +std::string_view ExprBlackHole::kind() const +{ + return "blackhole"; +} + /* Position table. */ diff --git a/src/libexpr/nixexpr.hh b/src/libexpr/nixexpr.hh index 418f888b3..be367585c 100644 --- a/src/libexpr/nixexpr.hh +++ b/src/libexpr/nixexpr.hh @@ -63,12 +63,15 @@ public: virtual Value * maybeThunk(EvalState & state, Env & env); virtual void setName(Symbol name); virtual PosIdx getPos() const { return noPos; } + /** Entirely for debugging. */ + virtual std::string_view kind() const; }; #define COMMON_METHODS \ void show(const SymbolTable & symbols, std::ostream & str) const override; \ void eval(EvalState & state, Env & env, Value & v) override; \ - void bindVars(EvalState & es, const std::shared_ptr & env) override; + void bindVars(EvalState & es, const std::shared_ptr & env) override; \ + virtual std::string_view kind() const override; struct ExprInt : Expr { @@ -151,7 +154,8 @@ struct ExprInheritFrom : ExprVar this->fromWith = nullptr; } - void bindVars(EvalState & es, const std::shared_ptr & env); + void bindVars(EvalState & es, const std::shared_ptr & env) override; + std::string_view kind() const override; }; struct ExprSelect : Expr @@ -418,6 +422,7 @@ struct ExprOpNot : Expr } \ void eval(EvalState & state, Env & env, Value & v) override; \ PosIdx getPos() const override { return pos; } \ + std::string_view kind() const override; \ }; MakeBinOp(ExprOpEq, "==") @@ -453,6 +458,7 @@ struct ExprBlackHole : Expr void show(const SymbolTable & symbols, std::ostream & str) const override {} void eval(EvalState & state, Env & env, Value & v) override; void bindVars(EvalState & es, const std::shared_ptr & env) override {} + std::string_view kind() const override; }; extern ExprBlackHole eBlackHole; From af94db77cfc6a69f8385bd587a8cc4293ad250a8 Mon Sep 17 00:00:00 2001 From: Qyriad Date: Sat, 22 Jun 2024 21:17:41 -0600 Subject: [PATCH 10/13] add an ExprPrinter class, like ValuePrinter To be used Shortly Change-Id: I9def7975aa55f251eb8486391677771f7352d7ce --- src/libexpr/print.cc | 6 ++++++ src/libexpr/print.hh | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/src/libexpr/print.cc b/src/libexpr/print.cc index e387a09fb..87db004b2 100644 --- a/src/libexpr/print.cc +++ b/src/libexpr/print.cc @@ -574,4 +574,10 @@ fmt_internal::HintFmt & fmt_internal::HintFmt::operator%(const ValuePrinter & va return *this; } +std::ostream & operator<<(std::ostream & output, ExprPrinter const & printer) +{ + printer.expr.show(printer.state.symbols, output); + return output; +} + } diff --git a/src/libexpr/print.hh b/src/libexpr/print.hh index 42826d94d..6ae6b2610 100644 --- a/src/libexpr/print.hh +++ b/src/libexpr/print.hh @@ -15,6 +15,8 @@ namespace nix { +struct Expr; + class EvalState; struct Value; @@ -50,6 +52,12 @@ void printValue(EvalState & state, std::ostream & str, Value & v, PrintOptions o /** * A partially-applied form of `printValue` which can be formatted using `<<` * without allocating an intermediate string. + * This class should not outlive the eval state or it will UAF. + * FIXME: This should take `nix::ref`s, to avoid that, but our eval methods all have + * EvalState &, not ref, and constructing a new shared_ptr to data that + * already has a shared_ptr is a much bigger footgun. In the current architecture of + * libexpr, using a ValuePrinter after an EvalState has been destroyed would be + * pretty hard. */ class ValuePrinter { friend std::ostream & operator << (std::ostream & output, const ValuePrinter & printer); @@ -73,4 +81,26 @@ std::ostream & operator<<(std::ostream & output, const ValuePrinter & printer); template<> fmt_internal::HintFmt & fmt_internal::HintFmt::operator%(const ValuePrinter & value); +/** + * A partially-applied form of Expr::show(), which can be formatted using `<<` + * without allocating an intermediate string. + * This class should not outlive the eval state or it will UAF. + * FIXME: This should take `nix::ref`s, to avoid that, but our eval methods all have + * EvalState &, not ref, and constructing a new shared_ptr to data that + * already has a shared_ptr is a much bigger footgun. In the current architecture of + * libexpr, using an ExprPrinter after an EvalState has been destroyed would be + * pretty hard. + */ +class ExprPrinter +{ + /** The eval state used to get symbols. */ + EvalState & state; + /** The expression to print. */ + Expr & expr; + +public: + ExprPrinter(EvalState & state, Expr & expr) : state(state), expr(expr) { } + friend std::ostream & operator << (std::ostream & output, ExprPrinter const & printer); +}; + } From 013b7ae114517e741005835edd5720f8b125b151 Mon Sep 17 00:00:00 2001 From: Qyriad Date: Sat, 22 Jun 2024 21:22:29 -0600 Subject: [PATCH 11/13] trace when the `foo` part of `foo.bar.baz` errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns errors like: let errpkg = throw "invalid foobar"; in errpkg.meta error: … while calling the 'throw' builtin at «string»:2:12: 1| let 2| errpkg = throw "invalid foobar"; | ^ 3| in errpkg.meta error: invalid foobar into errors like: let errpkg = throw "invalid foobar"; in errpkg.meta error: … while evaluating 'errpkg' to select 'meta' on it at «string»:3:4: 2| errpkg = throw "invalid foobar"; 3| in errpkg.meta | ^ … while calling the 'throw' builtin at «string»:2:12: 1| let 2| errpkg = throw "invalid foobar"; | ^ 3| in errpkg.meta error: invalid foobar For the low price of one try/catch, you too can have the incorrect line of code actually show up in the trace! Change-Id: If8d6200ec1567706669d405c34adcd7e2d2cd29d --- src/libexpr/eval.cc | 17 ++++++++++++++++- .../functional/lang/eval-fail-recursion.err.exp | 6 ++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index afee89420..dbfd0fe33 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -1433,7 +1433,22 @@ void ExprSelect::eval(EvalState & state, Env & env, Value & v) Value * vCurrent = &vFirst; // Position for the current attrset Value in this select chain. PosIdx posCurrent; - e->eval(state, env, vFirst); + + try { + e->eval(state, env, vFirst); + } catch (Error & e) { + assert(this->e != nullptr); + std::stringstream selectedOn; + this->e->show(state.symbols, selectedOn); + state.addErrorTrace( + e, + getPos(), + "while evaluating '%s' to select '%s' on it", + "a", + "foo" + ); + throw; + } try { auto dts = state.debugRepl diff --git a/tests/functional/lang/eval-fail-recursion.err.exp b/tests/functional/lang/eval-fail-recursion.err.exp index 19380dc65..f0057b2d5 100644 --- a/tests/functional/lang/eval-fail-recursion.err.exp +++ b/tests/functional/lang/eval-fail-recursion.err.exp @@ -1,4 +1,10 @@ error: + … while evaluating 'a' to select 'foo' on it + at /pwd/lang/eval-fail-recursion.nix:1:21: + 1| let a = {} // a; in a.foo + | ^ + 2| + … in the right operand of the update (//) operator at /pwd/lang/eval-fail-recursion.nix:1:12: 1| let a = {} // a; in a.foo From b84666c3503c0500d407a058b155f6aee5ceda67 Mon Sep 17 00:00:00 2001 From: Qyriad Date: Sat, 22 Jun 2024 21:59:47 -0600 Subject: [PATCH 12/13] trace which part of `foo.bar.baz` errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns errors like: let somepkg.src = throw "invalid foobar"; in somepkg.src.meta error: … while evaluating the attribute 'src.meta' at «string»:2:3: 1| let 2| somepkg.src = throw "invalid foobar"; | ^ 3| in somepkg.src.meta … while calling the 'throw' builtin at «string»:2:17: 1| let 2| somepkg.src = throw "invalid foobar"; | ^ 3| in somepkg.src.meta error: invalid foobar into errors like: let somepkg.src = throw "invalid foobar"; in somepkg.src.meta error: … while evaluating the attribute 'src.meta' at «string»:2:3: 1| let 2| somepkg.src = throw "invalid foobar"; | ^ 3| in somepkg.src.meta … while evaluating 'somepkg.src' to select 'meta' on it at «string»:3:4: 2| somepkg.src = throw "invalid foobar"; 3| in somepkg.src.meta | ^ … while calling the 'throw' builtin at «string»:2:17: 1| let 2| somepkg.src = throw "invalid foobar"; | ^ 3| in somepkg.src.meta error: invalid foobar And for type errors, from: let somepkg.src = "I'm not an attrset"; in somepkg.src.meta error: … while evaluating the attribute 'src.meta' at «string»:2:3: 1| let 2| somepkg.src = "I'm not an attrset"; | ^ 3| in somepkg.src.meta … while selecting an attribute at «string»:3:4: 2| somepkg.src = "I'm not an attrset"; 3| in somepkg.src.meta | ^ error: expected a set but found a string: "I'm not an attrset" into: let somepkg.src = "I'm not an attrset"; in somepkg.src.meta error: … while evaluating the attribute 'src.meta' at «string»:2:3: 1| let 2| somepkg.src = "I'm not an attrset"; | ^ 3| in somepkg.src.meta … while selecting 'meta' on 'somepkg.src' at «string»:3:4: 2| somepkg.src = "I'm not an attrset"; 3| in somepkg.src.meta | ^ error: expected a set but found a string: "I'm not an attrset" For the low price of an enumerate() and a lambda you too can have the incorrect line of code actually show up in the trace! Change-Id: Ic1491c86e33c167891bdac9adad6224784760bd6 --- src/libexpr/eval.cc | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index dbfd0fe33..8a9023fe8 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -1461,12 +1461,45 @@ void ExprSelect::eval(EvalState & state, Env & env, Value & v) showAttrPath(state, env, attrPath)) : nullptr; - for (auto const & currentAttrName : attrPath) { + for (auto const & [partIdx, currentAttrName] : enumerate(attrPath)) { state.nrLookups++; Symbol const name = getName(currentAttrName, state, env); - state.forceValue(*vCurrent, pos); + // For formatting errors, which should be done only when needed. + auto partsSoFar = [&]() -> std::string { + std::stringstream ss; + // We start with the base thing this ExprSelect is selecting on. + assert(this->e != nullptr); + this->e->show(state.symbols, ss); + // Then grab each part of the attr path up to this one. + assert(attrPath.size() < partIdx); + std::span const parts( + attrPath.begin(), + attrPath.begin() + partIdx + ); + // And convert them to strings and join them. + for (auto const & part : parts) { + auto const partName = getName(part, state, env); + assert(static_cast(partName)); + ss << "." << state.symbols[partName]; + } + + return ss.str(); + }; + + try { + state.forceValue(*vCurrent, pos); + } catch (Error & e) { + state.addErrorTrace( + e, + getPos(), + "while evaluating '%s' to select '%s' on it", + partsSoFar(), + state.symbols[name] + ); + throw; + } if (vCurrent->type() != nAttrs) { @@ -1482,7 +1515,10 @@ void ExprSelect::eval(EvalState & state, Env & env, Value & v) "expected a set but found %s: %s", showType(*vCurrent), ValuePrinter(state, *vCurrent, errorPrintOptions) - ).withTrace(pos, "while selecting an attribute").debugThrow(); + ).addTrace( + pos, + HintFmt("while selecting '%s' on '%s'", state.symbols[name], partsSoFar()) + ).debugThrow(); } // Now that we know this is actually an attrset, try to find an attr From cd7aa4ac326fb3f763cd87c1e86916d8a9f417ef Mon Sep 17 00:00:00 2001 From: Qyriad Date: Sun, 23 Jun 2024 11:05:41 -0600 Subject: [PATCH 13/13] distinguish between throws & errors during throw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns errors like this: let throwMsg = a: throw (a + " invalid bar"); in throwMsg "bullshit" error: … from call site at «string»:3:4: 2| throwMsg = a: throw (a + " invalid bar"); 3| in throwMsg "bullshit" | ^ … while calling 'throwMsg' at «string»:2:14: 1| let 2| throwMsg = a: throw (a + " invalid bar"); | ^ 3| in throwMsg "bullshit" … while calling the 'throw' builtin at «string»:2:17: 1| let 2| throwMsg = a: throw (a + " invalid bar"); | ^ 3| in throwMsg "bullshit" error: bullshit invalid bar into errors like this: let throwMsg = a: throw (a + " invalid bar"); in throwMsg "bullshit" error: … from call site at «string»:3:4: 2| throwMsg = a: throw (a + " invalid bar"); 3| in throwMsg "bullshit" | ^ … while calling 'throwMsg' at «string»:2:14: 1| let 2| throwMsg = a: throw (a + " invalid bar"); | ^ 3| in throwMsg "bullshit" … caused by explicit throw at «string»:2:17: 1| let 2| throwMsg = a: throw (a + " invalid bar"); | ^ 3| in throwMsg "bullshit" error: bullshit invalid bar Change-Id: I593688928ece20f97999d1bf03b2b46d9ac338cb --- src/libexpr/eval.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 8a9023fe8..471d1f3a4 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -1809,6 +1809,15 @@ void EvalState::callFunction(Value & fun, size_t nrArgs, Value * * args, Value & try { fn->fun(*this, vCur.determinePos(noPos), args, vCur); + } catch (ThrownError & e) { + // Distinguish between an error that simply happened while "throw" + // was being evaluated and an explicit thrown error. + if (fn->name == "throw") { + addErrorTrace(e, pos, "caused by explicit %s", "throw"); + } else { + addErrorTrace(e, pos, "while calling the '%s' builtin", fn->name); + } + throw; } catch (Error & e) { addErrorTrace(e, pos, "while calling the '%1%' builtin", fn->name); throw;