From 87fd1f024c7c979e5d96c41af4ef7e8bdb5792e1 Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Tue, 25 Jun 2024 12:31:52 +0200 Subject: [PATCH] Reapply "libfetchers: make attribute / URL query handling consistent" The original attempt at this introduced a regression; this commit reverts the revert and fixes the regression. This reverts commit 3e151d4d77b5296b9da8c3ad209932d1dfa44c68. Fix to the regression: flakeref: fix handling of `?dir=` param for flakes in subdirs As reported in #419[1], accessing a flake in a subdir of a Git repository fails with the previous commit[2] applied with the error error: unsupported Git input attribute 'dir' The problem is that the `dir`-param is inserted into the parsed URL if a flake is fetched from the subdir of a Git repository. However, for the fetching part this isn't even needed. The fix is to just pass `subdir` as second argument to `FlakeRef` (which needs a `basedir` that can be empty) and leave the parsedURL as-is. Added a regression test to make sure we don't run into this again. [1] https://git.lix.systems/lix-project/lix/issues/419 [2] e22172aaf6b6a366cecd3c025590e68fa2b91bcc, originally 3e151d4d77b5296b9da8c3ad209932d1dfa44c68 Change-Id: I2c72d5a32e406a7ca308e271730bd0af01c5d18b --- src/libexpr/flake/flakeref.cc | 11 ++- src/libfetchers/fetchers.hh | 31 +++++++ src/libfetchers/git.cc | 17 ++-- src/libfetchers/github.cc | 114 ++++++++++++++---------- src/libfetchers/indirect.cc | 34 ++++--- src/libfetchers/mercurial.cc | 7 +- src/libfetchers/tarball.cc | 26 ++---- tests/functional/fetchers.sh | 91 +++++++++++++++++++ tests/functional/flakes/subdir-flake.sh | 20 +++++ tests/functional/meson.build | 2 + tests/nixos/tarball-flakes.nix | 2 +- 11 files changed, 257 insertions(+), 98 deletions(-) create mode 100644 tests/functional/fetchers.sh create mode 100644 tests/functional/flakes/subdir-flake.sh diff --git a/src/libexpr/flake/flakeref.cc b/src/libexpr/flake/flakeref.cc index a95df04ba..3be4ea550 100644 --- a/src/libexpr/flake/flakeref.cc +++ b/src/libexpr/flake/flakeref.cc @@ -169,14 +169,13 @@ std::pair parseFlakeRefWithFragment( if (subdir != "") { if (parsedURL.query.count("dir")) throw Error("flake URL '%s' has an inconsistent 'dir' parameter", url); - parsedURL.query.insert_or_assign("dir", subdir); } if (pathExists(flakeRoot + "/.git/shallow")) parsedURL.query.insert_or_assign("shallow", "1"); return std::make_pair( - FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")), + FlakeRef(Input::fromURL(parsedURL, isFlake), subdir), fragment); } @@ -204,7 +203,13 @@ std::pair parseFlakeRefWithFragment( std::string fragment; std::swap(fragment, parsedURL.fragment); - auto input = Input::fromURL(parsedURL, isFlake); + // This has a special meaning for flakes and must not be passed to libfetchers. + // Of course this means that libfetchers cannot have fetchers + // expecting an argument `dir` 🫠 + ParsedURL urlForFetchers(parsedURL); + urlForFetchers.query.erase("dir"); + + auto input = Input::fromURL(urlForFetchers, isFlake); input.parent = baseDir; return std::make_pair( diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index 2bb4248be..40f2b6294 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -159,6 +159,37 @@ struct InputScheme std::optional commitMsg) const; virtual std::pair fetch(ref store, const Input & input) = 0; + +protected: + void emplaceURLQueryIntoAttrs( + const ParsedURL & parsedURL, + Attrs & attrs, + const StringSet & numericParams, + const StringSet & booleanParams) const + { + for (auto &[name, value] : parsedURL.query) { + if (name == "url") { + throw BadURL( + "URL '%s' must not override url via query param!", + parsedURL.to_string() + ); + } else if (numericParams.count(name) != 0) { + if (auto n = string2Int(value)) { + attrs.insert_or_assign(name, *n); + } else { + throw BadURL( + "URL '%s' has non-numeric parameter '%s'", + parsedURL.to_string(), + name + ); + } + } else if (booleanParams.count(name) != 0) { + attrs.emplace(name, Explicit { value == "1" }); + } else { + attrs.emplace(name, value); + } + } + } }; void registerInputScheme(std::shared_ptr && fetcher); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 9fd8d7bbf..8e3165ff6 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -273,18 +273,15 @@ struct GitInputScheme : InputScheme Attrs attrs; attrs.emplace("type", "git"); - - for (auto & [name, value] : url.query) { - if (name == "rev" || name == "ref") - attrs.emplace(name, value); - else if (name == "shallow" || name == "submodules" || name == "allRefs") - attrs.emplace(name, Explicit { value == "1" }); - else - url2.query.emplace(name, value); - } - attrs.emplace("url", url2.to_string()); + emplaceURLQueryIntoAttrs( + url, + attrs, + {"lastModified", "revCount"}, + {"shallow", "submodules", "allRefs"} + ); + return inputFromAttrs(attrs); } diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 60fefd1f3..b971781ae 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -1,3 +1,4 @@ +#include "attrs.hh" #include "filetransfer.hh" #include "cache.hh" #include "globals.hh" @@ -36,18 +37,11 @@ struct GitArchiveInputScheme : InputScheme auto path = tokenizeString>(url.path, "/"); - std::optional rev; - std::optional ref; - std::optional host_url; + std::optional refOrRev; auto size = path.size(); if (size == 3) { - if (std::regex_match(path[2], revRegex)) - rev = Hash::parseAny(path[2], htSHA1); - else if (std::regex_match(path[2], refRegex)) - ref = path[2]; - else - throw BadURL("in URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]); + refOrRev = path[2]; } else if (size > 3) { std::string rs; for (auto i = std::next(path.begin(), 2); i != path.end(); i++) { @@ -58,61 +52,91 @@ struct GitArchiveInputScheme : InputScheme } if (std::regex_match(rs, refRegex)) { - ref = rs; + refOrRev = rs; } else { throw BadURL("in URL '%s', '%s' is not a branch/tag name", url.url, rs); } } else if (size < 2) throw BadURL("URL '%s' is invalid", url.url); + Attrs attrs; + attrs.emplace("type", type()); + attrs.emplace("owner", path[0]); + attrs.emplace("repo", path[1]); + for (auto &[name, value] : url.query) { - if (name == "rev") { - if (rev) - throw BadURL("URL '%s' contains multiple commit hashes", url.url); - rev = Hash::parseAny(value, htSHA1); + if (name == "rev" || name == "ref") { + if (refOrRev) { + throw BadURL("URL '%s' already contains a ref or rev", url.url); + } else { + refOrRev = value; + } + } else if (name == "lastModified") { + if (auto n = string2Int(value)) { + attrs.emplace(name, *n); + } else { + throw Error( + "Attribute 'lastModified' in URL '%s' must be an integer", + url.to_string() + ); + } + } else { + attrs.emplace(name, value); } - else if (name == "ref") { - if (!std::regex_match(value, refRegex)) - throw BadURL("URL '%s' contains an invalid branch/tag name", url.url); - if (ref) - throw BadURL("URL '%s' contains multiple branch/tag names", url.url); - ref = value; - } - else if (name == "host") { - if (!std::regex_match(value, hostRegex)) - throw BadURL("URL '%s' contains an invalid instance host", url.url); - host_url = value; - } - // FIXME: barf on unsupported attributes } - if (ref && rev) - throw BadURL("URL '%s' contains both a commit hash and a branch/tag name %s %s", url.url, *ref, rev->gitRev()); + if (refOrRev) attrs.emplace("refOrRev", *refOrRev); - Input input; - input.attrs.insert_or_assign("type", type()); - input.attrs.insert_or_assign("owner", path[0]); - input.attrs.insert_or_assign("repo", path[1]); - if (rev) input.attrs.insert_or_assign("rev", rev->gitRev()); - if (ref) input.attrs.insert_or_assign("ref", *ref); - if (host_url) input.attrs.insert_or_assign("host", *host_url); - - return input; + return inputFromAttrs(attrs); } std::optional inputFromAttrs(const Attrs & attrs) const override { - if (maybeGetStrAttr(attrs, "type") != type()) return {}; + // Attributes can contain refOrRev and it needs to be figured out + // which one it is (see inputFromURL for when that may happen). + // The correct one (ref or rev) will be written into finalAttrs and + // it needs to be mutable for that. + Attrs finalAttrs(attrs); + auto type_ = maybeGetStrAttr(finalAttrs, "type"); + if (type_ != type()) return {}; - for (auto & [name, value] : attrs) - if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") + auto owner = getStrAttr(finalAttrs, "owner"); + auto repo = getStrAttr(finalAttrs, "repo"); + + auto url = fmt("%s:%s/%s", *type_, owner, repo); + if (auto host = maybeGetStrAttr(finalAttrs, "host")) { + if (!std::regex_match(*host, hostRegex)) { + throw BadURL("URL '%s' contains an invalid instance host", url); + } + } + + if (auto refOrRev = maybeGetStrAttr(finalAttrs, "refOrRev")) { + finalAttrs.erase("refOrRev"); + if (std::regex_match(*refOrRev, revRegex)) { + finalAttrs.emplace("rev", *refOrRev); + } else if (std::regex_match(*refOrRev, refRegex)) { + finalAttrs.emplace("ref", *refOrRev); + } else { + throw Error( + "in URL '%s', '%s' is not a commit hash or a branch/tag name", + url, + *refOrRev + ); + } + } else if (auto ref = maybeGetStrAttr(finalAttrs, "ref")) { + if (!std::regex_match(*ref, refRegex)) { + throw BadURL("URL '%s' contains an invalid branch/tag name", url); + } + } + + for (auto & [name, value] : finalAttrs) { + if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") { throw Error("unsupported input attribute '%s'", name); - - getStrAttr(attrs, "owner"); - getStrAttr(attrs, "repo"); + } + } Input input; - input.attrs = attrs; + input.attrs = finalAttrs; return input; } diff --git a/src/libfetchers/indirect.cc b/src/libfetchers/indirect.cc index c73505b31..8c0176e84 100644 --- a/src/libfetchers/indirect.cc +++ b/src/libfetchers/indirect.cc @@ -17,6 +17,8 @@ struct IndirectInputScheme : InputScheme std::optional rev; std::optional ref; + Attrs attrs; + if (path.size() == 1) { } else if (path.size() == 2) { if (std::regex_match(path[1], revRegex)) @@ -26,29 +28,21 @@ struct IndirectInputScheme : InputScheme else throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]); } else if (path.size() == 3) { - if (!std::regex_match(path[1], refRegex)) - throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url.url, path[1]); ref = path[1]; - if (!std::regex_match(path[2], revRegex)) - throw BadURL("in flake URL '%s', '%s' is not a commit hash", url.url, path[2]); rev = Hash::parseAny(path[2], htSHA1); } else throw BadURL("GitHub URL '%s' is invalid", url.url); std::string id = path[0]; - if (!std::regex_match(id, flakeRegex)) - throw BadURL("'%s' is not a valid flake ID", id); - // FIXME: forbid query params? + attrs.emplace("type", "indirect"); + attrs.emplace("id", id); + if (rev) attrs.emplace("rev", rev->gitRev()); + if (ref) attrs.emplace("ref", *ref); - Input input; - input.direct = false; - input.attrs.insert_or_assign("type", "indirect"); - input.attrs.insert_or_assign("id", id); - if (rev) input.attrs.insert_or_assign("rev", rev->gitRev()); - if (ref) input.attrs.insert_or_assign("ref", *ref); + emplaceURLQueryIntoAttrs(url, attrs, {}, {}); - return input; + return inputFromAttrs(attrs); } std::optional inputFromAttrs(const Attrs & attrs) const override @@ -63,6 +57,18 @@ struct IndirectInputScheme : InputScheme if (!std::regex_match(id, flakeRegex)) throw BadURL("'%s' is not a valid flake ID", id); + // TODO come up with a nicer error message for those two. + if (auto rev = maybeGetStrAttr(attrs, "rev")) { + if (!std::regex_match(*rev, revRegex)) { + throw BadURL("in flake '%s', '%s' is not a commit hash", id, *rev); + } + } + if (auto ref = maybeGetStrAttr(attrs, "ref")) { + if (!std::regex_match(*ref, refRegex)) { + throw BadURL("in flake '%s', '%s' is not a valid branch/tag name", id, *ref); + } + } + Input input; input.direct = false; input.attrs = attrs; diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc index 23cf7b51d..b4150e9df 100644 --- a/src/libfetchers/mercurial.cc +++ b/src/libfetchers/mercurial.cc @@ -56,12 +56,7 @@ struct MercurialInputScheme : InputScheme Attrs attrs; attrs.emplace("type", "hg"); - for (auto &[name, value] : url.query) { - if (name == "rev" || name == "ref") - attrs.emplace(name, value); - else - url2.query.emplace(name, value); - } + emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {}); attrs.emplace("url", url2.to_string()); diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index 6ce35aeb2..b11665805 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -201,29 +201,17 @@ struct CurlInputScheme : InputScheme if (!isValidURL(_url, requireTree)) return std::nullopt; - Input input; - auto url = _url; + Attrs attrs; + attrs.emplace("type", inputType()); + url.scheme = parseUrlScheme(url.scheme).transport; - auto narHash = url.query.find("narHash"); - if (narHash != url.query.end()) - input.attrs.insert_or_assign("narHash", narHash->second); + emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {}); - if (auto i = get(url.query, "rev")) - input.attrs.insert_or_assign("rev", *i); - - if (auto i = get(url.query, "revCount")) - if (auto n = string2Int(*i)) - input.attrs.insert_or_assign("revCount", *n); - - url.query.erase("rev"); - url.query.erase("revCount"); - - input.attrs.insert_or_assign("type", inputType()); - input.attrs.insert_or_assign("url", url.to_string()); - return input; + attrs.emplace("url", url.to_string()); + return inputFromAttrs(attrs); } std::optional inputFromAttrs(const Attrs & attrs) const override @@ -235,7 +223,7 @@ struct CurlInputScheme : InputScheme std::set allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount", "lastModified"}; for (auto & [name, value] : attrs) if (!allowedNames.count(name)) - throw Error("unsupported %s input attribute '%s'", *type, name); + throw Error("unsupported %s input attribute '%s'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }'", *type, name); Input input; input.attrs = attrs; diff --git a/tests/functional/fetchers.sh b/tests/functional/fetchers.sh new file mode 100644 index 000000000..0f888dc33 --- /dev/null +++ b/tests/functional/fetchers.sh @@ -0,0 +1,91 @@ +source common.sh + +requireGit + +clearStore + +testFetchTreeError() { + rawFetchTreeArg="${1?fetchTree arg missing}" + messageSubstring="${2?messageSubstring missing}" + + output="$(nix eval --impure --raw --expr "(builtins.fetchTree $rawFetchTreeArg).outPath" 2>&1)" && status=0 || status=$? + grepQuiet "$messageSubstring" <<<"$output" + test "$status" -ne 0 +} + +# github/gitlab/sourcehut fetcher input validation +for provider in github gitlab sourcehut; do + # ref/rev validation + testFetchTreeError \ + "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; ref = \",\"; }" \ + "URL '$provider:foo/bar' contains an invalid branch/tag name" + + testFetchTreeError \ + "\"$provider://host/foo/bar/,\"" \ + "URL '$provider:foo/bar', ',' is not a commit hash or a branch/tag name" + + testFetchTreeError \ + "\"$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc\"" \ + "URL '$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc' already contains a ref or rev" + + testFetchTreeError \ + "\"$provider://host/foo/bar/ref?ref=ref2\"" \ + "URL '$provider://host/foo/bar/ref?ref=ref2' already contains a ref or rev" + + # host validation + testFetchTreeError \ + "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; host = \"git_hub.com\"; }" \ + "URL '$provider:foo/bar' contains an invalid instance host" + + testFetchTreeError \ + "\"$provider://host/foo/bar/ref?host=git_hub.com\"" \ + "URL '$provider:foo/bar' contains an invalid instance host" + + # invalid attributes + testFetchTreeError \ + "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; wrong = true; }" \ + "unsupported input attribute 'wrong'" + + testFetchTreeError \ + "\"$provider://host/foo/bar/ref?wrong=1\"" \ + "unsupported input attribute 'wrong'" +done + +# unsupported attributes w/ tarball fetcher +testFetchTreeError \ + "\"https://host/foo?wrong=1\"" \ + "unsupported tarball input attribute 'wrong'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }" + +# test for unsupported attributes / validation in git fetcher +testFetchTreeError \ + "\"git+https://github.com/owner/repo?invalid=1\"" \ + "unsupported Git input attribute 'invalid'" + +testFetchTreeError \ + "\"git+https://github.com/owner/repo?url=foo\"" \ + "URL 'git+https://github.com/owner/repo?url=foo' must not override url via query param!" + +testFetchTreeError \ + "\"git+https://github.com/owner/repo?ref=foo.lock\"" \ + "invalid Git branch/tag name 'foo.lock'" + +testFetchTreeError \ + "{ type = \"git\"; url =\"https://github.com/owner/repo\"; ref = \"foo.lock\"; }" \ + "invalid Git branch/tag name 'foo.lock'" + +# same for mercurial +testFetchTreeError \ + "\"hg+https://forge.tld/owner/repo?invalid=1\"" \ + "unsupported Mercurial input attribute 'invalid'" + +testFetchTreeError \ + "{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; invalid = 1; }" \ + "unsupported Mercurial input attribute 'invalid'" + +testFetchTreeError \ + "\"hg+https://forge.tld/owner/repo?ref=,\"" \ + "invalid Mercurial branch/tag name ','" + +testFetchTreeError \ + "{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; ref = \",\"; }" \ + "invalid Mercurial branch/tag name ','" diff --git a/tests/functional/flakes/subdir-flake.sh b/tests/functional/flakes/subdir-flake.sh new file mode 100644 index 000000000..399518502 --- /dev/null +++ b/tests/functional/flakes/subdir-flake.sh @@ -0,0 +1,20 @@ +source common.sh +requireGit +clearStore + +container="$TEST_HOME"/flake-container +flake_dir="$container"/flake-dir + +createGitRepo "$container" +mkdir -p "$flake_dir" +writeSimpleFlake "$flake_dir" +git -C "$container" add flake-dir + +pushd "$flake_dir" &>/dev/null + info="$(nix flake info --json)" + [[ "$(jq -r '.resolvedUrl' <<<"$info")" == git+file://*/flake-container?dir=flake-dir ]] + [[ "$(jq -r '.url' <<<"$info")" == git+file://*/flake-container?dir=flake-dir ]] + + # Make sure we can actually access & build stuff in this flake. + nix build "path:$flake_dir#foo" -L +popd &>/dev/null diff --git a/tests/functional/meson.build b/tests/functional/meson.build index cbf6a1563..7a9c7182f 100644 --- a/tests/functional/meson.build +++ b/tests/functional/meson.build @@ -71,6 +71,7 @@ functional_tests_scripts = [ 'flakes/build-paths.sh', 'flakes/flake-registry.sh', 'flakes/flake-in-submodule.sh', + 'flakes/subdir-flake.sh', 'gc.sh', 'nix-collect-garbage-d.sh', 'nix-collect-garbage-dry-run.sh', @@ -94,6 +95,7 @@ functional_tests_scripts = [ 'fetchGitRefs.sh', 'gc-runtime.sh', 'tarball.sh', + 'fetchers.sh', 'fetchGit.sh', 'fetchurl.sh', 'fetchPath.sh', diff --git a/tests/nixos/tarball-flakes.nix b/tests/nixos/tarball-flakes.nix index ca7627bd1..5deba4a12 100644 --- a/tests/nixos/tarball-flakes.nix +++ b/tests/nixos/tarball-flakes.nix @@ -69,7 +69,7 @@ in # Check that we got redirected to the immutable URL. locked_url = info["locked"]["url"] - assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz" + assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz?rev=${nixpkgs.rev}&revCount=1234", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz" # Check that we got the rev and revCount attributes. revision = info["revision"]