diff --git a/src/libexpr/flake/flakeref.cc b/src/libexpr/flake/flakeref.cc index 1c90bfc43..8668961fe 100644 --- a/src/libexpr/flake/flakeref.cc +++ b/src/libexpr/flake/flakeref.cc @@ -204,7 +204,13 @@ std::pair parseFlakeRefWithFragment( std::string fragment; std::swap(fragment, parsedURL.fragment); - auto input = Input::fromURL(parsedURL, isFlake); + // This has a special meaning for flakes and must not be passed to libfetchers. + // Of course this means that libfetchers cannot have fetchers + // expecting an argument `dir` 🫠 + ParsedURL urlForFetchers(parsedURL); + urlForFetchers.query.erase("dir"); + + auto input = Input::fromURL(urlForFetchers, isFlake); input.parent = baseDir; return std::make_pair( diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index 2bb4248be..40f2b6294 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -159,6 +159,37 @@ struct InputScheme std::optional commitMsg) const; virtual std::pair fetch(ref store, const Input & input) = 0; + +protected: + void emplaceURLQueryIntoAttrs( + const ParsedURL & parsedURL, + Attrs & attrs, + const StringSet & numericParams, + const StringSet & booleanParams) const + { + for (auto &[name, value] : parsedURL.query) { + if (name == "url") { + throw BadURL( + "URL '%s' must not override url via query param!", + parsedURL.to_string() + ); + } else if (numericParams.count(name) != 0) { + if (auto n = string2Int(value)) { + attrs.insert_or_assign(name, *n); + } else { + throw BadURL( + "URL '%s' has non-numeric parameter '%s'", + parsedURL.to_string(), + name + ); + } + } else if (booleanParams.count(name) != 0) { + attrs.emplace(name, Explicit { value == "1" }); + } else { + attrs.emplace(name, value); + } + } + } }; void registerInputScheme(std::shared_ptr && fetcher); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 2817fde23..0cb826075 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -273,18 +273,15 @@ struct GitInputScheme : InputScheme Attrs attrs; attrs.emplace("type", "git"); - - for (auto & [name, value] : url.query) { - if (name == "rev" || name == "ref") - attrs.emplace(name, value); - else if (name == "shallow" || name == "submodules" || name == "allRefs") - attrs.emplace(name, Explicit { value == "1" }); - else - url2.query.emplace(name, value); - } - attrs.emplace("url", url2.to_string()); + emplaceURLQueryIntoAttrs( + url, + attrs, + {"lastModified", "revCount"}, + {"shallow", "submodules", "allRefs"} + ); + return inputFromAttrs(attrs); } diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 60fefd1f3..b971781ae 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -1,3 +1,4 @@ +#include "attrs.hh" #include "filetransfer.hh" #include "cache.hh" #include "globals.hh" @@ -36,18 +37,11 @@ struct GitArchiveInputScheme : InputScheme auto path = tokenizeString>(url.path, "/"); - std::optional rev; - std::optional ref; - std::optional host_url; + std::optional refOrRev; auto size = path.size(); if (size == 3) { - if (std::regex_match(path[2], revRegex)) - rev = Hash::parseAny(path[2], htSHA1); - else if (std::regex_match(path[2], refRegex)) - ref = path[2]; - else - throw BadURL("in URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]); + refOrRev = path[2]; } else if (size > 3) { std::string rs; for (auto i = std::next(path.begin(), 2); i != path.end(); i++) { @@ -58,61 +52,91 @@ struct GitArchiveInputScheme : InputScheme } if (std::regex_match(rs, refRegex)) { - ref = rs; + refOrRev = rs; } else { throw BadURL("in URL '%s', '%s' is not a branch/tag name", url.url, rs); } } else if (size < 2) throw BadURL("URL '%s' is invalid", url.url); + Attrs attrs; + attrs.emplace("type", type()); + attrs.emplace("owner", path[0]); + attrs.emplace("repo", path[1]); + for (auto &[name, value] : url.query) { - if (name == "rev") { - if (rev) - throw BadURL("URL '%s' contains multiple commit hashes", url.url); - rev = Hash::parseAny(value, htSHA1); + if (name == "rev" || name == "ref") { + if (refOrRev) { + throw BadURL("URL '%s' already contains a ref or rev", url.url); + } else { + refOrRev = value; + } + } else if (name == "lastModified") { + if (auto n = string2Int(value)) { + attrs.emplace(name, *n); + } else { + throw Error( + "Attribute 'lastModified' in URL '%s' must be an integer", + url.to_string() + ); + } + } else { + attrs.emplace(name, value); } - else if (name == "ref") { - if (!std::regex_match(value, refRegex)) - throw BadURL("URL '%s' contains an invalid branch/tag name", url.url); - if (ref) - throw BadURL("URL '%s' contains multiple branch/tag names", url.url); - ref = value; - } - else if (name == "host") { - if (!std::regex_match(value, hostRegex)) - throw BadURL("URL '%s' contains an invalid instance host", url.url); - host_url = value; - } - // FIXME: barf on unsupported attributes } - if (ref && rev) - throw BadURL("URL '%s' contains both a commit hash and a branch/tag name %s %s", url.url, *ref, rev->gitRev()); + if (refOrRev) attrs.emplace("refOrRev", *refOrRev); - Input input; - input.attrs.insert_or_assign("type", type()); - input.attrs.insert_or_assign("owner", path[0]); - input.attrs.insert_or_assign("repo", path[1]); - if (rev) input.attrs.insert_or_assign("rev", rev->gitRev()); - if (ref) input.attrs.insert_or_assign("ref", *ref); - if (host_url) input.attrs.insert_or_assign("host", *host_url); - - return input; + return inputFromAttrs(attrs); } std::optional inputFromAttrs(const Attrs & attrs) const override { - if (maybeGetStrAttr(attrs, "type") != type()) return {}; + // Attributes can contain refOrRev and it needs to be figured out + // which one it is (see inputFromURL for when that may happen). + // The correct one (ref or rev) will be written into finalAttrs and + // it needs to be mutable for that. + Attrs finalAttrs(attrs); + auto type_ = maybeGetStrAttr(finalAttrs, "type"); + if (type_ != type()) return {}; - for (auto & [name, value] : attrs) - if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") + auto owner = getStrAttr(finalAttrs, "owner"); + auto repo = getStrAttr(finalAttrs, "repo"); + + auto url = fmt("%s:%s/%s", *type_, owner, repo); + if (auto host = maybeGetStrAttr(finalAttrs, "host")) { + if (!std::regex_match(*host, hostRegex)) { + throw BadURL("URL '%s' contains an invalid instance host", url); + } + } + + if (auto refOrRev = maybeGetStrAttr(finalAttrs, "refOrRev")) { + finalAttrs.erase("refOrRev"); + if (std::regex_match(*refOrRev, revRegex)) { + finalAttrs.emplace("rev", *refOrRev); + } else if (std::regex_match(*refOrRev, refRegex)) { + finalAttrs.emplace("ref", *refOrRev); + } else { + throw Error( + "in URL '%s', '%s' is not a commit hash or a branch/tag name", + url, + *refOrRev + ); + } + } else if (auto ref = maybeGetStrAttr(finalAttrs, "ref")) { + if (!std::regex_match(*ref, refRegex)) { + throw BadURL("URL '%s' contains an invalid branch/tag name", url); + } + } + + for (auto & [name, value] : finalAttrs) { + if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") { throw Error("unsupported input attribute '%s'", name); - - getStrAttr(attrs, "owner"); - getStrAttr(attrs, "repo"); + } + } Input input; - input.attrs = attrs; + input.attrs = finalAttrs; return input; } diff --git a/src/libfetchers/indirect.cc b/src/libfetchers/indirect.cc index c73505b31..8c0176e84 100644 --- a/src/libfetchers/indirect.cc +++ b/src/libfetchers/indirect.cc @@ -17,6 +17,8 @@ struct IndirectInputScheme : InputScheme std::optional rev; std::optional ref; + Attrs attrs; + if (path.size() == 1) { } else if (path.size() == 2) { if (std::regex_match(path[1], revRegex)) @@ -26,29 +28,21 @@ struct IndirectInputScheme : InputScheme else throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]); } else if (path.size() == 3) { - if (!std::regex_match(path[1], refRegex)) - throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url.url, path[1]); ref = path[1]; - if (!std::regex_match(path[2], revRegex)) - throw BadURL("in flake URL '%s', '%s' is not a commit hash", url.url, path[2]); rev = Hash::parseAny(path[2], htSHA1); } else throw BadURL("GitHub URL '%s' is invalid", url.url); std::string id = path[0]; - if (!std::regex_match(id, flakeRegex)) - throw BadURL("'%s' is not a valid flake ID", id); - // FIXME: forbid query params? + attrs.emplace("type", "indirect"); + attrs.emplace("id", id); + if (rev) attrs.emplace("rev", rev->gitRev()); + if (ref) attrs.emplace("ref", *ref); - Input input; - input.direct = false; - input.attrs.insert_or_assign("type", "indirect"); - input.attrs.insert_or_assign("id", id); - if (rev) input.attrs.insert_or_assign("rev", rev->gitRev()); - if (ref) input.attrs.insert_or_assign("ref", *ref); + emplaceURLQueryIntoAttrs(url, attrs, {}, {}); - return input; + return inputFromAttrs(attrs); } std::optional inputFromAttrs(const Attrs & attrs) const override @@ -63,6 +57,18 @@ struct IndirectInputScheme : InputScheme if (!std::regex_match(id, flakeRegex)) throw BadURL("'%s' is not a valid flake ID", id); + // TODO come up with a nicer error message for those two. + if (auto rev = maybeGetStrAttr(attrs, "rev")) { + if (!std::regex_match(*rev, revRegex)) { + throw BadURL("in flake '%s', '%s' is not a commit hash", id, *rev); + } + } + if (auto ref = maybeGetStrAttr(attrs, "ref")) { + if (!std::regex_match(*ref, refRegex)) { + throw BadURL("in flake '%s', '%s' is not a valid branch/tag name", id, *ref); + } + } + Input input; input.direct = false; input.attrs = attrs; diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc index 4fffa71d3..6015f8ec7 100644 --- a/src/libfetchers/mercurial.cc +++ b/src/libfetchers/mercurial.cc @@ -57,12 +57,7 @@ struct MercurialInputScheme : InputScheme Attrs attrs; attrs.emplace("type", "hg"); - for (auto &[name, value] : url.query) { - if (name == "rev" || name == "ref") - attrs.emplace(name, value); - else - url2.query.emplace(name, value); - } + emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {}); attrs.emplace("url", url2.to_string()); diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index c903895e2..8dfdecda8 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -201,29 +201,17 @@ struct CurlInputScheme : InputScheme if (!isValidURL(_url, requireTree)) return std::nullopt; - Input input; - auto url = _url; + Attrs attrs; + attrs.emplace("type", inputType()); + url.scheme = parseUrlScheme(url.scheme).transport; - auto narHash = url.query.find("narHash"); - if (narHash != url.query.end()) - input.attrs.insert_or_assign("narHash", narHash->second); + emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {}); - if (auto i = get(url.query, "rev")) - input.attrs.insert_or_assign("rev", *i); - - if (auto i = get(url.query, "revCount")) - if (auto n = string2Int(*i)) - input.attrs.insert_or_assign("revCount", *n); - - url.query.erase("rev"); - url.query.erase("revCount"); - - input.attrs.insert_or_assign("type", inputType()); - input.attrs.insert_or_assign("url", url.to_string()); - return input; + attrs.emplace("url", url.to_string()); + return inputFromAttrs(attrs); } std::optional inputFromAttrs(const Attrs & attrs) const override @@ -235,7 +223,7 @@ struct CurlInputScheme : InputScheme std::set allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount", "lastModified"}; for (auto & [name, value] : attrs) if (!allowedNames.count(name)) - throw Error("unsupported %s input attribute '%s'", *type, name); + throw Error("unsupported %s input attribute '%s'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }'", *type, name); Input input; input.attrs = attrs; diff --git a/tests/functional/fetchers.sh b/tests/functional/fetchers.sh new file mode 100644 index 000000000..0f888dc33 --- /dev/null +++ b/tests/functional/fetchers.sh @@ -0,0 +1,91 @@ +source common.sh + +requireGit + +clearStore + +testFetchTreeError() { + rawFetchTreeArg="${1?fetchTree arg missing}" + messageSubstring="${2?messageSubstring missing}" + + output="$(nix eval --impure --raw --expr "(builtins.fetchTree $rawFetchTreeArg).outPath" 2>&1)" && status=0 || status=$? + grepQuiet "$messageSubstring" <<<"$output" + test "$status" -ne 0 +} + +# github/gitlab/sourcehut fetcher input validation +for provider in github gitlab sourcehut; do + # ref/rev validation + testFetchTreeError \ + "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; ref = \",\"; }" \ + "URL '$provider:foo/bar' contains an invalid branch/tag name" + + testFetchTreeError \ + "\"$provider://host/foo/bar/,\"" \ + "URL '$provider:foo/bar', ',' is not a commit hash or a branch/tag name" + + testFetchTreeError \ + "\"$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc\"" \ + "URL '$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc' already contains a ref or rev" + + testFetchTreeError \ + "\"$provider://host/foo/bar/ref?ref=ref2\"" \ + "URL '$provider://host/foo/bar/ref?ref=ref2' already contains a ref or rev" + + # host validation + testFetchTreeError \ + "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; host = \"git_hub.com\"; }" \ + "URL '$provider:foo/bar' contains an invalid instance host" + + testFetchTreeError \ + "\"$provider://host/foo/bar/ref?host=git_hub.com\"" \ + "URL '$provider:foo/bar' contains an invalid instance host" + + # invalid attributes + testFetchTreeError \ + "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; wrong = true; }" \ + "unsupported input attribute 'wrong'" + + testFetchTreeError \ + "\"$provider://host/foo/bar/ref?wrong=1\"" \ + "unsupported input attribute 'wrong'" +done + +# unsupported attributes w/ tarball fetcher +testFetchTreeError \ + "\"https://host/foo?wrong=1\"" \ + "unsupported tarball input attribute 'wrong'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }" + +# test for unsupported attributes / validation in git fetcher +testFetchTreeError \ + "\"git+https://github.com/owner/repo?invalid=1\"" \ + "unsupported Git input attribute 'invalid'" + +testFetchTreeError \ + "\"git+https://github.com/owner/repo?url=foo\"" \ + "URL 'git+https://github.com/owner/repo?url=foo' must not override url via query param!" + +testFetchTreeError \ + "\"git+https://github.com/owner/repo?ref=foo.lock\"" \ + "invalid Git branch/tag name 'foo.lock'" + +testFetchTreeError \ + "{ type = \"git\"; url =\"https://github.com/owner/repo\"; ref = \"foo.lock\"; }" \ + "invalid Git branch/tag name 'foo.lock'" + +# same for mercurial +testFetchTreeError \ + "\"hg+https://forge.tld/owner/repo?invalid=1\"" \ + "unsupported Mercurial input attribute 'invalid'" + +testFetchTreeError \ + "{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; invalid = 1; }" \ + "unsupported Mercurial input attribute 'invalid'" + +testFetchTreeError \ + "\"hg+https://forge.tld/owner/repo?ref=,\"" \ + "invalid Mercurial branch/tag name ','" + +testFetchTreeError \ + "{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; ref = \",\"; }" \ + "invalid Mercurial branch/tag name ','" diff --git a/tests/functional/meson.build b/tests/functional/meson.build index eede1834c..f1c97f996 100644 --- a/tests/functional/meson.build +++ b/tests/functional/meson.build @@ -93,6 +93,7 @@ functional_tests_scripts = [ 'fetchGitRefs.sh', 'gc-runtime.sh', 'tarball.sh', + 'fetchers.sh', 'fetchGit.sh', 'fetchurl.sh', 'fetchPath.sh', diff --git a/tests/nixos/tarball-flakes.nix b/tests/nixos/tarball-flakes.nix index ca7627bd1..5deba4a12 100644 --- a/tests/nixos/tarball-flakes.nix +++ b/tests/nixos/tarball-flakes.nix @@ -69,7 +69,7 @@ in # Check that we got redirected to the immutable URL. locked_url = info["locked"]["url"] - assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz" + assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz?rev=${nixpkgs.rev}&revCount=1234", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz" # Check that we got the rev and revCount attributes. revision = info["revision"]