diff --git a/src/libexpr/flake/flakeref.cc b/src/libexpr/flake/flakeref.cc index 8668961fe..1c90bfc43 100644 --- a/src/libexpr/flake/flakeref.cc +++ b/src/libexpr/flake/flakeref.cc @@ -204,13 +204,7 @@ std::pair parseFlakeRefWithFragment( std::string fragment; std::swap(fragment, parsedURL.fragment); - // This has a special meaning for flakes and must not be passed to libfetchers. - // Of course this means that libfetchers cannot have fetchers - // expecting an argument `dir` 🫠 - ParsedURL urlForFetchers(parsedURL); - urlForFetchers.query.erase("dir"); - - auto input = Input::fromURL(urlForFetchers, isFlake); + auto input = Input::fromURL(parsedURL, isFlake); input.parent = baseDir; return std::make_pair( diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index 40f2b6294..2bb4248be 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -159,37 +159,6 @@ struct InputScheme std::optional commitMsg) const; virtual std::pair fetch(ref store, const Input & input) = 0; - -protected: - void emplaceURLQueryIntoAttrs( - const ParsedURL & parsedURL, - Attrs & attrs, - const StringSet & numericParams, - const StringSet & booleanParams) const - { - for (auto &[name, value] : parsedURL.query) { - if (name == "url") { - throw BadURL( - "URL '%s' must not override url via query param!", - parsedURL.to_string() - ); - } else if (numericParams.count(name) != 0) { - if (auto n = string2Int(value)) { - attrs.insert_or_assign(name, *n); - } else { - throw BadURL( - "URL '%s' has non-numeric parameter '%s'", - parsedURL.to_string(), - name - ); - } - } else if (booleanParams.count(name) != 0) { - attrs.emplace(name, Explicit { value == "1" }); - } else { - attrs.emplace(name, value); - } - } - } }; void registerInputScheme(std::shared_ptr && fetcher); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 0cb826075..2817fde23 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -273,14 +273,17 @@ struct GitInputScheme : InputScheme Attrs attrs; attrs.emplace("type", "git"); - attrs.emplace("url", url2.to_string()); - emplaceURLQueryIntoAttrs( - url, - attrs, - {"lastModified", "revCount"}, - {"shallow", "submodules", "allRefs"} - ); + for (auto & [name, value] : url.query) { + if (name == "rev" || name == "ref") + attrs.emplace(name, value); + else if (name == "shallow" || name == "submodules" || name == "allRefs") + attrs.emplace(name, Explicit { value == "1" }); + else + url2.query.emplace(name, value); + } + + attrs.emplace("url", url2.to_string()); return inputFromAttrs(attrs); } diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index b971781ae..60fefd1f3 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -1,4 +1,3 @@ -#include "attrs.hh" #include "filetransfer.hh" #include "cache.hh" #include "globals.hh" @@ -37,11 +36,18 @@ struct GitArchiveInputScheme : InputScheme auto path = tokenizeString>(url.path, "/"); - std::optional refOrRev; + std::optional rev; + std::optional ref; + std::optional host_url; auto size = path.size(); if (size == 3) { - refOrRev = path[2]; + if (std::regex_match(path[2], revRegex)) + rev = Hash::parseAny(path[2], htSHA1); + else if (std::regex_match(path[2], refRegex)) + ref = path[2]; + else + throw BadURL("in URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]); } else if (size > 3) { std::string rs; for (auto i = std::next(path.begin(), 2); i != path.end(); i++) { @@ -52,91 +58,61 @@ struct GitArchiveInputScheme : InputScheme } if (std::regex_match(rs, refRegex)) { - refOrRev = rs; + ref = rs; } else { throw BadURL("in URL '%s', '%s' is not a branch/tag name", url.url, rs); } } else if (size < 2) throw BadURL("URL '%s' is invalid", url.url); - Attrs attrs; - attrs.emplace("type", type()); - attrs.emplace("owner", path[0]); - attrs.emplace("repo", path[1]); - for (auto &[name, value] : url.query) { - if (name == "rev" || name == "ref") { - if (refOrRev) { - throw BadURL("URL '%s' already contains a ref or rev", url.url); - } else { - refOrRev = value; - } - } else if (name == "lastModified") { - if (auto n = string2Int(value)) { - attrs.emplace(name, *n); - } else { - throw Error( - "Attribute 'lastModified' in URL '%s' must be an integer", - url.to_string() - ); - } - } else { - attrs.emplace(name, value); + if (name == "rev") { + if (rev) + throw BadURL("URL '%s' contains multiple commit hashes", url.url); + rev = Hash::parseAny(value, htSHA1); } + else if (name == "ref") { + if (!std::regex_match(value, refRegex)) + throw BadURL("URL '%s' contains an invalid branch/tag name", url.url); + if (ref) + throw BadURL("URL '%s' contains multiple branch/tag names", url.url); + ref = value; + } + else if (name == "host") { + if (!std::regex_match(value, hostRegex)) + throw BadURL("URL '%s' contains an invalid instance host", url.url); + host_url = value; + } + // FIXME: barf on unsupported attributes } - if (refOrRev) attrs.emplace("refOrRev", *refOrRev); + if (ref && rev) + throw BadURL("URL '%s' contains both a commit hash and a branch/tag name %s %s", url.url, *ref, rev->gitRev()); - return inputFromAttrs(attrs); + Input input; + input.attrs.insert_or_assign("type", type()); + input.attrs.insert_or_assign("owner", path[0]); + input.attrs.insert_or_assign("repo", path[1]); + if (rev) input.attrs.insert_or_assign("rev", rev->gitRev()); + if (ref) input.attrs.insert_or_assign("ref", *ref); + if (host_url) input.attrs.insert_or_assign("host", *host_url); + + return input; } std::optional inputFromAttrs(const Attrs & attrs) const override { - // Attributes can contain refOrRev and it needs to be figured out - // which one it is (see inputFromURL for when that may happen). - // The correct one (ref or rev) will be written into finalAttrs and - // it needs to be mutable for that. - Attrs finalAttrs(attrs); - auto type_ = maybeGetStrAttr(finalAttrs, "type"); - if (type_ != type()) return {}; + if (maybeGetStrAttr(attrs, "type") != type()) return {}; - auto owner = getStrAttr(finalAttrs, "owner"); - auto repo = getStrAttr(finalAttrs, "repo"); - - auto url = fmt("%s:%s/%s", *type_, owner, repo); - if (auto host = maybeGetStrAttr(finalAttrs, "host")) { - if (!std::regex_match(*host, hostRegex)) { - throw BadURL("URL '%s' contains an invalid instance host", url); - } - } - - if (auto refOrRev = maybeGetStrAttr(finalAttrs, "refOrRev")) { - finalAttrs.erase("refOrRev"); - if (std::regex_match(*refOrRev, revRegex)) { - finalAttrs.emplace("rev", *refOrRev); - } else if (std::regex_match(*refOrRev, refRegex)) { - finalAttrs.emplace("ref", *refOrRev); - } else { - throw Error( - "in URL '%s', '%s' is not a commit hash or a branch/tag name", - url, - *refOrRev - ); - } - } else if (auto ref = maybeGetStrAttr(finalAttrs, "ref")) { - if (!std::regex_match(*ref, refRegex)) { - throw BadURL("URL '%s' contains an invalid branch/tag name", url); - } - } - - for (auto & [name, value] : finalAttrs) { - if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") { + for (auto & [name, value] : attrs) + if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") throw Error("unsupported input attribute '%s'", name); - } - } + + getStrAttr(attrs, "owner"); + getStrAttr(attrs, "repo"); Input input; - input.attrs = finalAttrs; + input.attrs = attrs; return input; } diff --git a/src/libfetchers/indirect.cc b/src/libfetchers/indirect.cc index 8c0176e84..c73505b31 100644 --- a/src/libfetchers/indirect.cc +++ b/src/libfetchers/indirect.cc @@ -17,8 +17,6 @@ struct IndirectInputScheme : InputScheme std::optional rev; std::optional ref; - Attrs attrs; - if (path.size() == 1) { } else if (path.size() == 2) { if (std::regex_match(path[1], revRegex)) @@ -28,21 +26,29 @@ struct IndirectInputScheme : InputScheme else throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]); } else if (path.size() == 3) { + if (!std::regex_match(path[1], refRegex)) + throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url.url, path[1]); ref = path[1]; + if (!std::regex_match(path[2], revRegex)) + throw BadURL("in flake URL '%s', '%s' is not a commit hash", url.url, path[2]); rev = Hash::parseAny(path[2], htSHA1); } else throw BadURL("GitHub URL '%s' is invalid", url.url); std::string id = path[0]; + if (!std::regex_match(id, flakeRegex)) + throw BadURL("'%s' is not a valid flake ID", id); - attrs.emplace("type", "indirect"); - attrs.emplace("id", id); - if (rev) attrs.emplace("rev", rev->gitRev()); - if (ref) attrs.emplace("ref", *ref); + // FIXME: forbid query params? - emplaceURLQueryIntoAttrs(url, attrs, {}, {}); + Input input; + input.direct = false; + input.attrs.insert_or_assign("type", "indirect"); + input.attrs.insert_or_assign("id", id); + if (rev) input.attrs.insert_or_assign("rev", rev->gitRev()); + if (ref) input.attrs.insert_or_assign("ref", *ref); - return inputFromAttrs(attrs); + return input; } std::optional inputFromAttrs(const Attrs & attrs) const override @@ -57,18 +63,6 @@ struct IndirectInputScheme : InputScheme if (!std::regex_match(id, flakeRegex)) throw BadURL("'%s' is not a valid flake ID", id); - // TODO come up with a nicer error message for those two. - if (auto rev = maybeGetStrAttr(attrs, "rev")) { - if (!std::regex_match(*rev, revRegex)) { - throw BadURL("in flake '%s', '%s' is not a commit hash", id, *rev); - } - } - if (auto ref = maybeGetStrAttr(attrs, "ref")) { - if (!std::regex_match(*ref, refRegex)) { - throw BadURL("in flake '%s', '%s' is not a valid branch/tag name", id, *ref); - } - } - Input input; input.direct = false; input.attrs = attrs; diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc index 6015f8ec7..4fffa71d3 100644 --- a/src/libfetchers/mercurial.cc +++ b/src/libfetchers/mercurial.cc @@ -57,7 +57,12 @@ struct MercurialInputScheme : InputScheme Attrs attrs; attrs.emplace("type", "hg"); - emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {}); + for (auto &[name, value] : url.query) { + if (name == "rev" || name == "ref") + attrs.emplace(name, value); + else + url2.query.emplace(name, value); + } attrs.emplace("url", url2.to_string()); diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index 8dfdecda8..c903895e2 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -201,17 +201,29 @@ struct CurlInputScheme : InputScheme if (!isValidURL(_url, requireTree)) return std::nullopt; - auto url = _url; + Input input; - Attrs attrs; - attrs.emplace("type", inputType()); + auto url = _url; url.scheme = parseUrlScheme(url.scheme).transport; - emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {}); + auto narHash = url.query.find("narHash"); + if (narHash != url.query.end()) + input.attrs.insert_or_assign("narHash", narHash->second); - attrs.emplace("url", url.to_string()); - return inputFromAttrs(attrs); + if (auto i = get(url.query, "rev")) + input.attrs.insert_or_assign("rev", *i); + + if (auto i = get(url.query, "revCount")) + if (auto n = string2Int(*i)) + input.attrs.insert_or_assign("revCount", *n); + + url.query.erase("rev"); + url.query.erase("revCount"); + + input.attrs.insert_or_assign("type", inputType()); + input.attrs.insert_or_assign("url", url.to_string()); + return input; } std::optional inputFromAttrs(const Attrs & attrs) const override @@ -223,7 +235,7 @@ struct CurlInputScheme : InputScheme std::set allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount", "lastModified"}; for (auto & [name, value] : attrs) if (!allowedNames.count(name)) - throw Error("unsupported %s input attribute '%s'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }'", *type, name); + throw Error("unsupported %s input attribute '%s'", *type, name); Input input; input.attrs = attrs; diff --git a/tests/functional/fetchers.sh b/tests/functional/fetchers.sh deleted file mode 100644 index 0f888dc33..000000000 --- a/tests/functional/fetchers.sh +++ /dev/null @@ -1,91 +0,0 @@ -source common.sh - -requireGit - -clearStore - -testFetchTreeError() { - rawFetchTreeArg="${1?fetchTree arg missing}" - messageSubstring="${2?messageSubstring missing}" - - output="$(nix eval --impure --raw --expr "(builtins.fetchTree $rawFetchTreeArg).outPath" 2>&1)" && status=0 || status=$? - grepQuiet "$messageSubstring" <<<"$output" - test "$status" -ne 0 -} - -# github/gitlab/sourcehut fetcher input validation -for provider in github gitlab sourcehut; do - # ref/rev validation - testFetchTreeError \ - "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; ref = \",\"; }" \ - "URL '$provider:foo/bar' contains an invalid branch/tag name" - - testFetchTreeError \ - "\"$provider://host/foo/bar/,\"" \ - "URL '$provider:foo/bar', ',' is not a commit hash or a branch/tag name" - - testFetchTreeError \ - "\"$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc\"" \ - "URL '$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc' already contains a ref or rev" - - testFetchTreeError \ - "\"$provider://host/foo/bar/ref?ref=ref2\"" \ - "URL '$provider://host/foo/bar/ref?ref=ref2' already contains a ref or rev" - - # host validation - testFetchTreeError \ - "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; host = \"git_hub.com\"; }" \ - "URL '$provider:foo/bar' contains an invalid instance host" - - testFetchTreeError \ - "\"$provider://host/foo/bar/ref?host=git_hub.com\"" \ - "URL '$provider:foo/bar' contains an invalid instance host" - - # invalid attributes - testFetchTreeError \ - "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; wrong = true; }" \ - "unsupported input attribute 'wrong'" - - testFetchTreeError \ - "\"$provider://host/foo/bar/ref?wrong=1\"" \ - "unsupported input attribute 'wrong'" -done - -# unsupported attributes w/ tarball fetcher -testFetchTreeError \ - "\"https://host/foo?wrong=1\"" \ - "unsupported tarball input attribute 'wrong'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }" - -# test for unsupported attributes / validation in git fetcher -testFetchTreeError \ - "\"git+https://github.com/owner/repo?invalid=1\"" \ - "unsupported Git input attribute 'invalid'" - -testFetchTreeError \ - "\"git+https://github.com/owner/repo?url=foo\"" \ - "URL 'git+https://github.com/owner/repo?url=foo' must not override url via query param!" - -testFetchTreeError \ - "\"git+https://github.com/owner/repo?ref=foo.lock\"" \ - "invalid Git branch/tag name 'foo.lock'" - -testFetchTreeError \ - "{ type = \"git\"; url =\"https://github.com/owner/repo\"; ref = \"foo.lock\"; }" \ - "invalid Git branch/tag name 'foo.lock'" - -# same for mercurial -testFetchTreeError \ - "\"hg+https://forge.tld/owner/repo?invalid=1\"" \ - "unsupported Mercurial input attribute 'invalid'" - -testFetchTreeError \ - "{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; invalid = 1; }" \ - "unsupported Mercurial input attribute 'invalid'" - -testFetchTreeError \ - "\"hg+https://forge.tld/owner/repo?ref=,\"" \ - "invalid Mercurial branch/tag name ','" - -testFetchTreeError \ - "{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; ref = \",\"; }" \ - "invalid Mercurial branch/tag name ','" diff --git a/tests/functional/meson.build b/tests/functional/meson.build index d99f9bbd3..a13dee001 100644 --- a/tests/functional/meson.build +++ b/tests/functional/meson.build @@ -93,7 +93,6 @@ functional_tests_scripts = [ 'fetchGitRefs.sh', 'gc-runtime.sh', 'tarball.sh', - 'fetchers.sh', 'fetchGit.sh', 'fetchurl.sh', 'fetchPath.sh', diff --git a/tests/nixos/tarball-flakes.nix b/tests/nixos/tarball-flakes.nix index 5deba4a12..ca7627bd1 100644 --- a/tests/nixos/tarball-flakes.nix +++ b/tests/nixos/tarball-flakes.nix @@ -69,7 +69,7 @@ in # Check that we got redirected to the immutable URL. locked_url = info["locked"]["url"] - assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz?rev=${nixpkgs.rev}&revCount=1234", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz" + assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz" # Check that we got the rev and revCount attributes. revision = info["revision"]