forked from lix-project/lix
Merge "libfetchers: make attribute / URL query handling consistent" into main
This commit is contained in:
commit
5f0062285c
|
@ -204,7 +204,13 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
|
|||
std::string fragment;
|
||||
std::swap(fragment, parsedURL.fragment);
|
||||
|
||||
auto input = Input::fromURL(parsedURL, isFlake);
|
||||
// This has a special meaning for flakes and must not be passed to libfetchers.
|
||||
// Of course this means that libfetchers cannot have fetchers
|
||||
// expecting an argument `dir` 🫠
|
||||
ParsedURL urlForFetchers(parsedURL);
|
||||
urlForFetchers.query.erase("dir");
|
||||
|
||||
auto input = Input::fromURL(urlForFetchers, isFlake);
|
||||
input.parent = baseDir;
|
||||
|
||||
return std::make_pair(
|
||||
|
|
|
@ -159,6 +159,37 @@ struct InputScheme
|
|||
std::optional<std::string> commitMsg) const;
|
||||
|
||||
virtual std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) = 0;
|
||||
|
||||
protected:
|
||||
void emplaceURLQueryIntoAttrs(
|
||||
const ParsedURL & parsedURL,
|
||||
Attrs & attrs,
|
||||
const StringSet & numericParams,
|
||||
const StringSet & booleanParams) const
|
||||
{
|
||||
for (auto &[name, value] : parsedURL.query) {
|
||||
if (name == "url") {
|
||||
throw BadURL(
|
||||
"URL '%s' must not override url via query param!",
|
||||
parsedURL.to_string()
|
||||
);
|
||||
} else if (numericParams.count(name) != 0) {
|
||||
if (auto n = string2Int<uint64_t>(value)) {
|
||||
attrs.insert_or_assign(name, *n);
|
||||
} else {
|
||||
throw BadURL(
|
||||
"URL '%s' has non-numeric parameter '%s'",
|
||||
parsedURL.to_string(),
|
||||
name
|
||||
);
|
||||
}
|
||||
} else if (booleanParams.count(name) != 0) {
|
||||
attrs.emplace(name, Explicit<bool> { value == "1" });
|
||||
} else {
|
||||
attrs.emplace(name, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
void registerInputScheme(std::shared_ptr<InputScheme> && fetcher);
|
||||
|
|
|
@ -273,18 +273,15 @@ struct GitInputScheme : InputScheme
|
|||
|
||||
Attrs attrs;
|
||||
attrs.emplace("type", "git");
|
||||
|
||||
for (auto & [name, value] : url.query) {
|
||||
if (name == "rev" || name == "ref")
|
||||
attrs.emplace(name, value);
|
||||
else if (name == "shallow" || name == "submodules" || name == "allRefs")
|
||||
attrs.emplace(name, Explicit<bool> { value == "1" });
|
||||
else
|
||||
url2.query.emplace(name, value);
|
||||
}
|
||||
|
||||
attrs.emplace("url", url2.to_string());
|
||||
|
||||
emplaceURLQueryIntoAttrs(
|
||||
url,
|
||||
attrs,
|
||||
{"lastModified", "revCount"},
|
||||
{"shallow", "submodules", "allRefs"}
|
||||
);
|
||||
|
||||
return inputFromAttrs(attrs);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#include "attrs.hh"
|
||||
#include "filetransfer.hh"
|
||||
#include "cache.hh"
|
||||
#include "globals.hh"
|
||||
|
@ -36,18 +37,11 @@ struct GitArchiveInputScheme : InputScheme
|
|||
|
||||
auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
|
||||
|
||||
std::optional<Hash> rev;
|
||||
std::optional<std::string> ref;
|
||||
std::optional<std::string> host_url;
|
||||
std::optional<std::string> refOrRev;
|
||||
|
||||
auto size = path.size();
|
||||
if (size == 3) {
|
||||
if (std::regex_match(path[2], revRegex))
|
||||
rev = Hash::parseAny(path[2], htSHA1);
|
||||
else if (std::regex_match(path[2], refRegex))
|
||||
ref = path[2];
|
||||
else
|
||||
throw BadURL("in URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]);
|
||||
refOrRev = path[2];
|
||||
} else if (size > 3) {
|
||||
std::string rs;
|
||||
for (auto i = std::next(path.begin(), 2); i != path.end(); i++) {
|
||||
|
@ -58,61 +52,91 @@ struct GitArchiveInputScheme : InputScheme
|
|||
}
|
||||
|
||||
if (std::regex_match(rs, refRegex)) {
|
||||
ref = rs;
|
||||
refOrRev = rs;
|
||||
} else {
|
||||
throw BadURL("in URL '%s', '%s' is not a branch/tag name", url.url, rs);
|
||||
}
|
||||
} else if (size < 2)
|
||||
throw BadURL("URL '%s' is invalid", url.url);
|
||||
|
||||
Attrs attrs;
|
||||
attrs.emplace("type", type());
|
||||
attrs.emplace("owner", path[0]);
|
||||
attrs.emplace("repo", path[1]);
|
||||
|
||||
for (auto &[name, value] : url.query) {
|
||||
if (name == "rev") {
|
||||
if (rev)
|
||||
throw BadURL("URL '%s' contains multiple commit hashes", url.url);
|
||||
rev = Hash::parseAny(value, htSHA1);
|
||||
if (name == "rev" || name == "ref") {
|
||||
if (refOrRev) {
|
||||
throw BadURL("URL '%s' already contains a ref or rev", url.url);
|
||||
} else {
|
||||
refOrRev = value;
|
||||
}
|
||||
} else if (name == "lastModified") {
|
||||
if (auto n = string2Int<uint64_t>(value)) {
|
||||
attrs.emplace(name, *n);
|
||||
} else {
|
||||
throw Error(
|
||||
"Attribute 'lastModified' in URL '%s' must be an integer",
|
||||
url.to_string()
|
||||
);
|
||||
}
|
||||
} else {
|
||||
attrs.emplace(name, value);
|
||||
}
|
||||
else if (name == "ref") {
|
||||
if (!std::regex_match(value, refRegex))
|
||||
throw BadURL("URL '%s' contains an invalid branch/tag name", url.url);
|
||||
if (ref)
|
||||
throw BadURL("URL '%s' contains multiple branch/tag names", url.url);
|
||||
ref = value;
|
||||
}
|
||||
else if (name == "host") {
|
||||
if (!std::regex_match(value, hostRegex))
|
||||
throw BadURL("URL '%s' contains an invalid instance host", url.url);
|
||||
host_url = value;
|
||||
}
|
||||
// FIXME: barf on unsupported attributes
|
||||
}
|
||||
|
||||
if (ref && rev)
|
||||
throw BadURL("URL '%s' contains both a commit hash and a branch/tag name %s %s", url.url, *ref, rev->gitRev());
|
||||
if (refOrRev) attrs.emplace("refOrRev", *refOrRev);
|
||||
|
||||
Input input;
|
||||
input.attrs.insert_or_assign("type", type());
|
||||
input.attrs.insert_or_assign("owner", path[0]);
|
||||
input.attrs.insert_or_assign("repo", path[1]);
|
||||
if (rev) input.attrs.insert_or_assign("rev", rev->gitRev());
|
||||
if (ref) input.attrs.insert_or_assign("ref", *ref);
|
||||
if (host_url) input.attrs.insert_or_assign("host", *host_url);
|
||||
|
||||
return input;
|
||||
return inputFromAttrs(attrs);
|
||||
}
|
||||
|
||||
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
|
||||
{
|
||||
if (maybeGetStrAttr(attrs, "type") != type()) return {};
|
||||
// Attributes can contain refOrRev and it needs to be figured out
|
||||
// which one it is (see inputFromURL for when that may happen).
|
||||
// The correct one (ref or rev) will be written into finalAttrs and
|
||||
// it needs to be mutable for that.
|
||||
Attrs finalAttrs(attrs);
|
||||
auto type_ = maybeGetStrAttr(finalAttrs, "type");
|
||||
if (type_ != type()) return {};
|
||||
|
||||
for (auto & [name, value] : attrs)
|
||||
if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host")
|
||||
auto owner = getStrAttr(finalAttrs, "owner");
|
||||
auto repo = getStrAttr(finalAttrs, "repo");
|
||||
|
||||
auto url = fmt("%s:%s/%s", *type_, owner, repo);
|
||||
if (auto host = maybeGetStrAttr(finalAttrs, "host")) {
|
||||
if (!std::regex_match(*host, hostRegex)) {
|
||||
throw BadURL("URL '%s' contains an invalid instance host", url);
|
||||
}
|
||||
}
|
||||
|
||||
if (auto refOrRev = maybeGetStrAttr(finalAttrs, "refOrRev")) {
|
||||
finalAttrs.erase("refOrRev");
|
||||
if (std::regex_match(*refOrRev, revRegex)) {
|
||||
finalAttrs.emplace("rev", *refOrRev);
|
||||
} else if (std::regex_match(*refOrRev, refRegex)) {
|
||||
finalAttrs.emplace("ref", *refOrRev);
|
||||
} else {
|
||||
throw Error(
|
||||
"in URL '%s', '%s' is not a commit hash or a branch/tag name",
|
||||
url,
|
||||
*refOrRev
|
||||
);
|
||||
}
|
||||
} else if (auto ref = maybeGetStrAttr(finalAttrs, "ref")) {
|
||||
if (!std::regex_match(*ref, refRegex)) {
|
||||
throw BadURL("URL '%s' contains an invalid branch/tag name", url);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto & [name, value] : finalAttrs) {
|
||||
if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") {
|
||||
throw Error("unsupported input attribute '%s'", name);
|
||||
|
||||
getStrAttr(attrs, "owner");
|
||||
getStrAttr(attrs, "repo");
|
||||
}
|
||||
}
|
||||
|
||||
Input input;
|
||||
input.attrs = attrs;
|
||||
input.attrs = finalAttrs;
|
||||
return input;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@ struct IndirectInputScheme : InputScheme
|
|||
std::optional<Hash> rev;
|
||||
std::optional<std::string> ref;
|
||||
|
||||
Attrs attrs;
|
||||
|
||||
if (path.size() == 1) {
|
||||
} else if (path.size() == 2) {
|
||||
if (std::regex_match(path[1], revRegex))
|
||||
|
@ -26,29 +28,21 @@ struct IndirectInputScheme : InputScheme
|
|||
else
|
||||
throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]);
|
||||
} else if (path.size() == 3) {
|
||||
if (!std::regex_match(path[1], refRegex))
|
||||
throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url.url, path[1]);
|
||||
ref = path[1];
|
||||
if (!std::regex_match(path[2], revRegex))
|
||||
throw BadURL("in flake URL '%s', '%s' is not a commit hash", url.url, path[2]);
|
||||
rev = Hash::parseAny(path[2], htSHA1);
|
||||
} else
|
||||
throw BadURL("GitHub URL '%s' is invalid", url.url);
|
||||
|
||||
std::string id = path[0];
|
||||
if (!std::regex_match(id, flakeRegex))
|
||||
throw BadURL("'%s' is not a valid flake ID", id);
|
||||
|
||||
// FIXME: forbid query params?
|
||||
attrs.emplace("type", "indirect");
|
||||
attrs.emplace("id", id);
|
||||
if (rev) attrs.emplace("rev", rev->gitRev());
|
||||
if (ref) attrs.emplace("ref", *ref);
|
||||
|
||||
Input input;
|
||||
input.direct = false;
|
||||
input.attrs.insert_or_assign("type", "indirect");
|
||||
input.attrs.insert_or_assign("id", id);
|
||||
if (rev) input.attrs.insert_or_assign("rev", rev->gitRev());
|
||||
if (ref) input.attrs.insert_or_assign("ref", *ref);
|
||||
emplaceURLQueryIntoAttrs(url, attrs, {}, {});
|
||||
|
||||
return input;
|
||||
return inputFromAttrs(attrs);
|
||||
}
|
||||
|
||||
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
|
||||
|
@ -63,6 +57,18 @@ struct IndirectInputScheme : InputScheme
|
|||
if (!std::regex_match(id, flakeRegex))
|
||||
throw BadURL("'%s' is not a valid flake ID", id);
|
||||
|
||||
// TODO come up with a nicer error message for those two.
|
||||
if (auto rev = maybeGetStrAttr(attrs, "rev")) {
|
||||
if (!std::regex_match(*rev, revRegex)) {
|
||||
throw BadURL("in flake '%s', '%s' is not a commit hash", id, *rev);
|
||||
}
|
||||
}
|
||||
if (auto ref = maybeGetStrAttr(attrs, "ref")) {
|
||||
if (!std::regex_match(*ref, refRegex)) {
|
||||
throw BadURL("in flake '%s', '%s' is not a valid branch/tag name", id, *ref);
|
||||
}
|
||||
}
|
||||
|
||||
Input input;
|
||||
input.direct = false;
|
||||
input.attrs = attrs;
|
||||
|
|
|
@ -57,12 +57,7 @@ struct MercurialInputScheme : InputScheme
|
|||
Attrs attrs;
|
||||
attrs.emplace("type", "hg");
|
||||
|
||||
for (auto &[name, value] : url.query) {
|
||||
if (name == "rev" || name == "ref")
|
||||
attrs.emplace(name, value);
|
||||
else
|
||||
url2.query.emplace(name, value);
|
||||
}
|
||||
emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {});
|
||||
|
||||
attrs.emplace("url", url2.to_string());
|
||||
|
||||
|
|
|
@ -201,29 +201,17 @@ struct CurlInputScheme : InputScheme
|
|||
if (!isValidURL(_url, requireTree))
|
||||
return std::nullopt;
|
||||
|
||||
Input input;
|
||||
|
||||
auto url = _url;
|
||||
|
||||
Attrs attrs;
|
||||
attrs.emplace("type", inputType());
|
||||
|
||||
url.scheme = parseUrlScheme(url.scheme).transport;
|
||||
|
||||
auto narHash = url.query.find("narHash");
|
||||
if (narHash != url.query.end())
|
||||
input.attrs.insert_or_assign("narHash", narHash->second);
|
||||
emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {});
|
||||
|
||||
if (auto i = get(url.query, "rev"))
|
||||
input.attrs.insert_or_assign("rev", *i);
|
||||
|
||||
if (auto i = get(url.query, "revCount"))
|
||||
if (auto n = string2Int<uint64_t>(*i))
|
||||
input.attrs.insert_or_assign("revCount", *n);
|
||||
|
||||
url.query.erase("rev");
|
||||
url.query.erase("revCount");
|
||||
|
||||
input.attrs.insert_or_assign("type", inputType());
|
||||
input.attrs.insert_or_assign("url", url.to_string());
|
||||
return input;
|
||||
attrs.emplace("url", url.to_string());
|
||||
return inputFromAttrs(attrs);
|
||||
}
|
||||
|
||||
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
|
||||
|
@ -235,7 +223,7 @@ struct CurlInputScheme : InputScheme
|
|||
std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount", "lastModified"};
|
||||
for (auto & [name, value] : attrs)
|
||||
if (!allowedNames.count(name))
|
||||
throw Error("unsupported %s input attribute '%s'", *type, name);
|
||||
throw Error("unsupported %s input attribute '%s'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }'", *type, name);
|
||||
|
||||
Input input;
|
||||
input.attrs = attrs;
|
||||
|
|
91
tests/functional/fetchers.sh
Normal file
91
tests/functional/fetchers.sh
Normal file
|
@ -0,0 +1,91 @@
|
|||
source common.sh
|
||||
|
||||
requireGit
|
||||
|
||||
clearStore
|
||||
|
||||
testFetchTreeError() {
|
||||
rawFetchTreeArg="${1?fetchTree arg missing}"
|
||||
messageSubstring="${2?messageSubstring missing}"
|
||||
|
||||
output="$(nix eval --impure --raw --expr "(builtins.fetchTree $rawFetchTreeArg).outPath" 2>&1)" && status=0 || status=$?
|
||||
grepQuiet "$messageSubstring" <<<"$output"
|
||||
test "$status" -ne 0
|
||||
}
|
||||
|
||||
# github/gitlab/sourcehut fetcher input validation
|
||||
for provider in github gitlab sourcehut; do
|
||||
# ref/rev validation
|
||||
testFetchTreeError \
|
||||
"{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; ref = \",\"; }" \
|
||||
"URL '$provider:foo/bar' contains an invalid branch/tag name"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"$provider://host/foo/bar/,\"" \
|
||||
"URL '$provider:foo/bar', ',' is not a commit hash or a branch/tag name"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc\"" \
|
||||
"URL '$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc' already contains a ref or rev"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"$provider://host/foo/bar/ref?ref=ref2\"" \
|
||||
"URL '$provider://host/foo/bar/ref?ref=ref2' already contains a ref or rev"
|
||||
|
||||
# host validation
|
||||
testFetchTreeError \
|
||||
"{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; host = \"git_hub.com\"; }" \
|
||||
"URL '$provider:foo/bar' contains an invalid instance host"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"$provider://host/foo/bar/ref?host=git_hub.com\"" \
|
||||
"URL '$provider:foo/bar' contains an invalid instance host"
|
||||
|
||||
# invalid attributes
|
||||
testFetchTreeError \
|
||||
"{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; wrong = true; }" \
|
||||
"unsupported input attribute 'wrong'"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"$provider://host/foo/bar/ref?wrong=1\"" \
|
||||
"unsupported input attribute 'wrong'"
|
||||
done
|
||||
|
||||
# unsupported attributes w/ tarball fetcher
|
||||
testFetchTreeError \
|
||||
"\"https://host/foo?wrong=1\"" \
|
||||
"unsupported tarball input attribute 'wrong'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }"
|
||||
|
||||
# test for unsupported attributes / validation in git fetcher
|
||||
testFetchTreeError \
|
||||
"\"git+https://github.com/owner/repo?invalid=1\"" \
|
||||
"unsupported Git input attribute 'invalid'"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"git+https://github.com/owner/repo?url=foo\"" \
|
||||
"URL 'git+https://github.com/owner/repo?url=foo' must not override url via query param!"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"git+https://github.com/owner/repo?ref=foo.lock\"" \
|
||||
"invalid Git branch/tag name 'foo.lock'"
|
||||
|
||||
testFetchTreeError \
|
||||
"{ type = \"git\"; url =\"https://github.com/owner/repo\"; ref = \"foo.lock\"; }" \
|
||||
"invalid Git branch/tag name 'foo.lock'"
|
||||
|
||||
# same for mercurial
|
||||
testFetchTreeError \
|
||||
"\"hg+https://forge.tld/owner/repo?invalid=1\"" \
|
||||
"unsupported Mercurial input attribute 'invalid'"
|
||||
|
||||
testFetchTreeError \
|
||||
"{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; invalid = 1; }" \
|
||||
"unsupported Mercurial input attribute 'invalid'"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"hg+https://forge.tld/owner/repo?ref=,\"" \
|
||||
"invalid Mercurial branch/tag name ','"
|
||||
|
||||
testFetchTreeError \
|
||||
"{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; ref = \",\"; }" \
|
||||
"invalid Mercurial branch/tag name ','"
|
|
@ -93,6 +93,7 @@ functional_tests_scripts = [
|
|||
'fetchGitRefs.sh',
|
||||
'gc-runtime.sh',
|
||||
'tarball.sh',
|
||||
'fetchers.sh',
|
||||
'fetchGit.sh',
|
||||
'fetchurl.sh',
|
||||
'fetchPath.sh',
|
||||
|
|
|
@ -69,7 +69,7 @@ in
|
|||
|
||||
# Check that we got redirected to the immutable URL.
|
||||
locked_url = info["locked"]["url"]
|
||||
assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz"
|
||||
assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz?rev=${nixpkgs.rev}&revCount=1234", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz"
|
||||
|
||||
# Check that we got the rev and revCount attributes.
|
||||
revision = info["revision"]
|
||||
|
|
Loading…
Reference in a new issue