lix/src/libutil/url.cc
Eric Wolf 4d50995eff Fix url parsing for urls using file+
`file+https://example.org/test.mp4` should not be rejected with
`unexpected authority`.
2023-01-20 10:31:26 +01:00

158 lines
4.3 KiB
C++

#include "url.hh"
#include "url-parts.hh"
#include "util.hh"
#include "split.hh"
namespace nix {
std::regex refRegex(refRegexS, std::regex::ECMAScript);
std::regex badGitRefRegex(badGitRefRegexS, std::regex::ECMAScript);
std::regex revRegex(revRegexS, std::regex::ECMAScript);
std::regex flakeIdRegex(flakeIdRegexS, std::regex::ECMAScript);
ParsedURL parseURL(const std::string & url)
{
static std::regex uriRegex(
"((" + schemeRegex + "):"
+ "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))"
+ "(?:\\?(" + queryRegex + "))?"
+ "(?:#(" + queryRegex + "))?",
std::regex::ECMAScript);
std::smatch match;
if (std::regex_match(url, match, uriRegex)) {
auto & base = match[1];
std::string scheme = match[2];
auto authority = match[3].matched
? std::optional<std::string>(match[3]) : std::nullopt;
std::string path = match[4].matched ? match[4] : match[5];
auto & query = match[6];
auto & fragment = match[7];
auto transportIsFile = parseUrlScheme(scheme).transport == "file";
if (authority && *authority != "" && transportIsFile)
throw BadURL("file:// URL '%s' has unexpected authority '%s'",
url, *authority);
if (transportIsFile && path.empty())
path = "/";
return ParsedURL{
.url = url,
.base = base,
.scheme = scheme,
.authority = authority,
.path = path,
.query = decodeQuery(query),
.fragment = percentDecode(std::string(fragment))
};
}
else
throw BadURL("'%s' is not a valid URL", url);
}
std::string percentDecode(std::string_view in)
{
std::string decoded;
for (size_t i = 0; i < in.size(); ) {
if (in[i] == '%') {
if (i + 2 >= in.size())
throw BadURL("invalid URI parameter '%s'", in);
try {
decoded += std::stoul(std::string(in, i + 1, 2), 0, 16);
i += 3;
} catch (...) {
throw BadURL("invalid URI parameter '%s'", in);
}
} else
decoded += in[i++];
}
return decoded;
}
std::map<std::string, std::string> decodeQuery(const std::string & query)
{
std::map<std::string, std::string> result;
for (auto s : tokenizeString<Strings>(query, "&")) {
auto e = s.find('=');
if (e != std::string::npos)
result.emplace(
s.substr(0, e),
percentDecode(std::string_view(s).substr(e + 1)));
}
return result;
}
std::string percentEncode(std::string_view s)
{
std::string res;
for (auto & c : s)
if ((c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| (c >= '0' && c <= '9')
|| strchr("-._~!$&'()*+,;=:@", c))
res += c;
else
res += fmt("%%%02x", (unsigned int) c);
return res;
}
std::string encodeQuery(const std::map<std::string, std::string> & ss)
{
std::string res;
bool first = true;
for (auto & [name, value] : ss) {
if (!first) res += '&';
first = false;
res += percentEncode(name);
res += '=';
res += percentEncode(value);
}
return res;
}
std::string ParsedURL::to_string() const
{
return
scheme
+ ":"
+ (authority ? "//" + *authority : "")
+ path
+ (query.empty() ? "" : "?" + encodeQuery(query))
+ (fragment.empty() ? "" : "#" + percentEncode(fragment));
}
bool ParsedURL::operator ==(const ParsedURL & other) const
{
return
scheme == other.scheme
&& authority == other.authority
&& path == other.path
&& query == other.query
&& fragment == other.fragment;
}
/**
* Parse a URL scheme of the form '(applicationScheme\+)?transportScheme'
* into a tuple '(applicationScheme, transportScheme)'
*
* > parseUrlScheme("http") == ParsedUrlScheme{ {}, "http"}
* > parseUrlScheme("tarball+http") == ParsedUrlScheme{ {"tarball"}, "http"}
*/
ParsedUrlScheme parseUrlScheme(std::string_view scheme)
{
auto application = splitPrefixTo(scheme, '+');
auto transport = scheme;
return ParsedUrlScheme {
.application = application,
.transport = transport,
};
}
}