Fix URL parser

Fixes #3062.
This commit is contained in:
Eelco Dolstra 2020-02-03 15:27:26 +01:00
parent d070e1c532
commit a2628b43bb
3 changed files with 25 additions and 21 deletions

View file

@ -67,8 +67,10 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
{
using namespace fetchers;
static std::string fnRegex = "[0-9a-zA-Z-._~!$&'\"()*+,;=]+";
static std::regex pathUrlRegex(
"(" + pathRegex + "/?)"
"(/?" + fnRegex + "(?:/" + fnRegex + ")*/?)"
+ "(?:\\?(" + queryRegex + "))?"
+ "(?:#(" + queryRegex + "))?",
std::regex::ECMAScript);

View file

@ -11,24 +11,22 @@ std::regex flakeIdRegex(flakeIdRegexS, std::regex::ECMAScript);
ParsedURL parseURL(const std::string & url)
{
static std::regex uriRegex(
"(((" + schemeRegex + "):"
+ "(//(" + authorityRegex + "))?"
+ "(" + pathRegex + "))"
"((" + schemeRegex + "):"
+ "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))"
+ "(?:\\?(" + queryRegex + "))?"
+ "(?:#(" + queryRegex + "))?"
+ ")",
+ "(?:#(" + queryRegex + "))?",
std::regex::ECMAScript);
std::smatch match;
if (std::regex_match(url, match, uriRegex)) {
auto & base = match[2];
std::string scheme = match[3];
auto & base = match[1];
std::string scheme = match[2];
auto authority = match[4].matched
? std::optional<std::string>(match[5]) : std::nullopt;
std::string path = match[6];
auto & query = match[7];
auto & fragment = match[8];
std::string path = match[4].matched ? match[4] : match[5];
auto & query = match[6];
auto & fragment = match[7];
auto isFile = scheme.find("file") != std::string::npos;

View file

@ -5,16 +5,20 @@
namespace nix::fetchers {
// URI stuff.
// Building blocks for the URL regular expressions. Each constant is a regex
// fragment held in a std::string so that it can be spliced into larger
// patterns by string concatenation.
// NOTE(review): this span defines pctEncoded, schemeRegex, authorityRegex,
// segmentRegex, pathRegex, pcharRegex and queryRegex twice. Judging by the
// hunk header this looks like the pre-change lines followed by the
// post-change lines of one diff -- confirm against the real file.

// First set of definitions (fragments are not wrapped in a group).
// A percent sign followed by exactly two hexadecimal digits.
const static std::string pctEncoded = "%[0-9a-fA-F][0-9a-fA-F]";
// One or more characters, each a lowercase ASCII letter or a plus sign.
const static std::string schemeRegex = "[a-z+]+";
// Optional run of lowercase letters ending in an at sign, then zero or more
// letters, digits, dots, underscores, tildes or hyphens.
const static std::string authorityRegex =
"(?:(?:[a-z])*@)?"
"[a-zA-Z0-9._~-]*";
// One or more letters, digits, dots, underscores, tildes or hyphens.
const static std::string segmentRegex = "[a-zA-Z0-9._~-]+";
// Either slash-separated segments with an optional leading slash, or just an
// optional slash; the second alternative lets this match the empty string.
const static std::string pathRegex = "(?:/?" + segmentRegex + "(?:/" + segmentRegex + ")*|/?)";
// A single character from the listed set (which includes a space and a
// double quote), or one pctEncoded sequence.
const static std::string pcharRegex =
"(?:[a-zA-Z0-9-._~!$&'\"()*+,;=:@ ]|" + pctEncoded + ")";
// Zero or more items, each a pcharRegex match, a slash or a question mark.
const static std::string queryRegex = "(?:" + pcharRegex + "|[/?])*";

// Second set of definitions. Fragments that contain an alternation are
// wrapped in a non-capturing group, so embedding them in a larger pattern
// does not change how the alternation binds and adds no capture groups.
// NOTE(review): the names unreserved, subdelims and pchar look like the
// grammar rules of RFC 3986 -- confirm before relying on that correspondence.

// A percent sign followed by exactly two hexadecimal digits.
const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])";
// One or more characters, each a lowercase ASCII letter or a plus sign.
const static std::string schemeRegex = "(?:[a-z+]+)";
// A bracketed, non-empty run of hexadecimal digits and colons. Only the
// character set is checked, not the internal structure of the address.
const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])";
// A single letter, digit, hyphen, dot, underscore or tilde.
const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])";
// A single punctuation character from the listed set; note that the set
// includes the double quote.
const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])";
// Zero or more unreserved, percent-encoded or subdelims items; can be empty.
const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)";
// Either a bracketed address (ipv6AddressRegex) or a hostnameRegex match.
const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")";
// Same items as hostnameRegex plus the colon; can be empty.
const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)";
// Optional userRegex followed by an at sign, then hostRegex, then an optional
// colon and decimal digits. The fragment as a whole is not wrapped in a
// group; it is a plain concatenation with no top-level alternation.
const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?";
// One unreserved, percent-encoded or subdelims item, or a colon or at sign.
const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])";
// Zero or more items, each a pcharRegex match, a slash, a question mark,
// a space or a double quote.
const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*";
// One or more pcharRegex matches, i.e. a non-empty path component.
const static std::string segmentRegex = "(?:" + pcharRegex + "+)";
// Zero or more components, each introduced by a slash, then an optional
// trailing slash; can match the empty string.
const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)";
// A component with no leading slash, then zero or more slash-introduced
// components, then an optional trailing slash; never matches the empty string.
const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)";
// A Git ref (i.e. branch or tag name).
// Pattern: one letter or digit, then zero or more letters, digits,
// underscores, dots or hyphens.
const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check