Fix URL parser

Fixes #3062.
This commit is contained in:
Eelco Dolstra 2020-02-03 15:27:26 +01:00
parent d070e1c532
commit a2628b43bb
3 changed files with 25 additions and 21 deletions

View file

@@ -67,8 +67,10 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
{ {
using namespace fetchers; using namespace fetchers;
static std::string fnRegex = "[0-9a-zA-Z-._~!$&'\"()*+,;=]+";
static std::regex pathUrlRegex( static std::regex pathUrlRegex(
"(" + pathRegex + "/?)" "(/?" + fnRegex + "(?:/" + fnRegex + ")*/?)"
+ "(?:\\?(" + queryRegex + "))?" + "(?:\\?(" + queryRegex + "))?"
+ "(?:#(" + queryRegex + "))?", + "(?:#(" + queryRegex + "))?",
std::regex::ECMAScript); std::regex::ECMAScript);

View file

@@ -11,24 +11,22 @@ std::regex flakeIdRegex(flakeIdRegexS, std::regex::ECMAScript);
ParsedURL parseURL(const std::string & url) ParsedURL parseURL(const std::string & url)
{ {
static std::regex uriRegex( static std::regex uriRegex(
"(((" + schemeRegex + "):" "((" + schemeRegex + "):"
+ "(//(" + authorityRegex + "))?" + "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))"
+ "(" + pathRegex + "))"
+ "(?:\\?(" + queryRegex + "))?" + "(?:\\?(" + queryRegex + "))?"
+ "(?:#(" + queryRegex + "))?" + "(?:#(" + queryRegex + "))?",
+ ")",
std::regex::ECMAScript); std::regex::ECMAScript);
std::smatch match; std::smatch match;
if (std::regex_match(url, match, uriRegex)) { if (std::regex_match(url, match, uriRegex)) {
auto & base = match[2]; auto & base = match[1];
std::string scheme = match[3]; std::string scheme = match[2];
auto authority = match[4].matched auto authority = match[4].matched
? std::optional<std::string>(match[5]) : std::nullopt; ? std::optional<std::string>(match[5]) : std::nullopt;
std::string path = match[6]; std::string path = match[4].matched ? match[4] : match[5];
auto & query = match[7]; auto & query = match[6];
auto & fragment = match[8]; auto & fragment = match[7];
auto isFile = scheme.find("file") != std::string::npos; auto isFile = scheme.find("file") != std::string::npos;

View file

@@ -5,16 +5,20 @@
namespace nix::fetchers { namespace nix::fetchers {
// URI stuff. // URI stuff.
const static std::string pctEncoded = "%[0-9a-fA-F][0-9a-fA-F]"; const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])";
const static std::string schemeRegex = "[a-z+]+"; const static std::string schemeRegex = "(?:[a-z+]+)";
const static std::string authorityRegex = const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])";
"(?:(?:[a-z])*@)?" const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])";
"[a-zA-Z0-9._~-]*"; const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])";
const static std::string segmentRegex = "[a-zA-Z0-9._~-]+"; const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)";
const static std::string pathRegex = "(?:/?" + segmentRegex + "(?:/" + segmentRegex + ")*|/?)"; const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")";
const static std::string pcharRegex = const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)";
"(?:[a-zA-Z0-9-._~!$&'\"()*+,;=:@ ]|" + pctEncoded + ")"; const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?";
const static std::string queryRegex = "(?:" + pcharRegex + "|[/?])*"; const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])";
const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*";
const static std::string segmentRegex = "(?:" + pcharRegex + "+)";
const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)";
const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)";
// A Git ref (i.e. branch or tag name). // A Git ref (i.e. branch or tag name).
const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check