lix/src/libutil/url.hh
Nikola Knezevic 77007d4eab Improve ref validity checking in fetchGit
The previous regex was too strict and did not match what git was allowing. It
could lead to `fetchGit` not accepting valid branch names, even though they
exist in a repository (for example, branch names containing `/`, which are
pretty standard, like `release/1.0` branches).

The new regex defines what a branch name should **NOT** contain. It takes the
definitions from `refs.c` in https://github.com/git/git and `git help
check-ref-format` pages.

This change also introduces a test for ref name validity checking, which
compares the result from Nix with the result of `git check-ref-format --branch`.
2020-05-30 12:29:35 +02:00

69 lines
2.7 KiB
C++

#pragma once
#include "types.hh"
#include <regex>
namespace nix {
struct ParsedURL
{
std::string url;
std::string base; // URL without query/fragment
std::string scheme;
std::optional<std::string> authority;
std::string path;
std::map<std::string, std::string> query;
std::string fragment;
std::string to_string() const;
bool operator ==(const ParsedURL & other) const;
};
MakeError(BadURL, Error);
std::string percentDecode(std::string_view in);
std::map<std::string, std::string> decodeQuery(const std::string & query);
ParsedURL parseURL(const std::string & url);
// URI stuff.
const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])";
const static std::string schemeRegex = "(?:[a-z+]+)";
const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])";
const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])";
const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])";
const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)";
const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")";
const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)";
const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?";
const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])";
const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*";
const static std::string segmentRegex = "(?:" + pcharRegex + "+)";
const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)";
const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)";
// A Git ref (i.e. branch or tag name).
const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check
extern std::regex refRegex;
// Instead of defining what a good Git Ref is, we define what a bad Git Ref is
// This is because of the definition of a ref in refs.c in https://github.com/git/git
// See tests/fetchGitRefs.sh for the full definition
const static std::string badGitRefRegexS = "//|^[./]|/\\.|\\.\\.|[[:cntrl:][:space:]:?^~\[]|\\\\|\\*|\\.lock$|\\.lock/|@\\{|[/.]$|^@$|^$";
extern std::regex badGitRefRegex;
// A Git revision (a SHA-1 commit hash).
const static std::string revRegexS = "[0-9a-fA-F]{40}";
extern std::regex revRegex;
// A ref or revision, or a ref followed by a revision.
const static std::string refAndOrRevRegex = "(?:(" + revRegexS + ")|(?:(" + refRegexS + ")(?:/(" + revRegexS + "))?))";
const static std::string flakeIdRegexS = "[a-zA-Z][a-zA-Z0-9_-]*";
extern std::regex flakeIdRegex;
}