forked from lix-project/lix
77007d4eab
The previous regex was too strict and did not match what git was allowing. It could lead to `fetchGit` not accepting valid branch names, even though they exist in a repository (for example, branch names containing `/`, which are pretty standard, like `release/1.0` branches). The new regex defines what a branch name should **NOT** contain. It takes the definitions from `refs.c` in https://github.com/git/git and `git help check-ref-format` pages. This change also introduces a test for ref name validity checking, which compares the result from Nix with the result of `git check-ref-format --branch`.
68 lines
2.7 KiB
C++
68 lines
2.7 KiB
C++
#pragma once
|
|
|
|
#include "types.hh"
|
|
|
|
#include <regex>
|
|
|
|
namespace nix {
|
|
|
|
// A URL split into its components. Plain aggregate: the member order below is
// part of the interface for positional (aggregate) initialisation.
struct ParsedURL
{
    // NOTE(review): presumably the complete URL text the other fields were
    // derived from — confirm against parseURL()'s definition.
    std::string url;
    std::string base; // URL without query/fragment
    std::string scheme;
    // Optional: a URL may have no authority component at all.
    std::optional<std::string> authority;
    std::string path;
    // Query parameters as key/value pairs (ordered by key, being a std::map).
    std::map<std::string, std::string> query;
    std::string fragment;

    // Declared here, defined elsewhere.
    // NOTE(review): presumably renders the components back into URL text — confirm.
    std::string to_string() const;

    // Declared here, defined elsewhere; which fields take part in the
    // comparison is not visible from this header.
    bool operator ==(const ParsedURL & other) const;
};
|
|
|
|
// Invokes the project's MakeError macro with (BadURL, Error).
// NOTE(review): presumably this declares an exception type `BadURL` derived
// from `Error` — confirm against the macro's definition (not in this header).
MakeError(BadURL, Error);
|
|
|
|
// Declaration only; the definition is not part of this header.
// Takes a non-owning view of the input and returns a new string.
// NOTE(review): by its name, presumably replaces each `%XX` escape in `in`
// with the byte it encodes — confirm against the implementation.
std::string percentDecode(std::string_view in);
|
|
|
|
// Declaration only; the definition is not part of this header.
// Returns a key -> value map built from `query`.
// NOTE(review): presumably `query` is the raw query component of a URL
// (e.g. `a=1&b=2`) and values are percent-decoded — confirm against the implementation.
std::map<std::string, std::string> decodeQuery(const std::string & query);
|
|
|
|
// Declaration only; the definition is not part of this header.
// Produces a ParsedURL from the textual `url`.
// NOTE(review): presumably reports malformed input via BadURL — confirm
// against the implementation.
ParsedURL parseURL(const std::string & url);
|
|
|
|
// URI stuff.
|
|
// One percent-encoded octet: `%` followed by exactly two hexadecimal digits.
static const std::string pctEncoded{"(?:%[0-9a-fA-F][0-9a-fA-F])"};
|
|
// URL scheme: one or more characters, each a lowercase ASCII letter or `+`
// (so compound schemes such as `git+https` are accepted).
static const std::string schemeRegex{"(?:[a-z+]+)"};
|
|
// Bracketed IPv6 literal: `[`, one or more hex digits / colons, `]`.
// Only the character set is checked, not the address structure.
static const std::string ipv6AddressRegex{"(?:\\[[0-9a-fA-F:]+\\])"};
|
|
// A single "unreserved" character: ASCII letter, digit, `-`, `.`, `_` or `~`.
static const std::string unreservedRegex{"(?:[a-zA-Z0-9-._~])"};
|
|
// A single sub-delimiter character, one of: ! $ & ' " ( ) * + , ; =
// (note that the double quote is part of the accepted set).
static const std::string subdelimsRegex{"(?:[!$&'\"()*+,;=])"};
|
|
// Host name: a (possibly empty) run of unreserved, percent-encoded or
// sub-delimiter characters.
static const std::string hostnameRegex =
    "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)";
|
|
// Host: either a bracketed IPv6 literal or a host name.
static const std::string hostRegex =
    "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")";
|
|
// User-info part: a (possibly empty) run of unreserved, percent-encoded or
// sub-delimiter characters, or `:`.
static const std::string userRegex =
    "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)";
|
|
// Authority: optional `user@` prefix, then the host, then an optional
// `:port` made of decimal digits. The expression is a plain sequence and is
// not wrapped in a group of its own.
static const std::string authorityRegex =
    "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?";
|
|
// A single path character: unreserved, percent-encoded, sub-delimiter,
// `:` or `@`.
static const std::string pcharRegex =
    "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])";
|
|
// Query string: zero or more path characters, additionally allowing `/`,
// `?`, a literal space and `"`.
static const std::string queryRegex =
    "(?:" + pcharRegex + "|[/? \"])*";
|
|
// One non-empty path segment: one or more path characters.
static const std::string segmentRegex =
    "(?:" + pcharRegex + "+)";
|
|
// Absolute path: zero or more `/segment` parts, with an optional trailing `/`.
// The empty string is accepted as well.
static const std::string absPathRegex =
    "(?:(?:/" + segmentRegex + ")*/?)";
|
|
// Relative path: a leading segment, then zero or more `/segment` parts, with
// an optional trailing `/`.
static const std::string pathRegex =
    "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)";
|
|
|
|
// A Git ref (i.e. branch or tag name).
|
|
// Allow-list pattern: an alphanumeric first character followed by any number
// of alphanumerics, `_`, `.` or `-`. Note that `/` is not accepted.
static const std::string refRegexS{"[a-zA-Z0-9][a-zA-Z0-9_.-]*"}; // FIXME: check
|
|
// Declared here, defined in another translation unit.
// NOTE(review): presumably compiled from refRegexS — confirm at the definition.
extern std::regex refRegex;
|
|
|
|
// Instead of defining what a good Git Ref is, we define what a bad Git Ref is
|
|
// This is because of the definition of a ref in refs.c in https://github.com/git/git
|
|
// See tests/fetchGitRefs.sh for the full definition
|
|
// Deny-list pattern, meant to be searched for inside a candidate ref name
// (some alternatives carry their own `^`/`$` anchors). Alternatives, in order:
//   `//` · leading `.` or `/` · `/.` · `..` ·
//   any control character, whitespace, `:`, `?`, `^`, `~` or `[` ·
//   a backslash · `*` · `.lock` at the end or directly before `/` ·
//   `@{` · trailing `/` or `.` · the lone name `@` · the empty string.
// The `[` inside the character class is spelled `\\[` so that the C++ literal
// only uses standard escape sequences (`\[` is not one); the regex then sees
// `\[`, an escaped literal `[`, and accepts exactly the same names as before.
const static std::string badGitRefRegexS = "//|^[./]|/\\.|\\.\\.|[[:cntrl:][:space:]:?^~\\[]|\\\\|\\*|\\.lock$|\\.lock/|@\\{|[/.]$|^@$|^$";
|
|
// Declared here, defined in another translation unit.
// NOTE(review): presumably compiled from badGitRefRegexS — confirm at the definition.
extern std::regex badGitRefRegex;
|
|
|
|
// A Git revision (a SHA-1 commit hash).
|
|
// Exactly 40 hexadecimal digits, upper or lower case.
static const std::string revRegexS{"[0-9a-fA-F]{40}"};
|
|
// Declared here, defined in another translation unit.
// NOTE(review): presumably compiled from revRegexS — confirm at the definition.
extern std::regex revRegex;
|
|
|
|
// A ref or revision, or a ref followed by a revision.
|
|
// Two alternatives with three capture groups in total:
//   group 1 — a revision on its own; or
//   group 2 — a ref, optionally followed by `/` and a revision (group 3).
static const std::string refAndOrRevRegex =
    "(?:(" + revRegexS + ")|(?:(" + refRegexS + ")(?:/(" + revRegexS + "))?))";
|
|
|
|
// Flake identifier: an ASCII letter followed by any number of letters,
// digits, `_` or `-`.
static const std::string flakeIdRegexS{"[a-zA-Z][a-zA-Z0-9_-]*"};
|
|
// Declared here, defined in another translation unit.
// NOTE(review): presumably compiled from flakeIdRegexS — confirm at the definition.
extern std::regex flakeIdRegex;
|
|
|
|
}
|