Extract git reference parsing to a shared library

These utility functions can be shared between the git and github fetchers.
This commit is contained in:
Kjetil Orbekk 2022-04-29 18:30:00 -04:00
parent c21afd684c
commit 9bf296c970
4 changed files with 70 additions and 35 deletions

View file

@ -0,0 +1,25 @@
#include "git-utils.hh"
#include <regex>
// Extracts the symbolic-ref target from a `ref: <target>\tHEAD` line.
//
// Yields the captured target (e.g. "refs/heads/main") when the entire line has
// that shape, and std::nullopt for any other line.
std::optional<std::string> parseListReferenceHeadRef(std::string_view line) {
    using ViewMatch = std::match_results<std::string_view::const_iterator>;

    // "ref: ", a run of non-whitespace characters, one or more tabs, "HEAD".
    static const std::regex symrefPattern("^ref: ([^\\s]+)\\t+HEAD$");

    ViewMatch parts;
    const bool matched = std::regex_match(line.cbegin(), line.cend(), parts, symrefPattern);
    if (!matched)
        return std::nullopt;

    // Group 1 is the ref name sitting between "ref: " and the tab separator.
    return parts[1].str();
}
// Looks up `rev` in a single `<id>\t<name>` listing line.
//
// The line is split into a leading tab-free field and the remainder after the
// tab separator(s). The leading field is returned only when the remainder is
// exactly equal to `rev`; otherwise (or when the line has no such shape)
// std::nullopt is returned.
std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line) {
    static const std::regex fieldsPattern("^([^\\t]+)\\t+(.*)$");

    std::match_results<std::string_view::const_iterator> fields;
    const bool wellFormed = std::regex_match(line.cbegin(), line.cend(), fields, fieldsPattern);

    // fields[1] is the id column, fields[2] the reference name column.
    if (wellFormed && fields[2].str() == rev)
        return fields[1].str();

    return std::nullopt;
}

View file

@ -0,0 +1,23 @@
#pragma once
#include <string>
#include <string_view>
#include <optional>
// Parses the HEAD symbolic-ref line as reported by `git ls-remote --symref`.
//
// Returns the full name of the ref that HEAD points to, e.g., if ls-remote
// returns the line below (the two fields are separated by a tab),
// "refs/heads/main" is returned.
//
// ref: refs/heads/main HEAD
//
// Returns std::nullopt if `line` does not have this form. If the repository is
// in 'detached head' state (HEAD is pointing to a rev instead of a branch),
// parseListReferenceForRev("HEAD", line) may be used instead.
std::optional<std::string> parseListReferenceHeadRef(std::string_view line);
// Parses a reference line from `git ls-remote --symref`, e.g.,
// parseListReferenceForRev("refs/heads/master", line) will return 6926...
// given the line below (the two fields are separated by a tab).
//
// 6926beab444c33fb57b21819b6642d032016bb1e refs/heads/master
//
// `rev` is compared to the reference name by exact string equality; it is not
// interpreted as a pattern. Returns std::nullopt if the line is not of this
// form or names a different reference.
std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line);

View file

@ -6,6 +6,7 @@
#include "url-parts.hh" #include "url-parts.hh"
#include "pathlocks.hh" #include "pathlocks.hh"
#include "util.hh" #include "util.hh"
#include "git-utils.hh"
#include "fetch-settings.hh" #include "fetch-settings.hh"
@ -69,27 +70,19 @@ std::optional<std::string> readHead(const Path & path)
.args = {"ls-remote", "--symref", path}, .args = {"ls-remote", "--symref", path},
}); });
if (exit_code != 0) { if (exit_code != 0) {
return std::nullopt; return std::nullopt;
} }
// Matches the common case when HEAD points to a branch, e.g.: std::string_view line = output;
// "ref: refs/heads/main HEAD". line = line.substr(0, line.find("\n"));
const static std::regex head_ref_regex("^ref:\\s*([^\\s]+)\\s*HEAD$"); if (const auto ref = parseListReferenceHeadRef(line); ref) {
// Matches when HEAD points directly at a commit, e.g.: debug("resolved HEAD ref '%s' for repo '%s'", *ref, path);
// "71abcd... HEAD". return *ref;
const static std::regex head_rev_regex("^([^\\s]+)\\s*HEAD$"); }
if (const auto rev = parseListReferenceForRev("HEAD", line); rev) {
for (const auto & line : tokenizeString<std::vector<std::string>>(output, "\n")) { debug("resolved HEAD rev '%s' for repo '%s'", *rev, path);
std::smatch match; return *rev;
if (std::regex_match(line, match, head_ref_regex)) {
debug("resolved HEAD ref '%s' for repo '%s'", match[1], path);
return match[1];
} else if (std::regex_match(line, match, head_rev_regex)) {
debug("resolved HEAD ref '%s' for repo '%s'", match[1], path);
return match[1];
}
} }
return std::nullopt; return std::nullopt;
} }

View file

@ -4,7 +4,7 @@
#include "store-api.hh" #include "store-api.hh"
#include "types.hh" #include "types.hh"
#include "url-parts.hh" #include "url-parts.hh"
#include "git-utils.hh"
#include "fetchers.hh" #include "fetchers.hh"
#include "fetch-settings.hh" #include "fetch-settings.hh"
@ -383,35 +383,29 @@ struct SourceHutInputScheme : GitArchiveInputScheme
std::string line; std::string line;
getline(is, line); getline(is, line);
auto ref_index = line.find("ref: "); auto r = parseListReferenceHeadRef(line);
if (ref_index == std::string::npos) { if (!r) {
throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref); throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref);
} }
ref_uri = *r;
ref_uri = line.substr(ref_index+5, line.length()-1); } else {
} else
ref_uri = fmt("refs/(heads|tags)/%s", ref); ref_uri = fmt("refs/(heads|tags)/%s", ref);
}
auto file = store->toRealPath( auto file = store->toRealPath(
downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath); downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath);
std::ifstream is(file); std::ifstream is(file);
std::string line; std::string line;
std::string id; std::optional<std::string> id;
while(getline(is, line)) { while(!id && getline(is, line)) {
// Append $ to avoid partial name matches id = parseListReferenceForRev(ref_uri, line);
std::regex pattern(fmt("%s$", ref_uri));
if (std::regex_search(line, pattern)) {
id = line.substr(0, line.find('\t'));
break;
}
} }
if(id.empty()) if(!id)
throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref); throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref);
auto rev = Hash::parseAny(id, htSHA1); auto rev = Hash::parseAny(*id, htSHA1);
debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev()); debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev());
return rev; return rev;
} }