Extract git reference parsing to a shared library
These utility functions can be shared between the git and github fetchers.
This commit is contained in:
parent
c21afd684c
commit
9bf296c970
25
src/libfetchers/git-utils.cc
Normal file
25
src/libfetchers/git-utils.cc
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
#include "git-utils.hh"
|
||||||
|
|
||||||
|
#include <regex>
|
||||||
|
|
||||||
|
// Parses a symbolic-ref line of the form "ref: <target>\tHEAD".
//
// The whole line must match: the literal prefix "ref: ", a run of
// non-whitespace characters (the target), one or more tabs, and the
// literal "HEAD". On a match the target is returned exactly as captured;
// any other input yields std::nullopt.
std::optional<std::string> parseListReferenceHeadRef(std::string_view line) {
    using ViewIterator = std::string_view::const_iterator;

    const static std::regex symref_pattern("^ref: ([^\\s]+)\\t+HEAD$");

    std::match_results<ViewIterator> groups;
    const bool matched = std::regex_match(line.cbegin(), line.cend(), groups, symref_pattern);

    // Guard clause: anything that is not a symbolic HEAD ref line is rejected.
    if (!matched) {
        return std::nullopt;
    }

    // Capture group 1 is the ref target between "ref: " and the tab(s).
    return groups[1].str();
}
|
||||||
|
|
||||||
|
// Parses a "<id>\t<name>" reference line and returns <id> when <name>
// equals `rev`.
//
// The line is split at the first run of tab characters: everything before
// it is the id, everything after it is the reference name. std::nullopt is
// returned when the line does not have that shape or when the name differs
// from `rev`.
std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line) {
    const static std::regex ref_line_pattern("^([^\\t]+)\\t+(.*)$");

    std::match_results<std::string_view::const_iterator> fields;
    const bool wellFormed = std::regex_match(line.cbegin(), line.cend(), fields, ref_line_pattern);

    // fields[1] is the id column, fields[2] the reference name column.
    if (wellFormed && rev == fields[2].str()) {
        return fields[1].str();
    }

    return std::nullopt;
}
|
23
src/libfetchers/git-utils.hh
Normal file
23
src/libfetchers/git-utils.hh
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <string_view>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
|
// Parses the HEAD ref as reported by `git ls-remote --symref`
|
||||||
|
//
|
||||||
|
// Returns the head branch name as reported by `git ls-remote --symref`, e.g., if
|
||||||
|
// ls-remote returns the output below, "refs/heads/main" is returned based on the ref line.
|
||||||
|
//
|
||||||
|
// ref: refs/heads/main HEAD
|
||||||
|
//
|
||||||
|
// If the repository is in 'detached head' state (HEAD is pointing to a rev
|
||||||
|
// instead of a branch), parseListReferenceForRev("HEAD", line) may be used instead.
|
||||||
|
std::optional<std::string> parseListReferenceHeadRef(std::string_view line);
|
||||||
|
|
||||||
|
// Parses a reference line from `git ls-remote --symref`, e.g.,
|
||||||
|
// parseListReferenceForRev("refs/heads/master", line) will return 6926...
|
||||||
|
// given the line below.
|
||||||
|
//
|
||||||
|
// 6926beab444c33fb57b21819b6642d032016bb1e refs/heads/master
|
||||||
|
std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line);
|
|
@ -6,6 +6,7 @@
|
||||||
#include "url-parts.hh"
|
#include "url-parts.hh"
|
||||||
#include "pathlocks.hh"
|
#include "pathlocks.hh"
|
||||||
#include "util.hh"
|
#include "util.hh"
|
||||||
|
#include "git-utils.hh"
|
||||||
|
|
||||||
#include "fetch-settings.hh"
|
#include "fetch-settings.hh"
|
||||||
|
|
||||||
|
@ -69,27 +70,19 @@ std::optional<std::string> readHead(const Path & path)
|
||||||
.args = {"ls-remote", "--symref", path},
|
.args = {"ls-remote", "--symref", path},
|
||||||
});
|
});
|
||||||
if (exit_code != 0) {
|
if (exit_code != 0) {
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Matches the common case when HEAD points to a branch, e.g.:
|
std::string_view line = output;
|
||||||
// "ref: refs/heads/main HEAD".
|
line = line.substr(0, line.find("\n"));
|
||||||
const static std::regex head_ref_regex("^ref:\\s*([^\\s]+)\\s*HEAD$");
|
if (const auto ref = parseListReferenceHeadRef(line); ref) {
|
||||||
// Matches when HEAD points directly at a commit, e.g.:
|
debug("resolved HEAD ref '%s' for repo '%s'", *ref, path);
|
||||||
// "71abcd... HEAD".
|
return *ref;
|
||||||
const static std::regex head_rev_regex("^([^\\s]+)\\s*HEAD$");
|
}
|
||||||
|
if (const auto rev = parseListReferenceForRev("HEAD", line); rev) {
|
||||||
for (const auto & line : tokenizeString<std::vector<std::string>>(output, "\n")) {
|
debug("resolved HEAD rev '%s' for repo '%s'", *rev, path);
|
||||||
std::smatch match;
|
return *rev;
|
||||||
if (std::regex_match(line, match, head_ref_regex)) {
|
|
||||||
debug("resolved HEAD ref '%s' for repo '%s'", match[1], path);
|
|
||||||
return match[1];
|
|
||||||
} else if (std::regex_match(line, match, head_rev_regex)) {
|
|
||||||
debug("resolved HEAD ref '%s' for repo '%s'", match[1], path);
|
|
||||||
return match[1];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
#include "store-api.hh"
|
#include "store-api.hh"
|
||||||
#include "types.hh"
|
#include "types.hh"
|
||||||
#include "url-parts.hh"
|
#include "url-parts.hh"
|
||||||
|
#include "git-utils.hh"
|
||||||
#include "fetchers.hh"
|
#include "fetchers.hh"
|
||||||
#include "fetch-settings.hh"
|
#include "fetch-settings.hh"
|
||||||
|
|
||||||
|
@ -383,35 +383,29 @@ struct SourceHutInputScheme : GitArchiveInputScheme
|
||||||
std::string line;
|
std::string line;
|
||||||
getline(is, line);
|
getline(is, line);
|
||||||
|
|
||||||
auto ref_index = line.find("ref: ");
|
auto r = parseListReferenceHeadRef(line);
|
||||||
if (ref_index == std::string::npos) {
|
if (!r) {
|
||||||
throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref);
|
throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref);
|
||||||
}
|
}
|
||||||
|
ref_uri = *r;
|
||||||
ref_uri = line.substr(ref_index+5, line.length()-1);
|
} else {
|
||||||
} else
|
|
||||||
ref_uri = fmt("refs/(heads|tags)/%s", ref);
|
ref_uri = fmt("refs/(heads|tags)/%s", ref);
|
||||||
|
}
|
||||||
|
|
||||||
auto file = store->toRealPath(
|
auto file = store->toRealPath(
|
||||||
downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath);
|
downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath);
|
||||||
std::ifstream is(file);
|
std::ifstream is(file);
|
||||||
|
|
||||||
std::string line;
|
std::string line;
|
||||||
std::string id;
|
std::optional<std::string> id;
|
||||||
while(getline(is, line)) {
|
while(!id && getline(is, line)) {
|
||||||
// Append $ to avoid partial name matches
|
id = parseListReferenceForRev(ref_uri, line);
|
||||||
std::regex pattern(fmt("%s$", ref_uri));
|
|
||||||
|
|
||||||
if (std::regex_search(line, pattern)) {
|
|
||||||
id = line.substr(0, line.find('\t'));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(id.empty())
|
if(!id)
|
||||||
throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref);
|
throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref);
|
||||||
|
|
||||||
auto rev = Hash::parseAny(id, htSHA1);
|
auto rev = Hash::parseAny(*id, htSHA1);
|
||||||
debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev());
|
debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev());
|
||||||
return rev;
|
return rev;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue