From 9bf296c970bf33b7ed53d7e2d8fbe44197482518 Mon Sep 17 00:00:00 2001 From: Kjetil Orbekk Date: Fri, 29 Apr 2022 18:30:00 -0400 Subject: [PATCH] Extract git reference parsing to a shared library These utility functions can be shared between the git and github fetchers. --- src/libfetchers/git-utils.cc | 25 +++++++++++++++++++++++++ src/libfetchers/git-utils.hh | 23 +++++++++++++++++++++++ src/libfetchers/git.cc | 29 +++++++++++------------------ src/libfetchers/github.cc | 28 +++++++++++----------------- 4 files changed, 70 insertions(+), 35 deletions(-) create mode 100644 src/libfetchers/git-utils.cc create mode 100644 src/libfetchers/git-utils.hh diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc new file mode 100644 index 000000000..060077098 --- /dev/null +++ b/src/libfetchers/git-utils.cc @@ -0,0 +1,25 @@ +#include "git-utils.hh" + +#include + +std::optional parseListReferenceHeadRef(std::string_view line) { + const static std::regex head_ref_regex("^ref: ([^\\s]+)\\t+HEAD$"); + std::match_results match; + if (std::regex_match(line.cbegin(), line.cend(), match, head_ref_regex)) { + return match[1]; + } else { + return std::nullopt; + } +} + +std::optional parseListReferenceForRev(std::string_view rev, std::string_view line) { + const static std::regex rev_regex("^([^\\t]+)\\t+(.*)$"); + std::match_results match; + if (!std::regex_match(line.cbegin(), line.cend(), match, rev_regex)) { + return std::nullopt; + } + if (rev != match[2].str()) { + return std::nullopt; + } + return match[1]; +} diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh new file mode 100644 index 000000000..946a68a9e --- /dev/null +++ b/src/libfetchers/git-utils.hh @@ -0,0 +1,23 @@ +#pragma once + +#include +#include +#include + +// Parses the HEAD ref as reported by `git ls-remote --symref` +// +// Returns the head branch name as reported by `git ls-remote --symref`, e.g., if +// ls-remote returns the output below, "main" is returned based on the ref line. +// +// ref: refs/heads/main HEAD +// +// If the repository is in 'detached head' state (HEAD is pointing to a rev +// instead of a branch), parseListReferenceForRev("HEAD") may be used instead. +std::optional parseListReferenceHeadRef(std::string_view line); + +// Parses a reference line from `git ls-remote --symref`, e.g., +// parseListReferenceForRev("refs/heads/master", line) will return 6926... +// given the line below. +// +// 6926beab444c33fb57b21819b6642d032016bb1e refs/heads/master +std::optional parseListReferenceForRev(std::string_view rev, std::string_view line); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 968cd642a..9d4348cf1 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -6,6 +6,7 @@ #include "url-parts.hh" #include "pathlocks.hh" #include "util.hh" +#include "git-utils.hh" #include "fetch-settings.hh" @@ -69,27 +70,19 @@ std::optional readHead(const Path & path) .args = {"ls-remote", "--symref", path}, }); if (exit_code != 0) { - return std::nullopt; + return std::nullopt; } - // Matches the common case when HEAD points to a branch, e.g.: - // "ref: refs/heads/main HEAD". - const static std::regex head_ref_regex("^ref:\\s*([^\\s]+)\\s*HEAD$"); - // Matches when HEAD points directly at a commit, e.g.: - // "71abcd... HEAD". - const static std::regex head_rev_regex("^([^\\s]+)\\s*HEAD$"); - - for (const auto & line : tokenizeString>(output, "\n")) { - std::smatch match; - if (std::regex_match(line, match, head_ref_regex)) { - debug("resolved HEAD ref '%s' for repo '%s'", match[1], path); - return match[1]; - } else if (std::regex_match(line, match, head_rev_regex)) { - debug("resolved HEAD ref '%s' for repo '%s'", match[1], path); - return match[1]; - } + std::string_view line = output; + line = line.substr(0, line.find("\n")); + if (const auto ref = parseListReferenceHeadRef(line); ref) { + debug("resolved HEAD ref '%s' for repo '%s'", *ref, path); + return *ref; + } + if (const auto rev = parseListReferenceForRev("HEAD", line); rev) { + debug("resolved HEAD rev '%s' for repo '%s'", *rev, path); + return *rev; } - return std::nullopt; } diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 58b6e7c04..1bdf2759f 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -4,7 +4,7 @@ #include "store-api.hh" #include "types.hh" #include "url-parts.hh" - +#include "git-utils.hh" #include "fetchers.hh" #include "fetch-settings.hh" @@ -383,35 +383,29 @@ struct SourceHutInputScheme : GitArchiveInputScheme std::string line; getline(is, line); - auto ref_index = line.find("ref: "); - if (ref_index == std::string::npos) { + auto r = parseListReferenceHeadRef(line); + if (!r) { throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref); } - - ref_uri = line.substr(ref_index+5, line.length()-1); - } else + ref_uri = *r; + } else { ref_uri = fmt("refs/(heads|tags)/%s", ref); + } auto file = store->toRealPath( downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath); std::ifstream is(file); std::string line; - std::string id; - while(getline(is, line)) { - // Append $ to avoid partial name matches - std::regex pattern(fmt("%s$", ref_uri)); - - if (std::regex_search(line, pattern)) { - id = line.substr(0, line.find('\t')); - break; - } + std::optional id; + while(!id && getline(is, line)) { + id = parseListReferenceForRev(ref_uri, line); } - if(id.empty()) + if(!id) throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref); - auto rev = Hash::parseAny(id, htSHA1); + auto rev = Hash::parseAny(*id, htSHA1); debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev()); return rev; }