From e68676e6c859815f40079b6340399d82cc1913b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?= Date: Wed, 4 May 2022 14:32:21 +0200 Subject: [PATCH] Fix the parsing of the sourcehut refs file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since a26be9f3b89be2ee90c6358250b9889b37f95cf8, the same parser is used to parse the result of sourcehut’s `HEAD` endpoint (coming from [git dumb protocol]) and the output of `git ls-remote`. However, they are very slightly different (the former doesn’t specify the current reference since it’s implied to be `HEAD`). Unify both, and make the parser a bit more robust and understandable (by making it more typed and adding tests for it) [git dumb protocol]: https://git-scm.com/book/en/v2/Git-Internals-Transfer-Protocols#_the_dumb_protocol --- src/libfetchers/git-utils.cc | 27 ------------------------ src/libfetchers/git-utils.hh | 23 --------------------- src/libfetchers/git.cc | 19 +++++++++-------- src/libfetchers/github.cc | 12 ++++++----- src/libutil/git.cc | 25 ++++++++++++++++++++++ src/libutil/git.hh | 40 ++++++++++++++++++++++++++++++++++++ src/libutil/tests/git.cc | 33 +++++++++++++++++++++++++++++ 7 files changed, 116 insertions(+), 63 deletions(-) delete mode 100644 src/libfetchers/git-utils.cc delete mode 100644 src/libfetchers/git-utils.hh create mode 100644 src/libutil/git.cc create mode 100644 src/libutil/git.hh create mode 100644 src/libutil/tests/git.cc diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc deleted file mode 100644 index b2d6b7893..000000000 --- a/src/libfetchers/git-utils.cc +++ /dev/null @@ -1,27 +0,0 @@ -#include "git-utils.hh" - -#include - -std::optional parseListReferenceHeadRef(std::string_view line) -{ - const static std::regex head_ref_regex("^ref: ([^\\s]+)\\t+HEAD$"); - std::match_results match; - if (std::regex_match(line.cbegin(), line.cend(), match, head_ref_regex)) { - return match[1]; - } else { - return std::nullopt; - } -} - -std::optional parseListReferenceForRev(std::string_view rev, std::string_view line) -{ - const static std::regex rev_regex("^([^\\t]+)\\t+(.*)$"); - std::match_results match; - if (!std::regex_match(line.cbegin(), line.cend(), match, rev_regex)) { - return std::nullopt; - } - if (rev != match[2].str()) { - return std::nullopt; - } - return match[1]; -} diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh deleted file mode 100644 index 946a68a9e..000000000 --- a/src/libfetchers/git-utils.hh +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include -#include -#include - -// Parses the HEAD ref as reported by `git ls-remote --symref` -// -// Returns the head branch name as reported by `git ls-remote --symref`, e.g., if -// ls-remote returns the output below, "main" is returned based on the ref line. -// -// ref: refs/heads/main HEAD -// -// If the repository is in 'detached head' state (HEAD is pointing to a rev -// instead of a branch), parseListReferenceForRev("HEAD") may be used instead. -std::optional parseListReferenceHeadRef(std::string_view line); - -// Parses a reference line from `git ls-remote --symref`, e.g., -// parseListReferenceForRev("refs/heads/master", line) will return 6926... -// given the line below. -// -// 6926beab444c33fb57b21819b6642d032016bb1e refs/heads/master -std::optional parseListReferenceForRev(std::string_view rev, std::string_view line); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 266246fe9..d23a820a4 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -6,7 +6,7 @@ #include "url-parts.hh" #include "pathlocks.hh" #include "util.hh" -#include "git-utils.hh" +#include "git.hh" #include "fetch-settings.hh" @@ -72,13 +72,16 @@ std::optional readHead(const Path & path) std::string_view line = output; line = line.substr(0, line.find("\n")); - if (const auto ref = parseListReferenceHeadRef(line); ref) { - debug("resolved HEAD ref '%s' for repo '%s'", *ref, path); - return *ref; - } - if (const auto rev = parseListReferenceForRev("HEAD", line); rev) { - debug("resolved HEAD rev '%s' for repo '%s'", *rev, path); - return *rev; + if (const auto parseResult = git::parseLsRemoteLine(line)) { + switch (parseResult->kind) { + case git::LsRemoteRefLine::Kind::Symbolic: + debug("resolved HEAD ref '%s' for repo '%s'", parseResult->target, path); + break; + case git::LsRemoteRefLine::Kind::Object: + debug("resolved HEAD rev '%s' for repo '%s'", parseResult->target, path); + break; + } + return parseResult->target; } return std::nullopt; } diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 1bdf2759f..a1084c984 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -4,7 +4,7 @@ #include "store-api.hh" #include "types.hh" #include "url-parts.hh" -#include "git-utils.hh" +#include "git.hh" #include "fetchers.hh" #include "fetch-settings.hh" @@ -383,11 +383,11 @@ struct SourceHutInputScheme : GitArchiveInputScheme std::string line; getline(is, line); - auto r = parseListReferenceHeadRef(line); - if (!r) { + auto remoteLine = git::parseLsRemoteLine(line); + if (!remoteLine) { throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref); } - ref_uri = *r; + ref_uri = remoteLine->target; } else { ref_uri = fmt("refs/(heads|tags)/%s", ref); } @@ -399,7 +399,9 @@ struct SourceHutInputScheme : GitArchiveInputScheme std::string line; std::optional id; while(!id && getline(is, line)) { - id = parseListReferenceForRev(ref_uri, line); + auto parsedLine = git::parseLsRemoteLine(line); + if (parsedLine && parsedLine->reference == ref_uri) + id = parsedLine->target; } if(!id) diff --git a/src/libutil/git.cc b/src/libutil/git.cc new file mode 100644 index 000000000..f35c2fdb7 --- /dev/null +++ b/src/libutil/git.cc @@ -0,0 +1,25 @@ +#include "git.hh" + +#include + +namespace nix { +namespace git { + +std::optional parseLsRemoteLine(std::string_view line) +{ + const static std::regex line_regex("^(ref: *)?([^\\s]+)(?:\\t+(.*))?$"); + std::match_results match; + if (!std::regex_match(line.cbegin(), line.cend(), match, line_regex)) + return std::nullopt; + + return LsRemoteRefLine { + .kind = match[1].length() == 0 + ? LsRemoteRefLine::Kind::Object + : LsRemoteRefLine::Kind::Symbolic, + .target = match[2], + .reference = match[3].length() == 0 ? std::nullopt : std::optional{ match[3] } + }; +} + +} +} diff --git a/src/libutil/git.hh b/src/libutil/git.hh new file mode 100644 index 000000000..cb13ef0e5 --- /dev/null +++ b/src/libutil/git.hh @@ -0,0 +1,40 @@ +#pragma once + +#include +#include +#include + +namespace nix { + +namespace git { + +// A line from the output of `git ls-remote --symref`. +// +// These can be of two kinds: +// +// - Symbolic references of the form +// +// ref: {target} {reference} +// +// where {target} is itself a reference and {reference} is optional +// +// - Object references of the form +// +// {target} {reference} +// +// where {target} is a commit id and {reference} is mandatory +struct LsRemoteRefLine { + enum struct Kind { + Symbolic, + Object + }; + Kind kind; + std::string target; + std::optional reference; +}; + +std::optional parseLsRemoteLine(std::string_view line); + +} + +} diff --git a/src/libutil/tests/git.cc b/src/libutil/tests/git.cc new file mode 100644 index 000000000..5b5715fc2 --- /dev/null +++ b/src/libutil/tests/git.cc @@ -0,0 +1,33 @@ +#include "git.hh" +#include + +namespace nix { + + TEST(GitLsRemote, parseSymrefLineWithReference) { + auto line = "ref: refs/head/main HEAD"; + auto res = git::parseLsRemoteLine(line); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res->kind, git::LsRemoteRefLine::Kind::Symbolic); + ASSERT_EQ(res->target, "refs/head/main"); + ASSERT_EQ(res->reference, "HEAD"); + } + + TEST(GitLsRemote, parseSymrefLineWithNoReference) { + auto line = "ref: refs/head/main"; + auto res = git::parseLsRemoteLine(line); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res->kind, git::LsRemoteRefLine::Kind::Symbolic); + ASSERT_EQ(res->target, "refs/head/main"); + ASSERT_EQ(res->reference, std::nullopt); + } + + TEST(GitLsRemote, parseObjectRefLine) { + auto line = "abc123 refs/head/main"; + auto res = git::parseLsRemoteLine(line); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res->kind, git::LsRemoteRefLine::Kind::Object); + ASSERT_EQ(res->target, "abc123"); + ASSERT_EQ(res->reference, "refs/head/main"); + } +} +