Fix the parsing of the sourcehut refs file

Since a26be9f3b8, the same parser is used
to parse the result of sourcehut’s `HEAD` endpoint (coming from [git
dumb protocol]) and the output of `git ls-remote`. However, they are very
slightly different (the former doesn’t specify the current reference
since it’s implied to be `HEAD`).

Unify both, and make the parser a bit more robust and understandable (by
making it more typed and adding tests for it)

[git dumb protocol]: https://git-scm.com/book/en/v2/Git-Internals-Transfer-Protocols#_the_dumb_protocol
This commit is contained in:
Théophane Hufschmitt 2022-05-04 14:32:21 +02:00
parent 470e27ce80
commit e68676e6c8
7 changed files with 116 additions and 63 deletions

View file

@ -1,27 +0,0 @@
#include "git-utils.hh"
#include <regex>
std::optional<std::string> parseListReferenceHeadRef(std::string_view line)
{
const static std::regex head_ref_regex("^ref: ([^\\s]+)\\t+HEAD$");
std::match_results<std::string_view::const_iterator> match;
if (std::regex_match(line.cbegin(), line.cend(), match, head_ref_regex)) {
return match[1];
} else {
return std::nullopt;
}
}
std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line)
{
const static std::regex rev_regex("^([^\\t]+)\\t+(.*)$");
std::match_results<std::string_view::const_iterator> match;
if (!std::regex_match(line.cbegin(), line.cend(), match, rev_regex)) {
return std::nullopt;
}
if (rev != match[2].str()) {
return std::nullopt;
}
return match[1];
}

View file

@ -1,23 +0,0 @@
#pragma once
#include <string>
#include <string_view>
#include <optional>
// Parses the HEAD ref as reported by `git ls-remote --symref`
//
// Returns the head branch name as reported by `git ls-remote --symref`, e.g., if
// ls-remote returns the output below, "main" is returned based on the ref line.
//
// ref: refs/heads/main HEAD
//
// If the repository is in 'detached head' state (HEAD is pointing to a rev
// instead of a branch), parseListReferenceForRev("HEAD") may be used instead.
std::optional<std::string> parseListReferenceHeadRef(std::string_view line);
// Parses a reference line from `git ls-remote --symref`, e.g.,
// parseListReferenceForRev("refs/heads/master", line) will return 6926...
// given the line below.
//
// 6926beab444c33fb57b21819b6642d032016bb1e refs/heads/master
std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line);

View file

@ -6,7 +6,7 @@
#include "url-parts.hh"
#include "pathlocks.hh"
#include "util.hh"
#include "git-utils.hh"
#include "git.hh"
#include "fetch-settings.hh"
@ -72,13 +72,16 @@ std::optional<std::string> readHead(const Path & path)
std::string_view line = output;
line = line.substr(0, line.find("\n"));
if (const auto ref = parseListReferenceHeadRef(line); ref) {
debug("resolved HEAD ref '%s' for repo '%s'", *ref, path);
return *ref;
if (const auto parseResult = git::parseLsRemoteLine(line)) {
switch (parseResult->kind) {
case git::LsRemoteRefLine::Kind::Symbolic:
debug("resolved HEAD ref '%s' for repo '%s'", parseResult->target, path);
break;
case git::LsRemoteRefLine::Kind::Object:
debug("resolved HEAD rev '%s' for repo '%s'", parseResult->target, path);
break;
}
if (const auto rev = parseListReferenceForRev("HEAD", line); rev) {
debug("resolved HEAD rev '%s' for repo '%s'", *rev, path);
return *rev;
return parseResult->target;
}
return std::nullopt;
}

View file

@ -4,7 +4,7 @@
#include "store-api.hh"
#include "types.hh"
#include "url-parts.hh"
#include "git-utils.hh"
#include "git.hh"
#include "fetchers.hh"
#include "fetch-settings.hh"
@ -383,11 +383,11 @@ struct SourceHutInputScheme : GitArchiveInputScheme
std::string line;
getline(is, line);
auto r = parseListReferenceHeadRef(line);
if (!r) {
auto remoteLine = git::parseLsRemoteLine(line);
if (!remoteLine) {
throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref);
}
ref_uri = *r;
ref_uri = remoteLine->target;
} else {
ref_uri = fmt("refs/(heads|tags)/%s", ref);
}
@ -399,7 +399,9 @@ struct SourceHutInputScheme : GitArchiveInputScheme
std::string line;
std::optional<std::string> id;
while(!id && getline(is, line)) {
id = parseListReferenceForRev(ref_uri, line);
auto parsedLine = git::parseLsRemoteLine(line);
if (parsedLine && parsedLine->reference == ref_uri)
id = parsedLine->target;
}
if(!id)

25
src/libutil/git.cc Normal file
View file

@ -0,0 +1,25 @@
#include "git.hh"
#include <regex>
namespace nix {
namespace git {
std::optional<LsRemoteRefLine> parseLsRemoteLine(std::string_view line)
{
const static std::regex line_regex("^(ref: *)?([^\\s]+)(?:\\t+(.*))?$");
std::match_results<std::string_view::const_iterator> match;
if (!std::regex_match(line.cbegin(), line.cend(), match, line_regex))
return std::nullopt;
return LsRemoteRefLine {
.kind = match[1].length() == 0
? LsRemoteRefLine::Kind::Object
: LsRemoteRefLine::Kind::Symbolic,
.target = match[2],
.reference = match[3].length() == 0 ? std::nullopt : std::optional<std::string>{ match[3] }
};
}
}
}

40
src/libutil/git.hh Normal file
View file

@ -0,0 +1,40 @@
#pragma once
#include <string>
#include <string_view>
#include <optional>
namespace nix {
namespace git {
// A line from the output of `git ls-remote --symref`.
//
// These can be of two kinds:
//
// - Symbolic references of the form
//
// ref: {target} {reference}
//
// where {target} is itself a reference and {reference} is optional
//
// - Object references of the form
//
// {target} {reference}
//
// where {target} is a commit id and {reference} is mandatory
struct LsRemoteRefLine {
enum struct Kind {
Symbolic,
Object
};
Kind kind;
std::string target;
std::optional<std::string> reference;
};
std::optional<LsRemoteRefLine> parseLsRemoteLine(std::string_view line);
}
}

33
src/libutil/tests/git.cc Normal file
View file

@ -0,0 +1,33 @@
#include "git.hh"
#include <gtest/gtest.h>
namespace nix {
TEST(GitLsRemote, parseSymrefLineWithReference) {
auto line = "ref: refs/head/main HEAD";
auto res = git::parseLsRemoteLine(line);
ASSERT_TRUE(res.has_value());
ASSERT_EQ(res->kind, git::LsRemoteRefLine::Kind::Symbolic);
ASSERT_EQ(res->target, "refs/head/main");
ASSERT_EQ(res->reference, "HEAD");
}
TEST(GitLsRemote, parseSymrefLineWithNoReference) {
auto line = "ref: refs/head/main";
auto res = git::parseLsRemoteLine(line);
ASSERT_TRUE(res.has_value());
ASSERT_EQ(res->kind, git::LsRemoteRefLine::Kind::Symbolic);
ASSERT_EQ(res->target, "refs/head/main");
ASSERT_EQ(res->reference, std::nullopt);
}
TEST(GitLsRemote, parseObjectRefLine) {
auto line = "abc123 refs/head/main";
auto res = git::parseLsRemoteLine(line);
ASSERT_TRUE(res.has_value());
ASSERT_EQ(res->kind, git::LsRemoteRefLine::Kind::Object);
ASSERT_EQ(res->target, "abc123");
ASSERT_EQ(res->reference, "refs/head/main");
}
}