Merge pull request #4638 from orbekk/read_head

Resolve reference for remote git repositories (makes fetchGit work with non-'master' branch)
This commit is contained in:
Eelco Dolstra 2022-05-02 13:31:30 +02:00 committed by GitHub
commit a26be9f3b8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 326 additions and 130 deletions

View file

@ -0,0 +1,25 @@
#include "git-utils.hh"
#include <regex>
std::optional<std::string> parseListReferenceHeadRef(std::string_view line) {
const static std::regex head_ref_regex("^ref: ([^\\s]+)\\t+HEAD$");
std::match_results<std::string_view::const_iterator> match;
if (std::regex_match(line.cbegin(), line.cend(), match, head_ref_regex)) {
return match[1];
} else {
return std::nullopt;
}
}
std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line) {
const static std::regex rev_regex("^([^\\t]+)\\t+(.*)$");
std::match_results<std::string_view::const_iterator> match;
if (!std::regex_match(line.cbegin(), line.cend(), match, rev_regex)) {
return std::nullopt;
}
if (rev != match[2].str()) {
return std::nullopt;
}
return match[1];
}

View file

@ -0,0 +1,23 @@
#pragma once
#include <string>
#include <string_view>
#include <optional>
// Parses the HEAD ref as reported by `git ls-remote --symref`
//
// Returns the head branch name as reported by `git ls-remote --symref`, e.g., if
// ls-remote returns the output below, "main" is returned based on the ref line.
//
// ref: refs/heads/main HEAD
//
// If the repository is in 'detached head' state (HEAD is pointing to a rev
// instead of a branch), parseListReferenceForRev("HEAD") may be used instead.
std::optional<std::string> parseListReferenceHeadRef(std::string_view line);
// Parses a reference line from `git ls-remote --symref`, e.g.,
// parseListReferenceForRev("refs/heads/master", line) will return 6926...
// given the line below.
//
// 6926beab444c33fb57b21819b6642d032016bb1e refs/heads/master
std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line);

View file

@ -5,15 +5,20 @@
#include "store-api.hh"
#include "url-parts.hh"
#include "pathlocks.hh"
#include "util.hh"
#include "git-utils.hh"
#include "fetch-settings.hh"
#include <regex>
#include <string.h>
#include <sys/time.h>
#include <sys/wait.h>
using namespace std::string_literals;
namespace nix::fetchers {
namespace {
// Explicit initial branch of our bare repo to suppress warnings from new version of git.
// The value itself does not matter, since we always fetch a specific revision or branch.
@ -21,7 +26,7 @@ namespace nix::fetchers {
// old version of git, which will ignore unrecognized `-c` options.
const std::string gitInitialBranch = "__nix_dummy_branch";
static std::string getGitDir()
std::string getGitDir()
{
auto gitDir = getEnv("GIT_DIR");
if (!gitDir) {
@ -30,16 +35,222 @@ static std::string getGitDir()
return *gitDir;
}
static std::string readHead(const Path & path)
{
return chomp(runProgram("git", true, { "-C", path, "--git-dir", ".git", "rev-parse", "--abbrev-ref", "HEAD" }));
bool isCacheFileWithinTtl(const time_t now, const struct stat& st) {
return st.st_mtime + settings.tarballTtl > now;
}
static bool isNotDotGitDirectory(const Path & path)
bool touchCacheFile(const Path& path, const time_t& touch_time)
{
struct timeval times[2];
times[0].tv_sec = touch_time;
times[0].tv_usec = 0;
times[1].tv_sec = touch_time;
times[1].tv_usec = 0;
return lutimes(path.c_str(), times) == 0;
}
Path getCachePath(std::string key)
{
return getCacheDir() + "/nix/gitv3/" +
hashString(htSHA256, key).to_string(Base32, false);
}
// Returns the name of the HEAD branch.
//
// Returns the head branch name as reported by git ls-remote --symref, e.g., if
// ls-remote returns the output below, "main" is returned based on the ref line.
//
// ref: refs/heads/main HEAD
// ...
std::optional<std::string> readHead(const Path & path)
{
auto [exit_code, output] = runProgram(RunOptions {
.program = "git",
.args = {"ls-remote", "--symref", path},
});
if (exit_code != 0) {
return std::nullopt;
}
std::string_view line = output;
line = line.substr(0, line.find("\n"));
if (const auto ref = parseListReferenceHeadRef(line); ref) {
debug("resolved HEAD ref '%s' for repo '%s'", *ref, path);
return *ref;
}
if (const auto rev = parseListReferenceForRev("HEAD", line); rev) {
debug("resolved HEAD rev '%s' for repo '%s'", *rev, path);
return *rev;
}
return std::nullopt;
}
// Persist the HEAD ref from the remote repo in the local cached repo.
bool storeCachedHead(const std::string& actualUrl, const std::string& headRef)
{
Path cacheDir = getCachePath(actualUrl);
try {
runProgram("git", true, { "-C", cacheDir, "symbolic-ref", "--", "HEAD", headRef });
} catch (ExecError &e) {
if (!WIFEXITED(e.status)) throw;
return false;
}
/* No need to touch refs/HEAD, because `git symbolic-ref` updates the mtime. */
return true;
}
std::optional<std::string> readHeadCached(const std::string& actualUrl)
{
// Create a cache path to store the branch of the HEAD ref. Append something
// in front of the URL to prevent collision with the repository itself.
Path cacheDir = getCachePath(actualUrl);
Path headRefFile = cacheDir + "/HEAD";
time_t now = time(0);
struct stat st;
std::optional<std::string> cachedRef;
if (stat(headRefFile.c_str(), &st) == 0) {
cachedRef = readHead(cacheDir);
if (cachedRef != std::nullopt &&
*cachedRef != gitInitialBranch &&
isCacheFileWithinTtl(now, st)) {
debug("using cached HEAD ref '%s' for repo '%s'", *cachedRef, actualUrl);
return cachedRef;
}
}
auto ref = readHead(actualUrl);
if (ref) {
return ref;
}
if (cachedRef) {
// If the cached git ref is expired in fetch() below, and the 'git fetch'
// fails, it falls back to continuing with the most recent version.
// This function must behave the same way, so we return the expired
// cached ref here.
warn("could not get HEAD ref for repository '%s'; using expired cached ref '%s'", actualUrl, *cachedRef);
return *cachedRef;
}
return std::nullopt;
}
bool isNotDotGitDirectory(const Path & path)
{
return baseNameOf(path) != ".git";
}
struct WorkdirInfo
{
bool clean = false;
bool hasHead = false;
};
// Returns whether a git workdir is clean and has commits.
WorkdirInfo getWorkdirInfo(const Input & input, const Path & workdir)
{
const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
auto gitDir = getGitDir();
auto env = getEnv();
// Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong
// that way unknown errors can lead to a failure instead of continuing through the wrong code path
env["LC_ALL"] = "C";
/* Check whether HEAD points to something that looks like a commit,
since that is the refrence we want to use later on. */
auto result = runProgram(RunOptions {
.program = "git",
.args = { "-C", workdir, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" },
.environment = env,
.mergeStderrToStdout = true
});
auto exitCode = WEXITSTATUS(result.first);
auto errorMessage = result.second;
if (errorMessage.find("fatal: not a git repository") != std::string::npos) {
throw Error("'%s' is not a Git repository", workdir);
} else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) {
// indicates that the repo does not have any commits
// we want to proceed and will consider it dirty later
} else if (exitCode != 0) {
// any other errors should lead to a failure
throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", workdir, exitCode, errorMessage);
}
bool clean = false;
bool hasHead = exitCode == 0;
try {
if (hasHead) {
// Using git diff is preferrable over lower-level operations here,
// because its conceptually simpler and we only need the exit code anyways.
auto gitDiffOpts = Strings({ "-C", workdir, "diff", "HEAD", "--quiet"});
if (!submodules) {
// Changes in submodules should only make the tree dirty
// when those submodules will be copied as well.
gitDiffOpts.emplace_back("--ignore-submodules");
}
gitDiffOpts.emplace_back("--");
runProgram("git", true, gitDiffOpts);
clean = true;
}
} catch (ExecError & e) {
if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw;
}
return WorkdirInfo { .clean = clean, .hasHead = hasHead };
}
std::pair<StorePath, Input> fetchFromWorkdir(ref<Store> store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo)
{
const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
if (!fetchSettings.allowDirty)
throw Error("Git tree '%s' is dirty", workdir);
if (fetchSettings.warnDirty)
warn("Git tree '%s' is dirty", workdir);
auto gitOpts = Strings({ "-C", workdir, "ls-files", "-z" });
if (submodules)
gitOpts.emplace_back("--recurse-submodules");
auto files = tokenizeString<std::set<std::string>>(
runProgram("git", true, gitOpts), "\0"s);
Path actualPath(absPath(workdir));
PathFilter filter = [&](const Path & p) -> bool {
assert(hasPrefix(p, actualPath));
std::string file(p, actualPath.size() + 1);
auto st = lstat(p);
if (S_ISDIR(st.st_mode)) {
auto prefix = file + "/";
auto i = files.lower_bound(prefix);
return i != files.end() && hasPrefix(*i, prefix);
}
return files.count(file);
};
auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter);
// FIXME: maybe we should use the timestamp of the last
// modified dirty file?
input.attrs.insert_or_assign(
"lastModified",
workdirInfo.hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0);
return {std::move(storePath), input};
}
} // end namespace
struct GitInputScheme : InputScheme
{
std::optional<Input> inputFromURL(const ParsedURL & url) override
@ -234,106 +445,16 @@ struct GitInputScheme : InputScheme
auto [isLocal, actualUrl_] = getActualUrl(input);
auto actualUrl = actualUrl_; // work around clang bug
// If this is a local directory and no ref or revision is
// given, then allow the use of an unclean working tree.
/* If this is a local directory and no ref or revision is given,
allow fetching directly from a dirty workdir. */
if (!input.getRef() && !input.getRev() && isLocal) {
bool clean = false;
auto env = getEnv();
// Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong
// that way unknown errors can lead to a failure instead of continuing through the wrong code path
env["LC_ALL"] = "C";
/* Check whether HEAD points to something that looks like a commit,
since that is the refrence we want to use later on. */
auto result = runProgram(RunOptions {
.program = "git",
.args = { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" },
.environment = env,
.mergeStderrToStdout = true
});
auto exitCode = WEXITSTATUS(result.first);
auto errorMessage = result.second;
if (errorMessage.find("fatal: not a git repository") != std::string::npos) {
throw Error("'%s' is not a Git repository", actualUrl);
} else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) {
// indicates that the repo does not have any commits
// we want to proceed and will consider it dirty later
} else if (exitCode != 0) {
// any other errors should lead to a failure
throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", actualUrl, exitCode, errorMessage);
}
bool hasHead = exitCode == 0;
try {
if (hasHead) {
// Using git diff is preferrable over lower-level operations here,
// because its conceptually simpler and we only need the exit code anyways.
auto gitDiffOpts = Strings({ "-C", actualUrl, "--git-dir", gitDir, "diff", "HEAD", "--quiet"});
if (!submodules) {
// Changes in submodules should only make the tree dirty
// when those submodules will be copied as well.
gitDiffOpts.emplace_back("--ignore-submodules");
}
gitDiffOpts.emplace_back("--");
runProgram("git", true, gitDiffOpts);
clean = true;
}
} catch (ExecError & e) {
if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw;
}
if (!clean) {
/* This is an unclean working tree. So copy all tracked files. */
if (!fetchSettings.allowDirty)
throw Error("Git tree '%s' is dirty", actualUrl);
if (fetchSettings.warnDirty)
warn("Git tree '%s' is dirty", actualUrl);
auto gitOpts = Strings({ "-C", actualUrl, "--git-dir", gitDir, "ls-files", "-z" });
if (submodules)
gitOpts.emplace_back("--recurse-submodules");
auto files = tokenizeString<std::set<std::string>>(
runProgram("git", true, gitOpts), "\0"s);
Path actualPath(absPath(actualUrl));
PathFilter filter = [&](const Path & p) -> bool {
assert(hasPrefix(p, actualPath));
std::string file(p, actualPath.size() + 1);
auto st = lstat(p);
if (S_ISDIR(st.st_mode)) {
auto prefix = file + "/";
auto i = files.lower_bound(prefix);
return i != files.end() && hasPrefix(*i, prefix);
}
return files.count(file);
};
auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter);
// FIXME: maybe we should use the timestamp of the last
// modified dirty file?
input.attrs.insert_or_assign(
"lastModified",
hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0);
return {std::move(storePath), input};
auto workdirInfo = getWorkdirInfo(input, actualUrl);
if (!workdirInfo.clean) {
return fetchFromWorkdir(store, input, actualUrl, workdirInfo);
}
}
if (!input.getRef()) input.attrs.insert_or_assign("ref", isLocal ? readHead(actualUrl) : "master");
Attrs unlockedAttrs({
const Attrs unlockedAttrs({
{"type", cacheType},
{"name", name},
{"url", actualUrl},
@ -343,14 +464,30 @@ struct GitInputScheme : InputScheme
Path repoDir;
if (isLocal) {
if (!input.getRef()) {
auto head = readHead(actualUrl);
if (!head) {
warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl);
head = "master";
}
input.attrs.insert_or_assign("ref", *head);
}
if (!input.getRev())
input.attrs.insert_or_assign("rev",
Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", *input.getRef() })), htSHA1).gitRev());
repoDir = actualUrl;
} else {
const bool useHeadRef = !input.getRef();
if (useHeadRef) {
auto head = readHeadCached(actualUrl);
if (!head) {
warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl);
head = "master";
}
input.attrs.insert_or_assign("ref", *head);
}
if (auto res = getCache()->lookup(store, unlockedAttrs)) {
auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1);
@ -360,7 +497,7 @@ struct GitInputScheme : InputScheme
}
}
Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false);
Path cacheDir = getCachePath(actualUrl);
repoDir = cacheDir;
gitDir = ".";
@ -400,7 +537,7 @@ struct GitInputScheme : InputScheme
git fetch to update the local ref to the remote ref. */
struct stat st;
doFetch = stat(localRefFile.c_str(), &st) != 0 ||
(uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now;
!isCacheFileWithinTtl(now, st);
}
}
@ -424,13 +561,10 @@ struct GitInputScheme : InputScheme
warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl);
}
struct timeval times[2];
times[0].tv_sec = now;
times[0].tv_usec = 0;
times[1].tv_sec = now;
times[1].tv_usec = 0;
utimes(localRefFile.c_str(), times);
if (!touchCacheFile(localRefFile, now))
warn("could not update mtime for file '%s': %s", localRefFile, strerror(errno));
if (useHeadRef && !storeCachedHead(actualUrl, *input.getRef()))
warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl);
}
if (!input.getRev())

View file

@ -4,7 +4,7 @@
#include "store-api.hh"
#include "types.hh"
#include "url-parts.hh"
#include "git-utils.hh"
#include "fetchers.hh"
#include "fetch-settings.hh"
@ -383,35 +383,29 @@ struct SourceHutInputScheme : GitArchiveInputScheme
std::string line;
getline(is, line);
auto ref_index = line.find("ref: ");
if (ref_index == std::string::npos) {
auto r = parseListReferenceHeadRef(line);
if (!r) {
throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref);
}
ref_uri = line.substr(ref_index+5, line.length()-1);
} else
ref_uri = *r;
} else {
ref_uri = fmt("refs/(heads|tags)/%s", ref);
}
auto file = store->toRealPath(
downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath);
std::ifstream is(file);
std::string line;
std::string id;
while(getline(is, line)) {
// Append $ to avoid partial name matches
std::regex pattern(fmt("%s$", ref_uri));
if (std::regex_search(line, pattern)) {
id = line.substr(0, line.find('\t'));
break;
}
std::optional<std::string> id;
while(!id && getline(is, line)) {
id = parseListReferenceForRev(ref_uri, line);
}
if(id.empty())
if(!id)
throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref);
auto rev = Hash::parseAny(id, htSHA1);
auto rev = Hash::parseAny(*id, htSHA1);
debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev());
return rev;
}

View file

@ -153,7 +153,7 @@ Currently the `type` attribute can be one of the following:
git(+http|+https|+ssh|+git|+file|):(//<server>)?<path>(\?<params>)?
```
The `ref` attribute defaults to `master`.
The `ref` attribute defaults to resolving the `HEAD` reference.
The `rev` attribute must denote a commit that exists in the branch
or tag specified by the `ref` attribute, since Nix doesn't do a full
@ -161,6 +161,11 @@ Currently the `type` attribute can be one of the following:
doesn't allow fetching a `rev` without a known `ref`). The default
is the commit currently pointed to by `ref`.
When `git+file` is used without specifying `ref` or `rev`, files are
fetched directly from the local `path` as long as they have been added
to the Git repository. If there are uncommitted changes, the reference
is treated as dirty and a warning is printed.
For example, the following are valid Git flake references:
* `git+https://example.org/my/repo`

View file

@ -161,6 +161,14 @@ path4=$(nix eval --impure --raw --expr "(builtins.fetchGit $repo).outPath")
[[ $(cat $path4/hello) = dev ]]
[[ $path3 = $path4 ]]
# Using remote path with branch other than 'master' should fetch the HEAD revision.
# (--tarball-ttl 0 to prevent using the cached repo above)
export _NIX_FORCE_HTTP=1
path4=$(nix eval --tarball-ttl 0 --impure --raw --expr "(builtins.fetchGit $repo).outPath")
[[ $(cat $path4/hello) = dev ]]
[[ $path3 = $path4 ]]
unset _NIX_FORCE_HTTP
# Confirm same as 'dev' branch
path5=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = $repo; ref = \"dev\"; }).outPath")
[[ $path3 = $path5 ]]

View file

@ -31,7 +31,14 @@ flakeFollowsE=$TEST_ROOT/follows/flakeA/flakeE
for repo in $flake1Dir $flake2Dir $flake3Dir $flake7Dir $templatesDir $nonFlakeDir $flakeA $flakeB $flakeFollowsA; do
rm -rf $repo $repo.tmp
mkdir -p $repo
git -C $repo init
# Give one repo a non-master initial branch.
extraArgs=
if [[ $repo == $flake2Dir ]]; then
extraArgs="--initial-branch=main"
fi
git -C $repo init $extraArgs
git -C $repo config user.email "foobar@example.com"
git -C $repo config user.name "Foobar"
done