lix/src/libfetchers/git.cc

736 lines
28 KiB
C++
Raw Normal View History

#include "fetchers.hh"
#include "cache.hh"
#include "globals.hh"
#include "processes.hh"
#include "tarfile.hh"
#include "store-api.hh"
#include "url-parts.hh"
2021-08-02 11:39:48 +00:00
#include "pathlocks.hh"
#include "users.hh"
#include "git.hh"
#include "logging.hh"
#include "finally.hh"
#include "fetch-settings.hh"
#include <regex>
#include <string.h>
#include <sys/time.h>
#include <sys/wait.h>
using namespace std::string_literals;
namespace nix::fetchers {
namespace {
// Explicit initial branch of our bare repo to suppress warnings from new version of git.
// The value itself does not matter, since we always fetch a specific revision or branch.
// It is set with `-c init.defaultBranch=` instead of `--initial-branch=` to stay compatible with
// old version of git, which will ignore unrecognized `-c` options.
const std::string gitInitialBranch = "__nix_dummy_branch";
bool isCacheFileWithinTtl(time_t now, const struct stat & st)
2022-05-02 11:37:53 +00:00
{
return st.st_mtime + settings.tarballTtl > now;
}
bool touchCacheFile(const Path & path, time_t touch_time)
{
struct timeval times[2];
times[0].tv_sec = touch_time;
times[0].tv_usec = 0;
times[1].tv_sec = touch_time;
times[1].tv_usec = 0;
return lutimes(path.c_str(), times) == 0;
}
Path getCachePath(std::string_view key)
{
return getCacheDir() + "/nix/gitv3/" +
hashString(htSHA256, key).to_string(Base32, false);
}
// Returns the name of the HEAD branch.
//
// Returns the head branch name as reported by git ls-remote --symref, e.g., if
// ls-remote returns the output below, "main" is returned based on the ref line.
//
// ref: refs/heads/main HEAD
// ...
std::optional<std::string> readHead(const Path & path)
{
2023-05-18 10:18:34 +00:00
auto [status, output] = runProgram(RunOptions {
.program = "git",
// FIXME: use 'HEAD' to avoid returning all refs
.args = {"ls-remote", "--symref", path},
2023-05-18 10:18:34 +00:00
.isInteractive = true,
});
if (status != 0) return std::nullopt;
std::string_view line = output;
line = line.substr(0, line.find("\n"));
if (const auto parseResult = git::parseLsRemoteLine(line)) {
switch (parseResult->kind) {
case git::LsRemoteRefLine::Kind::Symbolic:
debug("resolved HEAD ref '%s' for repo '%s'", parseResult->target, path);
break;
case git::LsRemoteRefLine::Kind::Object:
debug("resolved HEAD rev '%s' for repo '%s'", parseResult->target, path);
break;
}
return parseResult->target;
}
return std::nullopt;
}
// Persist the HEAD ref from the remote repo in the local cached repo.
bool storeCachedHead(const std::string & actualUrl, const std::string & headRef)
{
Path cacheDir = getCachePath(actualUrl);
try {
runProgram("git", true, { "-C", cacheDir, "--git-dir", ".", "symbolic-ref", "--", "HEAD", headRef });
} catch (ExecError &e) {
if (!WIFEXITED(e.status)) throw;
return false;
}
/* No need to touch refs/HEAD, because `git symbolic-ref` updates the mtime. */
return true;
}
std::optional<std::string> readHeadCached(const std::string & actualUrl)
2020-02-04 20:55:57 +00:00
{
// Create a cache path to store the branch of the HEAD ref. Append something
// in front of the URL to prevent collision with the repository itself.
Path cacheDir = getCachePath(actualUrl);
Path headRefFile = cacheDir + "/HEAD";
time_t now = time(0);
struct stat st;
std::optional<std::string> cachedRef;
if (stat(headRefFile.c_str(), &st) == 0) {
cachedRef = readHead(cacheDir);
if (cachedRef != std::nullopt &&
*cachedRef != gitInitialBranch &&
isCacheFileWithinTtl(now, st))
{
debug("using cached HEAD ref '%s' for repo '%s'", *cachedRef, actualUrl);
return cachedRef;
}
}
auto ref = readHead(actualUrl);
if (ref) return ref;
if (cachedRef) {
// If the cached git ref is expired in fetch() below, and the 'git fetch'
// fails, it falls back to continuing with the most recent version.
// This function must behave the same way, so we return the expired
// cached ref here.
warn("could not get HEAD ref for repository '%s'; using expired cached ref '%s'", actualUrl, *cachedRef);
return *cachedRef;
}
return std::nullopt;
2020-02-04 20:55:57 +00:00
}
bool isNotDotGitDirectory(const Path & path)
{
return baseNameOf(path) != ".git";
}
struct WorkdirInfo
{
bool clean = false;
bool hasHead = false;
};
// Returns whether a git workdir is clean and has commits.
WorkdirInfo getWorkdirInfo(const Input & input, const Path & workdir)
{
const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
std::string gitDir(".git");
auto env = getEnv();
// Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong
// that way unknown errors can lead to a failure instead of continuing through the wrong code path
env["LC_ALL"] = "C";
/* Check whether HEAD points to something that looks like a commit,
since that is the refrence we want to use later on. */
auto result = runProgram(RunOptions {
.program = "git",
.args = { "-C", workdir, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" },
.environment = env,
.mergeStderrToStdout = true
});
auto exitCode = WEXITSTATUS(result.first);
auto errorMessage = result.second;
if (errorMessage.find("fatal: not a git repository") != std::string::npos) {
throw Error("'%s' is not a Git repository", workdir);
} else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) {
// indicates that the repo does not have any commits
// we want to proceed and will consider it dirty later
} else if (exitCode != 0) {
// any other errors should lead to a failure
throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", workdir, exitCode, errorMessage);
}
bool clean = false;
bool hasHead = exitCode == 0;
try {
if (hasHead) {
// Using git diff is preferrable over lower-level operations here,
// because its conceptually simpler and we only need the exit code anyways.
auto gitDiffOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "diff", "HEAD", "--quiet"});
if (!submodules) {
// Changes in submodules should only make the tree dirty
// when those submodules will be copied as well.
gitDiffOpts.emplace_back("--ignore-submodules");
}
gitDiffOpts.emplace_back("--");
runProgram("git", true, gitDiffOpts);
clean = true;
}
} catch (ExecError & e) {
if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw;
}
return WorkdirInfo { .clean = clean, .hasHead = hasHead };
}
std::pair<StorePath, Input> fetchFromWorkdir(ref<Store> store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo)
{
const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
auto gitDir = ".git";
if (!fetchSettings.allowDirty)
throw Error("Git tree '%s' is dirty", workdir);
if (fetchSettings.warnDirty)
warn("Git tree '%s' is dirty", workdir);
auto gitOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "ls-files", "-z" });
if (submodules)
gitOpts.emplace_back("--recurse-submodules");
auto files = tokenizeString<std::set<std::string>>(
runProgram("git", true, gitOpts), "\0"s);
Path actualPath(absPath(workdir));
PathFilter filter = [&](const Path & p) -> bool {
assert(p.starts_with(actualPath));
std::string file(p, actualPath.size() + 1);
auto st = lstat(p);
if (S_ISDIR(st.st_mode)) {
auto prefix = file + "/";
auto i = files.lower_bound(prefix);
return (i != files.end() && (*i).starts_with(prefix)) || files.count(file);
}
return files.count(file);
};
auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter);
// FIXME: maybe we should use the timestamp of the last
// modified dirty file?
input.attrs.insert_or_assign(
"lastModified",
workdirInfo.hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0);
if (workdirInfo.hasHead) {
input.attrs.insert_or_assign("dirtyRev", chomp(
runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "rev-parse", "--verify", "HEAD" })) + "-dirty");
input.attrs.insert_or_assign("dirtyShortRev", chomp(
runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "rev-parse", "--verify", "--short", "HEAD" })) + "-dirty");
}
return {std::move(storePath), input};
}
} // end namespace
struct GitInputScheme : InputScheme
{
std::optional<Input> inputFromURL(const ParsedURL & url, bool requireTree) const override
{
if (url.scheme != "git" &&
url.scheme != "git+http" &&
url.scheme != "git+https" &&
url.scheme != "git+ssh" &&
url.scheme != "git+file") return {};
auto url2(url);
if (url2.scheme.starts_with("git+")) url2.scheme = std::string(url2.scheme, 4);
url2.query.clear();
Attrs attrs;
attrs.emplace("type", "git");
Revert "libfetchers: make attribute / URL query handling consistent" This reverts commit 35eec921af1043fc6322edc0ad88c872d41623b8. Reason for revert: Regressed nix-eval-jobs, and it appears to be this change is buggy/missing a case. It just needs another pass. Code causing the problem in n-e-j, when invoked with `nix-eval-jobs --flake '.#hydraJobs'`: ``` n-e-j/tests/assets » ../../build/src/nix-eval-jobs --meta --workers 1 --flake .#hydraJobs warning: unknown setting 'trusted-users' warning: `--gc-roots-dir' not specified error: unsupported Git input attribute 'dir' error: worker error: error: unsupported Git input attribute 'dir' ``` ``` nix::Value *vRoot = [&]() { if (args.flake) { auto [flakeRef, fragment, outputSpec] = nix::parseFlakeRefWithFragmentAndExtendedOutputsSpec( args.releaseExpr, nix::absPath(".")); nix::InstallableFlake flake{ {}, state, std::move(flakeRef), fragment, outputSpec, {}, {}, args.lockFlags}; return flake.toValue(*state).first; } else { return releaseExprTopLevelValue(*state, autoArgs, args); } }(); ``` Inspecting the program behaviour reveals that `dir` was in fact set in the URL going into the fetcher. This is in turn because unlike in the case changed in this commit, it was not erased before handing it to libfetchers, which is probably just a mistake. ``` (rr) up 3 0x00007ffff60262ae in nix::fetchers::Input::fromURL (url=..., requireTree=requireTree@entry=true) at src/libfetchers/fetchers.cc:39 warning: Source file is more recent than executable. 39 auto res = inputScheme->inputFromURL(url, requireTree); (rr) p url $1 = (const nix::ParsedURL &) @0x7fffdc874190: {url = "git+file:///home/jade/lix/nix-eval-jobs", base = "git+file:///home/jade/lix/nix-eval-jobs", scheme = "git+file", authority = std::optional<std::string> = {[contained value] = ""}, path = "/home/jade/lix/nix-eval-jobs", query = std::map with 1 element = {["dir"] = "tests/assets"}, fragment = ""} (rr) up 4 0x00007ffff789d904 in nix::parseFlakeRefWithFragment (url=".#hydraJobs", baseDir=std::optional<std::string> = {...}, allowMissing=allowMissing@entry=false, isFlake=isFlake@entry=true) at src/libexpr/flake/flakeref.cc:179 warning: Source file is more recent than executable. 179 FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")), (rr) p parsedURL $2 = {url = "git+file:///home/jade/lix/nix-eval-jobs", base = "git+file:///home/jade/lix/nix-eval-jobs", scheme = "git+file", authority = std::optional<std::string> = {[contained value] = ""}, path = "/home/jade/lix/nix-eval-jobs", query = std::map with 1 element = { ["dir"] = "tests/assets"}, fragment = ""} (rr) list 174 175 if (pathExists(flakeRoot + "/.git/shallow")) 176 parsedURL.query.insert_or_assign("shallow", "1"); 177 178 return std::make_pair( 179 FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")), 180 fragment); 181 } ``` Change-Id: Ib55a882eaeb3e59228857761dc1e3b2e366b0f5e
2024-06-24 22:49:17 +00:00
for (auto & [name, value] : url.query) {
if (name == "rev" || name == "ref")
attrs.emplace(name, value);
else if (name == "shallow" || name == "submodules" || name == "allRefs")
attrs.emplace(name, Explicit<bool> { value == "1" });
else
url2.query.emplace(name, value);
}
attrs.emplace("url", url2.to_string());
libfetchers: make attribute / URL query handling consistent The original idea was to fix lix#174, but for a user friendly solution, I figured that we'd need more consistency: * Invalid query params will cause an error, just like invalid attributes. This has the following two consequences: * The `?dir=`-param from flakes will be removed before the URL to be fetched is passed to libfetchers. * The tarball fetcher doesn't allow URLs with custom query params anymore. I think this was questionable anyways given that an arbitrary set of query params was silently removed from the URL you wanted to fetch. The correct way is to use an attribute-set with a key `url` that contains the tarball URL to fetch. * Same for the git & mercurial fetchers: in that case it doesn't even matter though: both fetchers added unused query params to the URL that's passed from the input scheme to the fetcher (`url2` in the code). It turns out that this was never used since the query parameters were erased again in `getActualUrl`. * Validation happens for both attributes and URLs. Previously, a lot of fetchers validated e.g. refs/revs only when specified in a URL and the validity of attribute names only in `inputFromAttrs`. Now, all the validation is done in `inputFromAttrs` and `inputFromURL` constructs attributes that will be passed to `inputFromAttrs`. * Accept all attributes as URL query parameters. That also includes lesser used ones such as `narHash`. And "output" attributes like `lastModified`: these could be declared already when declaring inputs as attribute rather than URL. Now the behavior is at least consistent. Personally, I think we should differentiate in the future between "fetched input" (basically the attr-set that ends up in the lock-file) and "unfetched input" earlier: both inputFrom{Attrs,URL} entrypoints are probably OK for unfetched inputs, but for locked/fetched inputs a custom entrypoint should be used. Then, the current entrypoints wouldn't have to allow these attributes anymore. Change-Id: I1be1992249f7af8287cfc37891ab505ddaa2e8cd
2024-05-04 10:55:10 +00:00
return inputFromAttrs(attrs);
}
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
{
if (maybeGetStrAttr(attrs, "type") != "git") return {};
for (auto & [name, value] : attrs)
if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules" && name != "lastModified" && name != "revCount" && name != "narHash" && name != "allRefs" && name != "name" && name != "dirtyRev" && name != "dirtyShortRev")
throw Error("unsupported Git input attribute '%s'", name);
parseURL(getStrAttr(attrs, "url"));
maybeGetBoolAttr(attrs, "shallow");
maybeGetBoolAttr(attrs, "submodules");
maybeGetBoolAttr(attrs, "allRefs");
if (auto ref = maybeGetStrAttr(attrs, "ref")) {
if (std::regex_search(*ref, badGitRefRegex))
throw BadURL("invalid Git branch/tag name '%s'", *ref);
}
Input input;
input.attrs = attrs;
return input;
}
ParsedURL toURL(const Input & input) const override
{
auto url = parseURL(getStrAttr(input.attrs, "url"));
if (url.scheme != "git") url.scheme = "git+" + url.scheme;
if (auto rev = input.getRev()) url.query.insert_or_assign("rev", rev->gitRev());
if (auto ref = input.getRef()) url.query.insert_or_assign("ref", *ref);
if (maybeGetBoolAttr(input.attrs, "shallow").value_or(false))
url.query.insert_or_assign("shallow", "1");
return url;
}
bool hasAllInfo(const Input & input) const override
{
bool maybeDirty = !input.getRef();
bool shallow = maybeGetBoolAttr(input.attrs, "shallow").value_or(false);
return
maybeGetIntAttr(input.attrs, "lastModified")
&& (shallow || maybeDirty || maybeGetIntAttr(input.attrs, "revCount"));
}
Input applyOverrides(
const Input & input,
std::optional<std::string> ref,
std::optional<Hash> rev) const override
{
auto res(input);
if (rev) res.attrs.insert_or_assign("rev", rev->gitRev());
if (ref) res.attrs.insert_or_assign("ref", *ref);
if (!res.getRef() && res.getRev())
throw Error("Git input '%s' has a commit hash but no branch/tag name", res.to_string());
return res;
}
void clone(const Input & input, const Path & destDir) const override
{
auto [isLocal, actualUrl] = getActualUrl(input);
Strings args = {"clone"};
args.push_back(actualUrl);
if (auto ref = input.getRef()) {
args.push_back("--branch");
args.push_back(*ref);
}
if (input.getRev()) throw UnimplementedError("cloning a specific revision is not implemented");
args.push_back(destDir);
runProgram("git", true, args, true);
}
std::optional<Path> getSourcePath(const Input & input) const override
{
auto url = parseURL(getStrAttr(input.attrs, "url"));
if (url.scheme == "file" && !input.getRef() && !input.getRev())
return url.path;
return {};
}
void putFile(
const Input & input,
const CanonPath & path,
std::string_view contents,
std::optional<std::string> commitMsg) const override
{
auto root = getSourcePath(input);
if (!root)
throw Error("cannot commit '%s' to Git repository '%s' because it's not a working tree", path, input.to_string());
writeFile((CanonPath(*root) + path).abs(), contents);
auto gitDir = ".git";
2020-02-05 13:48:49 +00:00
auto result = runProgram(RunOptions {
.program = "git",
.args = {"-C", *root, "--git-dir", gitDir, "check-ignore", "--quiet", std::string(path.rel())},
});
auto exitCode = WEXITSTATUS(result.first);
if (exitCode != 0) {
// The path is not `.gitignore`d, we can add the file.
2020-02-05 13:48:49 +00:00
runProgram("git", true,
{ "-C", *root, "--git-dir", gitDir, "add", "--intent-to-add", "--", std::string(path.rel()) });
if (commitMsg) {
auto [_fd, msgPath] = createTempFile("nix-msg");
AutoDelete const _delete{msgPath};
writeFile(msgPath, *commitMsg);
runProgram("git", true,
{ "-C", *root, "--git-dir", gitDir, "commit", std::string(path.rel()), "-F", msgPath }, true);
}
}
}
std::pair<bool, std::string> getActualUrl(const Input & input) const
{
// file:// URIs are normally not cloned (but otherwise treated the
// same as remote URIs, i.e. we don't use the working tree or
// HEAD). Exception: If _NIX_FORCE_HTTP is set, or the repo is a bare git
// repo, treat as a remote URI to force a clone.
static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing
auto url = parseURL(getStrAttr(input.attrs, "url"));
bool isBareRepository = url.scheme == "file" && !pathExists(url.path + "/.git");
bool isLocal = url.scheme == "file" && !forceHttp && !isBareRepository;
return {isLocal, isLocal ? url.path : url.base};
}
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override
{
Input input(_input);
auto gitDir = ".git";
std::string name = input.getName();
bool shallow = maybeGetBoolAttr(input.attrs, "shallow").value_or(false);
bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
bool allRefs = maybeGetBoolAttr(input.attrs, "allRefs").value_or(false);
std::string cacheType = "git";
if (shallow) cacheType += "-shallow";
if (submodules) cacheType += "-submodules";
if (allRefs) cacheType += "-all-refs";
2020-03-17 21:32:26 +00:00
auto checkHashType = [&](const std::optional<Hash> & hash)
{
if (hash.has_value() && !(hash->type == htSHA1 || hash->type == htSHA256))
throw Error("Hash '%s' is not supported by Git. Supported types are sha1 and sha256.", hash->to_string(Base16, true));
};
auto getLockedAttrs = [&]()
2020-03-17 21:32:26 +00:00
{
checkHashType(input.getRev());
2020-03-17 21:32:26 +00:00
return Attrs({
{"type", cacheType},
{"name", name},
{"rev", input.getRev()->gitRev()},
2020-03-17 21:32:26 +00:00
});
};
auto makeResult = [&](const Attrs & infoAttrs, StorePath && storePath)
-> std::pair<StorePath, Input>
2020-03-17 21:32:26 +00:00
{
assert(input.getRev());
assert(!_input.getRev() || _input.getRev() == input.getRev());
if (!shallow)
input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount"));
input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified"));
return {std::move(storePath), input};
2020-03-17 21:32:26 +00:00
};
if (input.getRev()) {
if (auto res = getCache()->lookup(store, getLockedAttrs()))
2020-03-17 21:32:26 +00:00
return makeResult(res->first, std::move(res->second));
}
auto [isLocal, actualUrl_] = getActualUrl(input);
auto actualUrl = actualUrl_; // work around clang bug
/* If this is a local directory and no ref or revision is given,
allow fetching directly from a dirty workdir. */
if (!input.getRef() && !input.getRev() && isLocal) {
auto workdirInfo = getWorkdirInfo(input, actualUrl);
if (!workdirInfo.clean) {
return fetchFromWorkdir(store, input, actualUrl, workdirInfo);
}
}
Attrs unlockedAttrs({
2020-03-17 21:32:26 +00:00
{"type", cacheType},
{"name", name},
{"url", actualUrl},
});
Path repoDir;
if (isLocal) {
if (!input.getRef()) {
auto head = readHead(actualUrl);
if (!head) {
warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl);
head = "master";
}
input.attrs.insert_or_assign("ref", *head);
unlockedAttrs.insert_or_assign("ref", *head);
}
if (!input.getRev())
input.attrs.insert_or_assign("rev",
Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", *input.getRef() })), htSHA1).gitRev());
repoDir = actualUrl;
} else {
const bool useHeadRef = !input.getRef();
if (useHeadRef) {
auto head = readHeadCached(actualUrl);
if (!head) {
warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl);
head = "master";
}
input.attrs.insert_or_assign("ref", *head);
unlockedAttrs.insert_or_assign("ref", *head);
} else {
if (!input.getRev()) {
unlockedAttrs.insert_or_assign("ref", input.getRef().value());
}
}
if (auto res = getCache()->lookup(store, unlockedAttrs)) {
auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1);
if (!input.getRev() || input.getRev() == rev2) {
input.attrs.insert_or_assign("rev", rev2.gitRev());
2020-03-17 21:32:26 +00:00
return makeResult(res->first, std::move(res->second));
}
}
Path cacheDir = getCachePath(actualUrl);
repoDir = cacheDir;
gitDir = ".";
2021-08-02 11:39:48 +00:00
createDirs(dirOf(cacheDir));
PathLocks cacheDirLock({cacheDir + ".lock"});
2021-08-02 11:39:48 +00:00
if (!pathExists(cacheDir)) {
runProgram("git", true, { "-c", "init.defaultBranch=" + gitInitialBranch, "init", "--bare", repoDir });
}
Path localRefFile =
input.getRef()->compare(0, 5, "refs/") == 0
? cacheDir + "/" + *input.getRef()
: cacheDir + "/refs/heads/" + *input.getRef();
bool doFetch;
time_t now = time(0);
/* If a rev was specified, we need to fetch if it's not in the
repo. */
if (input.getRev()) {
try {
runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "cat-file", "-e", input.getRev()->gitRev() });
doFetch = false;
} catch (ExecError & e) {
if (WIFEXITED(e.status)) {
doFetch = true;
} else {
throw;
}
}
} else {
if (allRefs) {
doFetch = true;
} else {
/* If the local ref is older than tarball-ttl seconds, do a
git fetch to update the local ref to the remote ref. */
struct stat st;
doFetch = stat(localRefFile.c_str(), &st) != 0 ||
!isCacheFileWithinTtl(now, st);
}
}
if (doFetch) {
Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", actualUrl));
// FIXME: git stderr messes up our progress indicator, so
// we're using --quiet for now. Should process its stderr.
try {
auto ref = input.getRef();
auto fetchRef = allRefs
? "refs/*"
: ref->compare(0, 5, "refs/") == 0
? *ref
: ref == "HEAD"
? *ref
: "refs/heads/" + *ref;
runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) }, true);
} catch (Error & e) {
if (!pathExists(localRefFile)) throw;
warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl);
}
if (!touchCacheFile(localRefFile, now))
warn("could not update mtime for file '%s': %s", localRefFile, strerror(errno));
if (useHeadRef && !storeCachedHead(actualUrl, *input.getRef()))
warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl);
}
if (!input.getRev())
input.attrs.insert_or_assign("rev", Hash::parseAny(chomp(readFile(localRefFile)), htSHA1).gitRev());
// cache dir lock is removed at scope end; we will only use read-only operations on specific revisions in the remainder
}
bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-parse", "--is-shallow-repository" })) == "true";
if (isShallow && !shallow)
2022-06-26 12:00:00 +00:00
throw Error("'%s' is a shallow Git repository, but shallow repositories are only allowed when `shallow = true;` is specified.", actualUrl);
// FIXME: check whether rev is an ancestor of ref.
printTalkative("using revision %s of repo '%s'", input.getRev()->gitRev(), actualUrl);
2020-03-17 21:32:26 +00:00
/* Now that we know the ref, check again whether we have it in
the store. */
if (auto res = getCache()->lookup(store, getLockedAttrs()))
2020-03-17 21:32:26 +00:00
return makeResult(res->first, std::move(res->second));
Path tmpDir = createTempDir();
AutoDelete delTmpDir(tmpDir, true);
PathFilter filter = defaultPathFilter;
2021-09-14 06:19:41 +00:00
auto result = runProgram(RunOptions {
.program = "git",
.args = { "-C", repoDir, "--git-dir", gitDir, "cat-file", "commit", input.getRev()->gitRev() },
.mergeStderrToStdout = true
});
if (WEXITSTATUS(result.first) == 128
&& result.second.find("bad file") != std::string::npos)
{
throw Error(
"Cannot find Git revision '%s' in ref '%s' of repository '%s'! "
"Please make sure that the " ANSI_BOLD "rev" ANSI_NORMAL " exists on the "
ANSI_BOLD "ref" ANSI_NORMAL " you've specified or add " ANSI_BOLD
"allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".",
input.getRev()->gitRev(),
*input.getRef(),
actualUrl
);
}
if (submodules) {
Path tmpGitDir = createTempDir();
AutoDelete delTmpGitDir(tmpGitDir, true);
runProgram("git", true, { "-c", "init.defaultBranch=" + gitInitialBranch, "init", tmpDir, "--separate-git-dir", tmpGitDir });
{
// TODO: repoDir might lack the ref (it only checks if rev
// exists, see FIXME above) so use a big hammer and fetch
// everything to ensure we get the rev.
2023-02-07 21:22:50 +00:00
Activity act(*logger, lvlTalkative, actUnknown, fmt("making temporary clone of '%s'", repoDir));
2023-05-18 10:26:23 +00:00
runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force",
"--update-head-ok", "--", repoDir, "refs/*:refs/*" }, true);
}
runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input.getRev()->gitRev() });
/* Ensure that we use the correct origin for fetching
submodules. This matters for submodules with relative
URLs. */
if (isLocal) {
writeFile(tmpGitDir + "/config", readFile(repoDir + "/" + gitDir + "/config"));
/* Restore the config.bare setting we may have just
copied erroneously from the user's repo. */
runProgram("git", true, { "-C", tmpDir, "config", "core.bare", "false" });
} else
runProgram("git", true, { "-C", tmpDir, "config", "remote.origin.url", actualUrl });
/* As an optimisation, copy the modules directory of the
source repo if it exists. */
auto modulesPath = repoDir + "/" + gitDir + "/modules";
if (pathExists(modulesPath)) {
Activity act(*logger, lvlTalkative, actUnknown, fmt("copying submodules of '%s'", actualUrl));
runProgram("cp", true, { "-R", "--", modulesPath, tmpGitDir + "/modules" });
}
{
Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching submodules of '%s'", actualUrl));
runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" }, true);
}
filter = isNotDotGitDirectory;
} else {
auto proc = runProgram2({
.program = "git",
.args = { "-C", repoDir, "--git-dir", gitDir, "archive", input.getRev()->gitRev() },
.captureStdout = true,
});
Finally const _wait([&] { proc.wait(); });
unpackTarfile(*proc.stdout(), tmpDir);
}
auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter);
auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() }));
2020-03-17 21:32:26 +00:00
Attrs infoAttrs({
{"rev", input.getRev()->gitRev()},
2020-03-17 21:32:26 +00:00
{"lastModified", lastModified},
});
2020-03-17 21:32:26 +00:00
if (!shallow)
infoAttrs.insert_or_assign("revCount",
std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--count", input.getRev()->gitRev() })));
2020-03-17 21:32:26 +00:00
if (!_input.getRev())
2020-03-17 21:32:26 +00:00
getCache()->add(
store,
unlockedAttrs,
2020-03-17 21:32:26 +00:00
infoAttrs,
storePath,
false);
getCache()->add(
store,
getLockedAttrs(),
2020-03-17 21:32:26 +00:00
infoAttrs,
storePath,
true);
return makeResult(infoAttrs, std::move(storePath));
}
};
static auto rGitInputScheme = OnStartup([] { registerInputScheme(std::make_unique<GitInputScheme>()); });
}