diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc new file mode 100644 index 000000000..060077098 --- /dev/null +++ b/src/libfetchers/git-utils.cc @@ -0,0 +1,25 @@ +#include "git-utils.hh" + +#include + +std::optional parseListReferenceHeadRef(std::string_view line) { + const static std::regex head_ref_regex("^ref: ([^\\s]+)\\t+HEAD$"); + std::match_results match; + if (std::regex_match(line.cbegin(), line.cend(), match, head_ref_regex)) { + return match[1]; + } else { + return std::nullopt; + } +} + +std::optional parseListReferenceForRev(std::string_view rev, std::string_view line) { + const static std::regex rev_regex("^([^\\t]+)\\t+(.*)$"); + std::match_results match; + if (!std::regex_match(line.cbegin(), line.cend(), match, rev_regex)) { + return std::nullopt; + } + if (rev != match[2].str()) { + return std::nullopt; + } + return match[1]; +} diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh new file mode 100644 index 000000000..946a68a9e --- /dev/null +++ b/src/libfetchers/git-utils.hh @@ -0,0 +1,23 @@ +#pragma once + +#include +#include +#include + +// Parses the HEAD ref as reported by `git ls-remote --symref` +// +// Returns the head branch name as reported by `git ls-remote --symref`, e.g., if +// ls-remote returns the output below, "main" is returned based on the ref line. +// +// ref: refs/heads/main HEAD +// +// If the repository is in 'detached head' state (HEAD is pointing to a rev +// instead of a branch), parseListReferenceForRev("HEAD") may be used instead. +std::optional parseListReferenceHeadRef(std::string_view line); + +// Parses a reference line from `git ls-remote --symref`, e.g., +// parseListReferenceForRev("refs/heads/master", line) will return 6926... +// given the line below. +// +// 6926beab444c33fb57b21819b6642d032016bb1e refs/heads/master +std::optional parseListReferenceForRev(std::string_view rev, std::string_view line); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index af40990e5..9d4348cf1 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -5,15 +5,20 @@ #include "store-api.hh" #include "url-parts.hh" #include "pathlocks.hh" +#include "util.hh" +#include "git-utils.hh" #include "fetch-settings.hh" +#include +#include #include #include using namespace std::string_literals; namespace nix::fetchers { +namespace { // Explicit initial branch of our bare repo to suppress warnings from new version of git. // The value itself does not matter, since we always fetch a specific revision or branch. @@ -21,7 +26,7 @@ namespace nix::fetchers { // old version of git, which will ignore unrecognized `-c` options. const std::string gitInitialBranch = "__nix_dummy_branch"; -static std::string getGitDir() +std::string getGitDir() { auto gitDir = getEnv("GIT_DIR"); if (!gitDir) { @@ -30,16 +35,222 @@ static std::string getGitDir() return *gitDir; } -static std::string readHead(const Path & path) -{ - return chomp(runProgram("git", true, { "-C", path, "--git-dir", ".git", "rev-parse", "--abbrev-ref", "HEAD" })); +bool isCacheFileWithinTtl(const time_t now, const struct stat& st) { + return st.st_mtime + settings.tarballTtl > now; } -static bool isNotDotGitDirectory(const Path & path) +bool touchCacheFile(const Path& path, const time_t& touch_time) +{ + struct timeval times[2]; + times[0].tv_sec = touch_time; + times[0].tv_usec = 0; + times[1].tv_sec = touch_time; + times[1].tv_usec = 0; + + return lutimes(path.c_str(), times) == 0; +} + +Path getCachePath(std::string key) +{ + return getCacheDir() + "/nix/gitv3/" + + hashString(htSHA256, key).to_string(Base32, false); +} + +// Returns the name of the HEAD branch. +// +// Returns the head branch name as reported by git ls-remote --symref, e.g., if +// ls-remote returns the output below, "main" is returned based on the ref line. +// +// ref: refs/heads/main HEAD +// ... +std::optional readHead(const Path & path) +{ + auto [exit_code, output] = runProgram(RunOptions { + .program = "git", + .args = {"ls-remote", "--symref", path}, + }); + if (exit_code != 0) { + return std::nullopt; + } + + std::string_view line = output; + line = line.substr(0, line.find("\n")); + if (const auto ref = parseListReferenceHeadRef(line); ref) { + debug("resolved HEAD ref '%s' for repo '%s'", *ref, path); + return *ref; + } + if (const auto rev = parseListReferenceForRev("HEAD", line); rev) { + debug("resolved HEAD rev '%s' for repo '%s'", *rev, path); + return *rev; + } + return std::nullopt; +} + +// Persist the HEAD ref from the remote repo in the local cached repo. +bool storeCachedHead(const std::string& actualUrl, const std::string& headRef) +{ + Path cacheDir = getCachePath(actualUrl); + try { + runProgram("git", true, { "-C", cacheDir, "symbolic-ref", "--", "HEAD", headRef }); + } catch (ExecError &e) { + if (!WIFEXITED(e.status)) throw; + return false; + } + /* No need to touch refs/HEAD, because `git symbolic-ref` updates the mtime. */ + return true; +} + +std::optional readHeadCached(const std::string& actualUrl) +{ + // Create a cache path to store the branch of the HEAD ref. Append something + // in front of the URL to prevent collision with the repository itself. + Path cacheDir = getCachePath(actualUrl); + Path headRefFile = cacheDir + "/HEAD"; + + time_t now = time(0); + struct stat st; + std::optional cachedRef; + if (stat(headRefFile.c_str(), &st) == 0) { + cachedRef = readHead(cacheDir); + if (cachedRef != std::nullopt && + *cachedRef != gitInitialBranch && + isCacheFileWithinTtl(now, st)) { + debug("using cached HEAD ref '%s' for repo '%s'", *cachedRef, actualUrl); + return cachedRef; + } + } + + auto ref = readHead(actualUrl); + if (ref) { + return ref; + } + + if (cachedRef) { + // If the cached git ref is expired in fetch() below, and the 'git fetch' + // fails, it falls back to continuing with the most recent version. + // This function must behave the same way, so we return the expired + // cached ref here. + warn("could not get HEAD ref for repository '%s'; using expired cached ref '%s'", actualUrl, *cachedRef); + return *cachedRef; + } + + return std::nullopt; +} + +bool isNotDotGitDirectory(const Path & path) { return baseNameOf(path) != ".git"; } +struct WorkdirInfo +{ + bool clean = false; + bool hasHead = false; +}; + +// Returns whether a git workdir is clean and has commits. +WorkdirInfo getWorkdirInfo(const Input & input, const Path & workdir) +{ + const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); + auto gitDir = getGitDir(); + + auto env = getEnv(); + // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong + // that way unknown errors can lead to a failure instead of continuing through the wrong code path + env["LC_ALL"] = "C"; + + /* Check whether HEAD points to something that looks like a commit, + since that is the refrence we want to use later on. */ + auto result = runProgram(RunOptions { + .program = "git", + .args = { "-C", workdir, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }, + .environment = env, + .mergeStderrToStdout = true + }); + auto exitCode = WEXITSTATUS(result.first); + auto errorMessage = result.second; + + if (errorMessage.find("fatal: not a git repository") != std::string::npos) { + throw Error("'%s' is not a Git repository", workdir); + } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) { + // indicates that the repo does not have any commits + // we want to proceed and will consider it dirty later + } else if (exitCode != 0) { + // any other errors should lead to a failure + throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", workdir, exitCode, errorMessage); + } + + bool clean = false; + bool hasHead = exitCode == 0; + + try { + if (hasHead) { + // Using git diff is preferrable over lower-level operations here, + // because its conceptually simpler and we only need the exit code anyways. + auto gitDiffOpts = Strings({ "-C", workdir, "diff", "HEAD", "--quiet"}); + if (!submodules) { + // Changes in submodules should only make the tree dirty + // when those submodules will be copied as well. + gitDiffOpts.emplace_back("--ignore-submodules"); + } + gitDiffOpts.emplace_back("--"); + runProgram("git", true, gitDiffOpts); + + clean = true; + } + } catch (ExecError & e) { + if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; + } + + return WorkdirInfo { .clean = clean, .hasHead = hasHead }; +} + +std::pair fetchFromWorkdir(ref store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo) +{ + const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); + + if (!fetchSettings.allowDirty) + throw Error("Git tree '%s' is dirty", workdir); + + if (fetchSettings.warnDirty) + warn("Git tree '%s' is dirty", workdir); + + auto gitOpts = Strings({ "-C", workdir, "ls-files", "-z" }); + if (submodules) + gitOpts.emplace_back("--recurse-submodules"); + + auto files = tokenizeString>( + runProgram("git", true, gitOpts), "\0"s); + + Path actualPath(absPath(workdir)); + + PathFilter filter = [&](const Path & p) -> bool { + assert(hasPrefix(p, actualPath)); + std::string file(p, actualPath.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter); + + // FIXME: maybe we should use the timestamp of the last + // modified dirty file? + input.attrs.insert_or_assign( + "lastModified", + workdirInfo.hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); + + return {std::move(storePath), input}; +} +} // end namespace + struct GitInputScheme : InputScheme { std::optional inputFromURL(const ParsedURL & url) override @@ -234,106 +445,16 @@ struct GitInputScheme : InputScheme auto [isLocal, actualUrl_] = getActualUrl(input); auto actualUrl = actualUrl_; // work around clang bug - // If this is a local directory and no ref or revision is - // given, then allow the use of an unclean working tree. + /* If this is a local directory and no ref or revision is given, + allow fetching directly from a dirty workdir. */ if (!input.getRef() && !input.getRev() && isLocal) { - bool clean = false; - - auto env = getEnv(); - // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong - // that way unknown errors can lead to a failure instead of continuing through the wrong code path - env["LC_ALL"] = "C"; - - /* Check whether HEAD points to something that looks like a commit, - since that is the refrence we want to use later on. */ - auto result = runProgram(RunOptions { - .program = "git", - .args = { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }, - .environment = env, - .mergeStderrToStdout = true - }); - auto exitCode = WEXITSTATUS(result.first); - auto errorMessage = result.second; - - if (errorMessage.find("fatal: not a git repository") != std::string::npos) { - throw Error("'%s' is not a Git repository", actualUrl); - } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) { - // indicates that the repo does not have any commits - // we want to proceed and will consider it dirty later - } else if (exitCode != 0) { - // any other errors should lead to a failure - throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", actualUrl, exitCode, errorMessage); - } - - bool hasHead = exitCode == 0; - try { - if (hasHead) { - // Using git diff is preferrable over lower-level operations here, - // because its conceptually simpler and we only need the exit code anyways. - auto gitDiffOpts = Strings({ "-C", actualUrl, "--git-dir", gitDir, "diff", "HEAD", "--quiet"}); - if (!submodules) { - // Changes in submodules should only make the tree dirty - // when those submodules will be copied as well. - gitDiffOpts.emplace_back("--ignore-submodules"); - } - gitDiffOpts.emplace_back("--"); - runProgram("git", true, gitDiffOpts); - - clean = true; - } - } catch (ExecError & e) { - if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; - } - - if (!clean) { - - /* This is an unclean working tree. So copy all tracked files. */ - - if (!fetchSettings.allowDirty) - throw Error("Git tree '%s' is dirty", actualUrl); - - if (fetchSettings.warnDirty) - warn("Git tree '%s' is dirty", actualUrl); - - auto gitOpts = Strings({ "-C", actualUrl, "--git-dir", gitDir, "ls-files", "-z" }); - if (submodules) - gitOpts.emplace_back("--recurse-submodules"); - - auto files = tokenizeString>( - runProgram("git", true, gitOpts), "\0"s); - - Path actualPath(absPath(actualUrl)); - - PathFilter filter = [&](const Path & p) -> bool { - assert(hasPrefix(p, actualPath)); - std::string file(p, actualPath.size() + 1); - - auto st = lstat(p); - - if (S_ISDIR(st.st_mode)) { - auto prefix = file + "/"; - auto i = files.lower_bound(prefix); - return i != files.end() && hasPrefix(*i, prefix); - } - - return files.count(file); - }; - - auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter); - - // FIXME: maybe we should use the timestamp of the last - // modified dirty file? - input.attrs.insert_or_assign( - "lastModified", - hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); - - return {std::move(storePath), input}; + auto workdirInfo = getWorkdirInfo(input, actualUrl); + if (!workdirInfo.clean) { + return fetchFromWorkdir(store, input, actualUrl, workdirInfo); } } - if (!input.getRef()) input.attrs.insert_or_assign("ref", isLocal ? readHead(actualUrl) : "master"); - - Attrs unlockedAttrs({ + const Attrs unlockedAttrs({ {"type", cacheType}, {"name", name}, {"url", actualUrl}, @@ -343,14 +464,30 @@ struct GitInputScheme : InputScheme Path repoDir; if (isLocal) { + if (!input.getRef()) { + auto head = readHead(actualUrl); + if (!head) { + warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); + head = "master"; + } + input.attrs.insert_or_assign("ref", *head); + } if (!input.getRev()) input.attrs.insert_or_assign("rev", Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", *input.getRef() })), htSHA1).gitRev()); repoDir = actualUrl; - } else { + const bool useHeadRef = !input.getRef(); + if (useHeadRef) { + auto head = readHeadCached(actualUrl); + if (!head) { + warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); + head = "master"; + } + input.attrs.insert_or_assign("ref", *head); + } if (auto res = getCache()->lookup(store, unlockedAttrs)) { auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1); @@ -360,7 +497,7 @@ struct GitInputScheme : InputScheme } } - Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false); + Path cacheDir = getCachePath(actualUrl); repoDir = cacheDir; gitDir = "."; @@ -400,7 +537,7 @@ struct GitInputScheme : InputScheme git fetch to update the local ref to the remote ref. */ struct stat st; doFetch = stat(localRefFile.c_str(), &st) != 0 || - (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now; + !isCacheFileWithinTtl(now, st); } } @@ -424,13 +561,10 @@ struct GitInputScheme : InputScheme warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl); } - struct timeval times[2]; - times[0].tv_sec = now; - times[0].tv_usec = 0; - times[1].tv_sec = now; - times[1].tv_usec = 0; - - utimes(localRefFile.c_str(), times); + if (!touchCacheFile(localRefFile, now)) + warn("could not update mtime for file '%s': %s", localRefFile, strerror(errno)); + if (useHeadRef && !storeCachedHead(actualUrl, *input.getRef())) + warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl); } if (!input.getRev()) diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 58b6e7c04..1bdf2759f 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -4,7 +4,7 @@ #include "store-api.hh" #include "types.hh" #include "url-parts.hh" - +#include "git-utils.hh" #include "fetchers.hh" #include "fetch-settings.hh" @@ -383,35 +383,29 @@ struct SourceHutInputScheme : GitArchiveInputScheme std::string line; getline(is, line); - auto ref_index = line.find("ref: "); - if (ref_index == std::string::npos) { + auto r = parseListReferenceHeadRef(line); + if (!r) { throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref); } - - ref_uri = line.substr(ref_index+5, line.length()-1); - } else + ref_uri = *r; + } else { ref_uri = fmt("refs/(heads|tags)/%s", ref); + } auto file = store->toRealPath( downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath); std::ifstream is(file); std::string line; - std::string id; - while(getline(is, line)) { - // Append $ to avoid partial name matches - std::regex pattern(fmt("%s$", ref_uri)); - - if (std::regex_search(line, pattern)) { - id = line.substr(0, line.find('\t')); - break; - } + std::optional id; + while(!id && getline(is, line)) { + id = parseListReferenceForRev(ref_uri, line); } - if(id.empty()) + if(!id) throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref); - auto rev = Hash::parseAny(id, htSHA1); + auto rev = Hash::parseAny(*id, htSHA1); debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev()); return rev; } diff --git a/src/nix/flake.md b/src/nix/flake.md index 7d179a6c4..c8251eb74 100644 --- a/src/nix/flake.md +++ b/src/nix/flake.md @@ -153,7 +153,7 @@ Currently the `type` attribute can be one of the following: git(+http|+https|+ssh|+git|+file|):(//)?(\?)? ``` - The `ref` attribute defaults to `master`. + The `ref` attribute defaults to resolving the `HEAD` reference. The `rev` attribute must denote a commit that exists in the branch or tag specified by the `ref` attribute, since Nix doesn't do a full @@ -161,6 +161,11 @@ Currently the `type` attribute can be one of the following: doesn't allow fetching a `rev` without a known `ref`). The default is the commit currently pointed to by `ref`. + When `git+file` is used without specifying `ref` or `rev`, files are + fetched directly from the local `path` as long as they have been added + to the Git repository. If there are uncommitted changes, the reference + is treated as dirty and a warning is printed. + For example, the following are valid Git flake references: * `git+https://example.org/my/repo` diff --git a/tests/fetchGit.sh b/tests/fetchGit.sh index 9179e2071..166bccfc7 100644 --- a/tests/fetchGit.sh +++ b/tests/fetchGit.sh @@ -161,6 +161,14 @@ path4=$(nix eval --impure --raw --expr "(builtins.fetchGit $repo).outPath") [[ $(cat $path4/hello) = dev ]] [[ $path3 = $path4 ]] +# Using remote path with branch other than 'master' should fetch the HEAD revision. +# (--tarball-ttl 0 to prevent using the cached repo above) +export _NIX_FORCE_HTTP=1 +path4=$(nix eval --tarball-ttl 0 --impure --raw --expr "(builtins.fetchGit $repo).outPath") +[[ $(cat $path4/hello) = dev ]] +[[ $path3 = $path4 ]] +unset _NIX_FORCE_HTTP + # Confirm same as 'dev' branch path5=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = $repo; ref = \"dev\"; }).outPath") [[ $path3 = $path5 ]] diff --git a/tests/flakes.sh b/tests/flakes.sh index 46e6a7982..24601784f 100644 --- a/tests/flakes.sh +++ b/tests/flakes.sh @@ -31,7 +31,14 @@ flakeFollowsE=$TEST_ROOT/follows/flakeA/flakeE for repo in $flake1Dir $flake2Dir $flake3Dir $flake7Dir $templatesDir $nonFlakeDir $flakeA $flakeB $flakeFollowsA; do rm -rf $repo $repo.tmp mkdir -p $repo - git -C $repo init + + # Give one repo a non-master initial branch. + extraArgs= + if [[ $repo == $flake2Dir ]]; then + extraArgs="--initial-branch=main" + fi + + git -C $repo init $extraArgs git -C $repo config user.email "foobar@example.com" git -C $repo config user.name "Foobar" done