From 401e60f2893d689772f716cbd800ee2ee0309362 Mon Sep 17 00:00:00 2001 From: Kjetil Orbekk Date: Tue, 2 Mar 2021 20:13:20 -0500 Subject: [PATCH 1/6] Resolve reference for remote repository Resolves the HEAD reference from the remote repository instead of assuming "master". --- src/libfetchers/git.cc | 104 ++++++++++++++++++++++++++++++++++++++--- tests/flakes.sh | 9 +++- 2 files changed, 106 insertions(+), 7 deletions(-) diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index af40990e5..6b83638f6 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -5,9 +5,11 @@ #include "store-api.hh" #include "url-parts.hh" #include "pathlocks.hh" +#include "util.hh" #include "fetch-settings.hh" +#include #include #include @@ -19,7 +21,7 @@ namespace nix::fetchers { // The value itself does not matter, since we always fetch a specific revision or branch. // It is set with `-c init.defaultBranch=` instead of `--initial-branch=` to stay compatible with // old version of git, which will ignore unrecognized `-c` options. -const std::string gitInitialBranch = "__nix_dummy_branch"; +static const std::string gitInitialBranch = "__nix_dummy_branch"; static std::string getGitDir() { @@ -30,9 +32,92 @@ static std::string getGitDir() return *gitDir; } -static std::string readHead(const Path & path) +static bool isCacheFileWithinTtl(const time_t now, const struct stat& st) { + return st.st_mtime + settings.tarballTtl > now; +} + +static Path getCachePath(std::string key) { + return getCacheDir() + "/nix/gitv3/" + + hashString(htSHA256, key).to_string(Base32, false); +} + +// Returns the name of the HEAD branch. +// +// Returns the head branch name as reported by git ls-remote --symref, e.g., if +// ls-remote returns the output below, "main" is returned based on the ref line. +// +// ref: refs/heads/main HEAD +// ... +static std::optional readHead(const Path & path) { - return chomp(runProgram("git", true, { "-C", path, "--git-dir", ".git", "rev-parse", "--abbrev-ref", "HEAD" })); + auto [exit_code, output] = runProgram(RunOptions { + .program = "git", + .args = {"ls-remote", "--symref", path}, + }); + if (exit_code != 0) { + return std::nullopt; + } + + // Matches the common case when HEAD points to a branch, e.g.: + // "ref: refs/heads/main HEAD". + const static std::regex head_ref_regex("^ref:\\s*([^\\s]+)\\s*HEAD$"); + // Matches when HEAD points directly at a commit, e.g.: + // "71abcd... HEAD". + const static std::regex head_rev_regex("^([^\\s]+)\\s*HEAD$"); + + for (const auto & line : tokenizeString>(output, "\n")) { + std::smatch match; + if (std::regex_match(line, match, head_ref_regex)) { + debug("resolved HEAD ref '%s' for repo '%s'", match[1], path); + return match[1]; + } else if (std::regex_match(line, match, head_rev_regex)) { + debug("resolved HEAD ref '%s' for repo '%s'", match[1], path); + return match[1]; + } + } + + return std::nullopt; +} + +static std::optional readHeadCached(std::string actualUrl) +{ + // Create a cache path to store the branch of the HEAD ref. Append something + // in front of the URL to prevent collision with the repository itself. + Path cachePath = getCachePath("|" + actualUrl); + time_t now = time(0); + struct stat st; + std::optional cachedRef; + if (stat(cachePath.c_str(), &st) == 0 && + // The file may be empty, because writeFile() is not atomic. + st.st_size > 0) { + + // The cached ref is persisted unconditionally (see below). + cachedRef = readFile(cachePath); + if (isCacheFileWithinTtl(now, st)) { + debug("using cached HEAD ref '%s' for repo '%s'", *cachedRef, actualUrl); + return cachedRef; + } + } + + auto ref = readHead(actualUrl); + + if (ref) { + debug("storing cached HEAD ref '%s' for repo '%s' at '%s'", *ref, actualUrl, cachePath); + createDirs(dirOf(cachePath)); + writeFile(cachePath, *ref); + return ref; + } + + if (cachedRef) { + // If the cached git ref is expired in fetch() below, and the 'git fetch' + // fails, it falls back to continuing with the most recent version. + // This function must behave the same way, so we return the expired + // cached ref here. + warn("could not get HEAD ref for repository '%s'; using expired cached ref '%s'", actualUrl, *cachedRef); + return *cachedRef; + } + + return std::nullopt; } static bool isNotDotGitDirectory(const Path & path) @@ -331,7 +416,14 @@ struct GitInputScheme : InputScheme } } - if (!input.getRef()) input.attrs.insert_or_assign("ref", isLocal ? readHead(actualUrl) : "master"); + if (!input.getRef()) { + auto head = isLocal ? readHead(actualUrl) : readHeadCached(actualUrl); + if (!head) { + warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); + head = "master"; + } + input.attrs.insert_or_assign("ref", *head); + } Attrs unlockedAttrs({ {"type", cacheType}, @@ -360,7 +452,7 @@ struct GitInputScheme : InputScheme } } - Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false); + Path cacheDir = getCachePath(actualUrl); repoDir = cacheDir; gitDir = "."; @@ -400,7 +492,7 @@ struct GitInputScheme : InputScheme git fetch to update the local ref to the remote ref. */ struct stat st; doFetch = stat(localRefFile.c_str(), &st) != 0 || - (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now; + !isCacheFileWithinTtl(now, st); } } diff --git a/tests/flakes.sh b/tests/flakes.sh index 46e6a7982..24601784f 100644 --- a/tests/flakes.sh +++ b/tests/flakes.sh @@ -31,7 +31,14 @@ flakeFollowsE=$TEST_ROOT/follows/flakeA/flakeE for repo in $flake1Dir $flake2Dir $flake3Dir $flake7Dir $templatesDir $nonFlakeDir $flakeA $flakeB $flakeFollowsA; do rm -rf $repo $repo.tmp mkdir -p $repo - git -C $repo init + + # Give one repo a non-master initial branch. + extraArgs= + if [[ $repo == $flake2Dir ]]; then + extraArgs="--initial-branch=main" + fi + + git -C $repo init $extraArgs git -C $repo config user.email "foobar@example.com" git -C $repo config user.name "Foobar" done From 05a3fbac5a99e5738b951443afe17b680d5a7ead Mon Sep 17 00:00:00 2001 From: Kjetil Orbekk Date: Tue, 20 Apr 2021 07:52:50 -0400 Subject: [PATCH 2/6] Test fetchGit with non-'master' remote repo --- tests/fetchGit.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/fetchGit.sh b/tests/fetchGit.sh index 9179e2071..166bccfc7 100644 --- a/tests/fetchGit.sh +++ b/tests/fetchGit.sh @@ -161,6 +161,14 @@ path4=$(nix eval --impure --raw --expr "(builtins.fetchGit $repo).outPath") [[ $(cat $path4/hello) = dev ]] [[ $path3 = $path4 ]] +# Using remote path with branch other than 'master' should fetch the HEAD revision. +# (--tarball-ttl 0 to prevent using the cached repo above) +export _NIX_FORCE_HTTP=1 +path4=$(nix eval --tarball-ttl 0 --impure --raw --expr "(builtins.fetchGit $repo).outPath") +[[ $(cat $path4/hello) = dev ]] +[[ $path3 = $path4 ]] +unset _NIX_FORCE_HTTP + # Confirm same as 'dev' branch path5=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = $repo; ref = \"dev\"; }).outPath") [[ $path3 = $path5 ]] From de54e1cd3fce6eb456048b6a346a0be2b88660ae Mon Sep 17 00:00:00 2001 From: Kjetil Orbekk Date: Sat, 29 Jan 2022 14:12:01 -0500 Subject: [PATCH 3/6] Refactor fetching of dirty workdir Extract the handling of a local dirty workdir to a helper function. --- src/libfetchers/git.cc | 224 ++++++++++++++++++++++------------------- 1 file changed, 123 insertions(+), 101 deletions(-) diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 6b83638f6..00a7b85d8 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -16,14 +16,15 @@ using namespace std::string_literals; namespace nix::fetchers { +namespace { // Explicit initial branch of our bare repo to suppress warnings from new version of git. // The value itself does not matter, since we always fetch a specific revision or branch. // It is set with `-c init.defaultBranch=` instead of `--initial-branch=` to stay compatible with // old version of git, which will ignore unrecognized `-c` options. -static const std::string gitInitialBranch = "__nix_dummy_branch"; +const std::string gitInitialBranch = "__nix_dummy_branch"; -static std::string getGitDir() +std::string getGitDir() { auto gitDir = getEnv("GIT_DIR"); if (!gitDir) { @@ -32,11 +33,11 @@ static std::string getGitDir() return *gitDir; } -static bool isCacheFileWithinTtl(const time_t now, const struct stat& st) { +bool isCacheFileWithinTtl(const time_t now, const struct stat& st) { return st.st_mtime + settings.tarballTtl > now; } -static Path getCachePath(std::string key) { +Path getCachePath(std::string key) { return getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, key).to_string(Base32, false); } @@ -48,7 +49,7 @@ static Path getCachePath(std::string key) { // // ref: refs/heads/main HEAD // ... -static std::optional readHead(const Path & path) +std::optional readHead(const Path & path) { auto [exit_code, output] = runProgram(RunOptions { .program = "git", @@ -79,7 +80,7 @@ static std::optional readHead(const Path & path) return std::nullopt; } -static std::optional readHeadCached(std::string actualUrl) +std::optional readHeadCached(std::string actualUrl) { // Create a cache path to store the branch of the HEAD ref. Append something // in front of the URL to prevent collision with the repository itself. @@ -120,11 +121,120 @@ static std::optional readHeadCached(std::string actualUrl) return std::nullopt; } -static bool isNotDotGitDirectory(const Path & path) +bool isNotDotGitDirectory(const Path & path) { return baseNameOf(path) != ".git"; } +struct WorkdirInfo +{ + bool clean = false; + bool hasHead = false; +}; + +// Returns whether a git workdir is clean and has commits. +WorkdirInfo getWorkdirInfo(const Input & input, const Path & workdir) +{ + const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); + auto gitDir = getGitDir(); + + auto env = getEnv(); + // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong + // that way unknown errors can lead to a failure instead of continuing through the wrong code path + env["LC_ALL"] = "C"; + + /* Check whether HEAD points to something that looks like a commit, + since that is the refrence we want to use later on. */ + auto result = runProgram(RunOptions { + .program = "git", + .args = { "-C", workdir, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }, + .environment = env, + .mergeStderrToStdout = true + }); + auto exitCode = WEXITSTATUS(result.first); + auto errorMessage = result.second; + + if (errorMessage.find("fatal: not a git repository") != std::string::npos) { + throw Error("'%s' is not a Git repository", workdir); + } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) { + // indicates that the repo does not have any commits + // we want to proceed and will consider it dirty later + } else if (exitCode != 0) { + // any other errors should lead to a failure + throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", workdir, exitCode, errorMessage); + } + + bool clean = false; + bool hasHead = exitCode == 0; + + try { + if (hasHead) { + // Using git diff is preferrable over lower-level operations here, + // because its conceptually simpler and we only need the exit code anyways. + auto gitDiffOpts = Strings({ "-C", workdir, "diff", "HEAD", "--quiet"}); + if (!submodules) { + // Changes in submodules should only make the tree dirty + // when those submodules will be copied as well. + gitDiffOpts.emplace_back("--ignore-submodules"); + } + gitDiffOpts.emplace_back("--"); + runProgram("git", true, gitDiffOpts); + + clean = true; + } + } catch (ExecError & e) { + if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; + } + + return WorkdirInfo { .clean = clean, .hasHead = hasHead }; +} + +std::pair fetchFromWorkdir(ref store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo) +{ + const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); + + if (!fetchSettings.allowDirty) + throw Error("Git tree '%s' is dirty", workdir); + + if (fetchSettings.warnDirty) + warn("Git tree '%s' is dirty", workdir); + + auto gitOpts = Strings({ "-C", workdir, "ls-files", "-z" }); + if (submodules) + gitOpts.emplace_back("--recurse-submodules"); + + auto files = tokenizeString>( + runProgram("git", true, gitOpts), "\0"s); + + Path actualPath(absPath(workdir)); + + PathFilter filter = [&](const Path & p) -> bool { + assert(hasPrefix(p, actualPath)); + std::string file(p, actualPath.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter); + + // FIXME: maybe we should use the timestamp of the last + // modified dirty file? + input.attrs.insert_or_assign( + "lastModified", + workdirInfo.hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); + + return {std::move(storePath), input}; +} +} // end namespace + struct GitInputScheme : InputScheme { std::optional inputFromURL(const ParsedURL & url) override @@ -319,100 +429,12 @@ struct GitInputScheme : InputScheme auto [isLocal, actualUrl_] = getActualUrl(input); auto actualUrl = actualUrl_; // work around clang bug - // If this is a local directory and no ref or revision is - // given, then allow the use of an unclean working tree. + /* If this is a local directory and no ref or revision is given, + allow fetching directly from a dirty workdir. */ if (!input.getRef() && !input.getRev() && isLocal) { - bool clean = false; - - auto env = getEnv(); - // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong - // that way unknown errors can lead to a failure instead of continuing through the wrong code path - env["LC_ALL"] = "C"; - - /* Check whether HEAD points to something that looks like a commit, - since that is the refrence we want to use later on. */ - auto result = runProgram(RunOptions { - .program = "git", - .args = { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }, - .environment = env, - .mergeStderrToStdout = true - }); - auto exitCode = WEXITSTATUS(result.first); - auto errorMessage = result.second; - - if (errorMessage.find("fatal: not a git repository") != std::string::npos) { - throw Error("'%s' is not a Git repository", actualUrl); - } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) { - // indicates that the repo does not have any commits - // we want to proceed and will consider it dirty later - } else if (exitCode != 0) { - // any other errors should lead to a failure - throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", actualUrl, exitCode, errorMessage); - } - - bool hasHead = exitCode == 0; - try { - if (hasHead) { - // Using git diff is preferrable over lower-level operations here, - // because its conceptually simpler and we only need the exit code anyways. - auto gitDiffOpts = Strings({ "-C", actualUrl, "--git-dir", gitDir, "diff", "HEAD", "--quiet"}); - if (!submodules) { - // Changes in submodules should only make the tree dirty - // when those submodules will be copied as well. - gitDiffOpts.emplace_back("--ignore-submodules"); - } - gitDiffOpts.emplace_back("--"); - runProgram("git", true, gitDiffOpts); - - clean = true; - } - } catch (ExecError & e) { - if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; - } - - if (!clean) { - - /* This is an unclean working tree. So copy all tracked files. */ - - if (!fetchSettings.allowDirty) - throw Error("Git tree '%s' is dirty", actualUrl); - - if (fetchSettings.warnDirty) - warn("Git tree '%s' is dirty", actualUrl); - - auto gitOpts = Strings({ "-C", actualUrl, "--git-dir", gitDir, "ls-files", "-z" }); - if (submodules) - gitOpts.emplace_back("--recurse-submodules"); - - auto files = tokenizeString>( - runProgram("git", true, gitOpts), "\0"s); - - Path actualPath(absPath(actualUrl)); - - PathFilter filter = [&](const Path & p) -> bool { - assert(hasPrefix(p, actualPath)); - std::string file(p, actualPath.size() + 1); - - auto st = lstat(p); - - if (S_ISDIR(st.st_mode)) { - auto prefix = file + "/"; - auto i = files.lower_bound(prefix); - return i != files.end() && hasPrefix(*i, prefix); - } - - return files.count(file); - }; - - auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter); - - // FIXME: maybe we should use the timestamp of the last - // modified dirty file? - input.attrs.insert_or_assign( - "lastModified", - hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); - - return {std::move(storePath), input}; + auto workdirInfo = getWorkdirInfo(input, actualUrl); + if (!workdirInfo.clean) { + return fetchFromWorkdir(store, input, actualUrl, workdirInfo); } } @@ -425,7 +447,7 @@ struct GitInputScheme : InputScheme input.attrs.insert_or_assign("ref", *head); } - Attrs unlockedAttrs({ + const Attrs unlockedAttrs({ {"type", cacheType}, {"name", name}, {"url", actualUrl}, From 1203e489263fe867d0a1ae3fd9270f40c8a1c24e Mon Sep 17 00:00:00 2001 From: Kjetil Orbekk Date: Sat, 29 Jan 2022 14:22:55 -0500 Subject: [PATCH 4/6] Store cached head in cached git repo The previous head caching implementation stored two paths in the local cache; one for the cached git repo and another textfile containing the resolved HEAD ref. This commit instead stores the resolved HEAD by setting the HEAD ref in the local cache appropriately. --- src/libfetchers/git.cc | 89 +++++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 31 deletions(-) diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 00a7b85d8..968cd642a 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -10,6 +10,7 @@ #include "fetch-settings.hh" #include +#include #include #include @@ -37,7 +38,19 @@ bool isCacheFileWithinTtl(const time_t now, const struct stat& st) { return st.st_mtime + settings.tarballTtl > now; } -Path getCachePath(std::string key) { +bool touchCacheFile(const Path& path, const time_t& touch_time) +{ + struct timeval times[2]; + times[0].tv_sec = touch_time; + times[0].tv_usec = 0; + times[1].tv_sec = touch_time; + times[1].tv_usec = 0; + + return lutimes(path.c_str(), times) == 0; +} + +Path getCachePath(std::string key) +{ return getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, key).to_string(Base32, false); } @@ -80,32 +93,42 @@ std::optional readHead(const Path & path) return std::nullopt; } -std::optional readHeadCached(std::string actualUrl) +// Persist the HEAD ref from the remote repo in the local cached repo. +bool storeCachedHead(const std::string& actualUrl, const std::string& headRef) +{ + Path cacheDir = getCachePath(actualUrl); + try { + runProgram("git", true, { "-C", cacheDir, "symbolic-ref", "--", "HEAD", headRef }); + } catch (ExecError &e) { + if (!WIFEXITED(e.status)) throw; + return false; + } + /* No need to touch refs/HEAD, because `git symbolic-ref` updates the mtime. */ + return true; +} + +std::optional readHeadCached(const std::string& actualUrl) { // Create a cache path to store the branch of the HEAD ref. Append something // in front of the URL to prevent collision with the repository itself. - Path cachePath = getCachePath("|" + actualUrl); + Path cacheDir = getCachePath(actualUrl); + Path headRefFile = cacheDir + "/HEAD"; + time_t now = time(0); struct stat st; std::optional cachedRef; - if (stat(cachePath.c_str(), &st) == 0 && - // The file may be empty, because writeFile() is not atomic. - st.st_size > 0) { - - // The cached ref is persisted unconditionally (see below). - cachedRef = readFile(cachePath); - if (isCacheFileWithinTtl(now, st)) { + if (stat(headRefFile.c_str(), &st) == 0) { + cachedRef = readHead(cacheDir); + if (cachedRef != std::nullopt && + *cachedRef != gitInitialBranch && + isCacheFileWithinTtl(now, st)) { debug("using cached HEAD ref '%s' for repo '%s'", *cachedRef, actualUrl); return cachedRef; } } auto ref = readHead(actualUrl); - if (ref) { - debug("storing cached HEAD ref '%s' for repo '%s' at '%s'", *ref, actualUrl, cachePath); - createDirs(dirOf(cachePath)); - writeFile(cachePath, *ref); return ref; } @@ -438,15 +461,6 @@ struct GitInputScheme : InputScheme } } - if (!input.getRef()) { - auto head = isLocal ? readHead(actualUrl) : readHeadCached(actualUrl); - if (!head) { - warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); - head = "master"; - } - input.attrs.insert_or_assign("ref", *head); - } - const Attrs unlockedAttrs({ {"type", cacheType}, {"name", name}, @@ -457,14 +471,30 @@ struct GitInputScheme : InputScheme Path repoDir; if (isLocal) { + if (!input.getRef()) { + auto head = readHead(actualUrl); + if (!head) { + warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); + head = "master"; + } + input.attrs.insert_or_assign("ref", *head); + } if (!input.getRev()) input.attrs.insert_or_assign("rev", Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", *input.getRef() })), htSHA1).gitRev()); repoDir = actualUrl; - } else { + const bool useHeadRef = !input.getRef(); + if (useHeadRef) { + auto head = readHeadCached(actualUrl); + if (!head) { + warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); + head = "master"; + } + input.attrs.insert_or_assign("ref", *head); + } if (auto res = getCache()->lookup(store, unlockedAttrs)) { auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1); @@ -538,13 +568,10 @@ struct GitInputScheme : InputScheme warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl); } - struct timeval times[2]; - times[0].tv_sec = now; - times[0].tv_usec = 0; - times[1].tv_sec = now; - times[1].tv_usec = 0; - - utimes(localRefFile.c_str(), times); + if (!touchCacheFile(localRefFile, now)) + warn("could not update mtime for file '%s': %s", localRefFile, strerror(errno)); + if (useHeadRef && !storeCachedHead(actualUrl, *input.getRef())) + warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl); } if (!input.getRev()) From c21afd684cb5f59337b879684728884fd8275ce4 Mon Sep 17 00:00:00 2001 From: Kjetil Orbekk Date: Sun, 30 Jan 2022 11:30:57 -0500 Subject: [PATCH 5/6] Update `nix flake` documentation of `ref` handling Update the documentation about how `ref` is resolved if it is not specified. Add a note about special handling of local workdirs with `git+file`. --- src/nix/flake.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/nix/flake.md b/src/nix/flake.md index 7d179a6c4..c8251eb74 100644 --- a/src/nix/flake.md +++ b/src/nix/flake.md @@ -153,7 +153,7 @@ Currently the `type` attribute can be one of the following: git(+http|+https|+ssh|+git|+file|):(//)?(\?)? ``` - The `ref` attribute defaults to `master`. + The `ref` attribute defaults to resolving the `HEAD` reference. The `rev` attribute must denote a commit that exists in the branch or tag specified by the `ref` attribute, since Nix doesn't do a full @@ -161,6 +161,11 @@ Currently the `type` attribute can be one of the following: doesn't allow fetching a `rev` without a known `ref`). The default is the commit currently pointed to by `ref`. + When `git+file` is used without specifying `ref` or `rev`, files are + fetched directly from the local `path` as long as they have been added + to the Git repository. If there are uncommitted changes, the reference + is treated as dirty and a warning is printed. + For example, the following are valid Git flake references: * `git+https://example.org/my/repo` From 9bf296c970bf33b7ed53d7e2d8fbe44197482518 Mon Sep 17 00:00:00 2001 From: Kjetil Orbekk Date: Fri, 29 Apr 2022 18:30:00 -0400 Subject: [PATCH 6/6] Extract git reference parsing to a shared library These utility functions can be shared between the git and github fetchers. --- src/libfetchers/git-utils.cc | 25 +++++++++++++++++++++++++ src/libfetchers/git-utils.hh | 23 +++++++++++++++++++++++ src/libfetchers/git.cc | 29 +++++++++++------------------ src/libfetchers/github.cc | 28 +++++++++++----------------- 4 files changed, 70 insertions(+), 35 deletions(-) create mode 100644 src/libfetchers/git-utils.cc create mode 100644 src/libfetchers/git-utils.hh diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc new file mode 100644 index 000000000..060077098 --- /dev/null +++ b/src/libfetchers/git-utils.cc @@ -0,0 +1,25 @@ +#include "git-utils.hh" + +#include + +std::optional parseListReferenceHeadRef(std::string_view line) { + const static std::regex head_ref_regex("^ref: ([^\\s]+)\\t+HEAD$"); + std::match_results match; + if (std::regex_match(line.cbegin(), line.cend(), match, head_ref_regex)) { + return match[1]; + } else { + return std::nullopt; + } +} + +std::optional parseListReferenceForRev(std::string_view rev, std::string_view line) { + const static std::regex rev_regex("^([^\\t]+)\\t+(.*)$"); + std::match_results match; + if (!std::regex_match(line.cbegin(), line.cend(), match, rev_regex)) { + return std::nullopt; + } + if (rev != match[2].str()) { + return std::nullopt; + } + return match[1]; +} diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh new file mode 100644 index 000000000..946a68a9e --- /dev/null +++ b/src/libfetchers/git-utils.hh @@ -0,0 +1,23 @@ +#pragma once + +#include +#include +#include + +// Parses the HEAD ref as reported by `git ls-remote --symref` +// +// Returns the head branch name as reported by `git ls-remote --symref`, e.g., if +// ls-remote returns the output below, "main" is returned based on the ref line. +// +// ref: refs/heads/main HEAD +// +// If the repository is in 'detached head' state (HEAD is pointing to a rev +// instead of a branch), parseListReferenceForRev("HEAD") may be used instead. +std::optional parseListReferenceHeadRef(std::string_view line); + +// Parses a reference line from `git ls-remote --symref`, e.g., +// parseListReferenceForRev("refs/heads/master", line) will return 6926... +// given the line below. +// +// 6926beab444c33fb57b21819b6642d032016bb1e refs/heads/master +std::optional parseListReferenceForRev(std::string_view rev, std::string_view line); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 968cd642a..9d4348cf1 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -6,6 +6,7 @@ #include "url-parts.hh" #include "pathlocks.hh" #include "util.hh" +#include "git-utils.hh" #include "fetch-settings.hh" @@ -69,27 +70,19 @@ std::optional readHead(const Path & path) .args = {"ls-remote", "--symref", path}, }); if (exit_code != 0) { - return std::nullopt; + return std::nullopt; } - // Matches the common case when HEAD points to a branch, e.g.: - // "ref: refs/heads/main HEAD". - const static std::regex head_ref_regex("^ref:\\s*([^\\s]+)\\s*HEAD$"); - // Matches when HEAD points directly at a commit, e.g.: - // "71abcd... HEAD". - const static std::regex head_rev_regex("^([^\\s]+)\\s*HEAD$"); - - for (const auto & line : tokenizeString>(output, "\n")) { - std::smatch match; - if (std::regex_match(line, match, head_ref_regex)) { - debug("resolved HEAD ref '%s' for repo '%s'", match[1], path); - return match[1]; - } else if (std::regex_match(line, match, head_rev_regex)) { - debug("resolved HEAD ref '%s' for repo '%s'", match[1], path); - return match[1]; - } + std::string_view line = output; + line = line.substr(0, line.find("\n")); + if (const auto ref = parseListReferenceHeadRef(line); ref) { + debug("resolved HEAD ref '%s' for repo '%s'", *ref, path); + return *ref; + } + if (const auto rev = parseListReferenceForRev("HEAD", line); rev) { + debug("resolved HEAD rev '%s' for repo '%s'", *rev, path); + return *rev; } - return std::nullopt; } diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 58b6e7c04..1bdf2759f 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -4,7 +4,7 @@ #include "store-api.hh" #include "types.hh" #include "url-parts.hh" - +#include "git-utils.hh" #include "fetchers.hh" #include "fetch-settings.hh" @@ -383,35 +383,29 @@ struct SourceHutInputScheme : GitArchiveInputScheme std::string line; getline(is, line); - auto ref_index = line.find("ref: "); - if (ref_index == std::string::npos) { + auto r = parseListReferenceHeadRef(line); + if (!r) { throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref); } - - ref_uri = line.substr(ref_index+5, line.length()-1); - } else + ref_uri = *r; + } else { ref_uri = fmt("refs/(heads|tags)/%s", ref); + } auto file = store->toRealPath( downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath); std::ifstream is(file); std::string line; - std::string id; - while(getline(is, line)) { - // Append $ to avoid partial name matches - std::regex pattern(fmt("%s$", ref_uri)); - - if (std::regex_search(line, pattern)) { - id = line.substr(0, line.find('\t')); - break; - } + std::optional id; + while(!id && getline(is, line)) { + id = parseListReferenceForRev(ref_uri, line); } - if(id.empty()) + if(!id) throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref); - auto rev = Hash::parseAny(id, htSHA1); + auto rev = Hash::parseAny(*id, htSHA1); debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev()); return rev; }