From 0bfa0cdea1b4eb09405e35b338887c91a041d28c Mon Sep 17 00:00:00 2001 From: Martin Schwaighofer Date: Mon, 13 Dec 2021 21:31:15 +0100 Subject: [PATCH 1/4] git fetcher: improve check for valid repository The .git/refs/heads directory might be empty for a valid usable git repository. This often happens in CI environments, which might only fetch commits, not branches. Therefore instead we let git itself check if HEAD points to something that looks like a commit. fixes #5302 --- src/libfetchers/git.cc | 28 ++++++++++++++-------------- tests/fetchGit.sh | 8 ++++++++ 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index c3f0f8c8f..7479318e3 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -220,21 +220,21 @@ struct GitInputScheme : InputScheme if (!input.getRef() && !input.getRev() && isLocal) { bool clean = false; - /* Check whether this repo has any commits. There are - probably better ways to do this. */ - auto gitDir = actualUrl + "/.git"; - auto commonGitDir = chomp(runProgram( - "git", - true, - { "-C", actualUrl, "rev-parse", "--git-common-dir" } - )); - if (commonGitDir != ".git") - gitDir = commonGitDir; - - bool haveCommits = !readDirectory(gitDir + "/refs/heads").empty(); + /* Check whether HEAD points to something that looks like a commit, + since that is the refrence we want to use later on. */ + bool hasHead = false; + try { + runProgram("git", true, { "-C", actualUrl, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }); + hasHead = true; + } catch (ExecError & e) { + // git exits with status 128 here if it does not detect a repository. + if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 128) { + throw Error("Git tree '%s' is broken.\n'git rev-parse --verify HEAD' failed with exit code %d.", actualUrl, WEXITSTATUS(e.status)); + } + } try { - if (haveCommits) { + if (hasHead) { runProgram("git", true, { "-C", actualUrl, "diff-index", "--quiet", "HEAD", "--" }); clean = true; } @@ -280,7 +280,7 @@ struct GitInputScheme : InputScheme // modified dirty file? input.attrs.insert_or_assign( "lastModified", - haveCommits ? std::stoull(runProgram("git", true, { "-C", actualUrl, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); + hasHead ? std::stoull(runProgram("git", true, { "-C", actualUrl, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); return {std::move(storePath), input}; } diff --git a/tests/fetchGit.sh b/tests/fetchGit.sh index 89294d8d2..dd0d98956 100644 --- a/tests/fetchGit.sh +++ b/tests/fetchGit.sh @@ -170,6 +170,14 @@ NIX=$(command -v nix) path5=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = $repo; ref = \"dev\"; }).outPath") [[ $path3 = $path5 ]] +# Fetching from a repo with only a specific revision and no branches should +# not fall back to copying files and record correct revision information. See: #5302 +mkdir $TEST_ROOT/minimal +git -C $TEST_ROOT/minimal init +git -C $TEST_ROOT/minimal fetch $repo $rev2 +git -C $TEST_ROOT/minimal checkout $rev2 +[[ $(nix eval --impure --raw --expr "(builtins.fetchGit { url = $TEST_ROOT/minimal; }).rev") = $rev2 ]] + # Fetching a shallow repo shouldn't work by default, because we can't # return a revCount. git clone --depth 1 file://$repo $TEST_ROOT/shallow From c7e527b82b3dafed5f0da2ec0e14a47cf8e65def Mon Sep 17 00:00:00 2001 From: Martin Schwaighofer Date: Mon, 13 Dec 2021 21:31:20 +0100 Subject: [PATCH 2/4] git fetcher: invoke diff instead of diff-index diff-index operates on the view that git has of the working tree, which might be outdated. The higher-level diff command does this automatically. This change also adds handling for submodules. fixes #4140 Alternative fixes would be invoking update-index before diff-index or matching more closely what require_clean_work_tree from git-sh-setup.sh does, but both those options make it more difficult to reason about correctness. --- src/libfetchers/git.cc | 12 +++++++++++- tests/fetchGit.sh | 7 ++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 7479318e3..d241eb67a 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -235,7 +235,17 @@ struct GitInputScheme : InputScheme try { if (hasHead) { - runProgram("git", true, { "-C", actualUrl, "diff-index", "--quiet", "HEAD", "--" }); + // Using git diff is preferrable over lower-level operations here, + // because its conceptually simpler and we only need the exit code anyways. + auto gitDiffOpts = Strings({ "-C", actualUrl, "diff", "HEAD", "--quiet"}); + if (!submodules) { + // Changes in submodules should only make the tree dirty + // when those submodules will be copied as well. + gitDiffOpts.emplace_back("--ignore-submodules"); + } + gitDiffOpts.emplace_back("--"); + runProgram("git", true, gitDiffOpts); + clean = true; } } catch (ExecError & e) { diff --git a/tests/fetchGit.sh b/tests/fetchGit.sh index dd0d98956..628d96924 100644 --- a/tests/fetchGit.sh +++ b/tests/fetchGit.sh @@ -11,7 +11,7 @@ repo=$TEST_ROOT/git export _NIX_FORCE_HTTP=1 -rm -rf $repo ${repo}-tmp $TEST_HOME/.cache/nix $TEST_ROOT/worktree $TEST_ROOT/shallow +rm -rf $repo ${repo}-tmp $TEST_HOME/.cache/nix $TEST_ROOT/worktree $TEST_ROOT/shallow $TEST_ROOT/minimal git init $repo git -C $repo config user.email "foobar@example.com" @@ -147,8 +147,13 @@ path3=$(nix eval --impure --raw --expr "(builtins.fetchGit $repo).outPath") # (check dirty-tree handling was used) [[ $(nix eval --impure --raw --expr "(builtins.fetchGit $repo).rev") = 0000000000000000000000000000000000000000 ]] [[ $(nix eval --impure --raw --expr "(builtins.fetchGit $repo).shortRev") = 0000000 ]] +# Making a dirty tree clean again and fetching it should +# record correct revision information. See: #4140 +echo world > $repo/hello +[[ $(nix eval --impure --raw --expr "(builtins.fetchGit $repo).rev") = $rev2 ]] # Committing shouldn't change store path, or switch to using 'master' +echo dev > $repo/hello git -C $repo commit -m 'Bla5' -a path4=$(nix eval --impure --raw --expr "(builtins.fetchGit $repo).outPath") [[ $(cat $path4/hello) = dev ]] From 9504445cab095fe3869c5a68342fb2cf23ac0f28 Mon Sep 17 00:00:00 2001 From: Martin Schwaighofer Date: Sat, 1 Jan 2022 21:26:41 +0100 Subject: [PATCH 3/4] git fetcher: distinguish errors more precisely --- src/libfetchers/git.cc | 26 +++++++++++++++++--------- tests/fetchGit.sh | 8 ++++++++ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index d241eb67a..ad877eacc 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -222,17 +222,25 @@ struct GitInputScheme : InputScheme /* Check whether HEAD points to something that looks like a commit, since that is the refrence we want to use later on. */ - bool hasHead = false; - try { - runProgram("git", true, { "-C", actualUrl, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }); - hasHead = true; - } catch (ExecError & e) { - // git exits with status 128 here if it does not detect a repository. - if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 128) { - throw Error("Git tree '%s' is broken.\n'git rev-parse --verify HEAD' failed with exit code %d.", actualUrl, WEXITSTATUS(e.status)); - } + auto result = runProgram(RunOptions { + .program = "git", + .args = { "-C", actualUrl, "--git-dir=.git", "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }, + .mergeStderrToStdout = true + }); + auto exitCode = WEXITSTATUS(result.first); + auto errorMessage = result.second; + + if (errorMessage.find("fatal: not a git repository") != std::string::npos) { + throw Error("'%s' is not a git repository.", actualUrl); + } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) { + // indicates that the repo does not have any commits + // we want to proceed and will consider it dirty later + } else if (exitCode != 0) { + // any other errors should lead to a failure + throw Error("Getting the HEAD of the git tree '%s' failed with exit code %d:\n%s", actualUrl, exitCode, errorMessage); } + bool hasHead = exitCode == 0; try { if (hasHead) { // Using git diff is preferrable over lower-level operations here, diff --git a/tests/fetchGit.sh b/tests/fetchGit.sh index 628d96924..ac23be5c0 100644 --- a/tests/fetchGit.sh +++ b/tests/fetchGit.sh @@ -206,3 +206,11 @@ rev4_nix=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = \"file://$ # The name argument should be handled path9=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = \"file://$repo\"; ref = \"HEAD\"; name = \"foo\"; }).outPath") [[ $path9 =~ -foo$ ]] + +# should fail if there is no repo +rm -rf $repo/.git +(! nix eval --impure --raw --expr "(builtins.fetchGit \"file://$repo\").outPath") + +# should succeed for a repo without commits +git init $repo +path10=$(nix eval --impure --raw --expr "(builtins.fetchGit \"file://$repo\").outPath") From 53523c0ab834416e38a15cf7be6f71d8f68d1c99 Mon Sep 17 00:00:00 2001 From: Martin Schwaighofer Date: Mon, 7 Feb 2022 20:36:39 +0100 Subject: [PATCH 4/4] git fetcher: set locale for rev-parse --- src/libfetchers/git.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index ad877eacc..6571a9d02 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -220,11 +220,17 @@ struct GitInputScheme : InputScheme if (!input.getRef() && !input.getRev() && isLocal) { bool clean = false; + auto env = getEnv(); + // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong + // that way unknown errors can lead to a failure instead of continuing through the wrong code path + env["LC_ALL"] = "C"; + /* Check whether HEAD points to something that looks like a commit, since that is the refrence we want to use later on. */ auto result = runProgram(RunOptions { .program = "git", .args = { "-C", actualUrl, "--git-dir=.git", "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }, + .environment = env, .mergeStderrToStdout = true }); auto exitCode = WEXITSTATUS(result.first);