From 19d9955e8968b721daf04a21e69904cca136aae5 Mon Sep 17 00:00:00 2001 From: Nicolas Pierron Date: Sat, 14 Apr 2012 18:17:35 -0700 Subject: [PATCH] Optimize fetch-git. --- src/lib/Hydra/Helper/AddBuilds.pm | 117 ++++++++++++++++++------------ src/script/hydra-build | 2 +- src/script/nix-prefetch-git | 19 +++-- tests/Setup.pm | 10 ++- tests/evaluation-tests.pl | 18 ++++- tests/jobs/bzr-checkout-update.sh | 1 + tests/jobs/bzr-update.sh | 1 + tests/jobs/git-update.sh | 26 +++++++ tests/jobs/hg-update.sh | 1 + tests/jobs/svn-checkout-update.sh | 1 + tests/jobs/svn-update.sh | 1 + 11 files changed, 139 insertions(+), 58 deletions(-) create mode 100755 tests/jobs/bzr-checkout-update.sh create mode 100755 tests/jobs/bzr-update.sh create mode 100755 tests/jobs/git-update.sh create mode 100755 tests/jobs/hg-update.sh create mode 100755 tests/jobs/svn-checkout-update.sh create mode 100755 tests/jobs/svn-update.sh diff --git a/src/lib/Hydra/Helper/AddBuilds.pm b/src/lib/Hydra/Helper/AddBuilds.pm index c6c7703a..39e9cbe1 100644 --- a/src/lib/Hydra/Helper/AddBuilds.pm +++ b/src/lib/Hydra/Helper/AddBuilds.pm @@ -315,8 +315,8 @@ sub fetchInputSystemBuild { sub fetchInputGit { my ($db, $project, $jobset, $name, $value) = @_; - (my $uri, my $branch) = split ' ', $value; - $branch = defined $branch ? $branch : "master"; + (my $uri, my $branch, my $deepClone) = split ' ', $value; + $branch = defined $branch ? $branch : "master"; my $timestamp = time; my $sha256; @@ -325,42 +325,64 @@ sub fetchInputGit { mkpath(scmPath); my $clonePath = scmPath . "/" . sha256_hex($uri); - my $stdout; my $stderr; + my $stdout = ""; my $stderr = ""; my $res; if (! -d $clonePath) { - (my $res, $stdout, $stderr) = captureStdoutStderr(600, + # Clone everything and fetch the branch. + # TODO: Optimize the first clone by using "git init $clonePath" and "git remote add origin $uri". 
+ ($res, $stdout, $stderr) = captureStdoutStderr(600, ("git", "clone", "--branch", $branch, $uri, $clonePath)); die "Error cloning git repo at `$uri':\n$stderr" unless $res; } - # git pull + check rev chdir $clonePath or die $!; # !!! urgh, shouldn't do a chdir - (my $res, $stdout, $stderr) = captureStdoutStderr(600, - ("git", "pull", "--all")); - die "Error pulling latest change git repo at `$uri':\n$stderr" unless $res; - (my $res1, $stdout, $stderr) = captureStdoutStderr(600, - ("git", "ls-remote", $clonePath, $branch)); - - die "Cannot get head revision of Git branch '$branch' at `$uri':\n$stderr" unless $res1 ; - - # Take the first commit ID returned by `ls-remote'. The - # assumption is that `ls-remote' returned both `refs/heads/BRANCH' - # and `refs/remotes/origin/BRANCH', and that both point at the - # same commit. - my ($first) = split /\n/, $stdout; - (my $revision, my $ref) = split ' ', $first; - die unless $revision =~ /^[0-9a-fA-F]+$/; - - if (-f ".topdeps") { - # This is a TopGit branch. Fetch all the topic branches so - # that builders can run "tg patch" and similar. - (my $res, $stdout, $stderr) = captureStdoutStderr(600, - ("tg", "remote", "--populate", "origin")); - - print STDERR "Warning: `tg remote --populate origin' failed:\n$stderr" unless $res; + if (defined $deepClone) { + # This fetches every branch from the remote repository and creates a + # local branch for each head of the remote repository. This is + # necessary to provide a working git-describe. + ($res, $stdout, $stderr) = captureStdoutStderr(600, + ("git", "pull", "--ff-only", "-fu", "--all", "origin")); + die "Error pulling latest change from git repo at `$uri':\n$stderr" unless $res; + } else { + # This command forces the local branch to be the same as + # the remote branch, whatever the repository state is. This command mirrors + # only one branch of the remote repository. 
+ ($res, $stdout, $stderr) = captureStdoutStderr(600, + ("git", "fetch", "-fu", "origin", "+$branch:$branch")); + die "Error fetching latest change from git repo at `$uri':\n$stderr" unless $res; } - # Some simple caching: don't check a uri/branch more than once every hour, but prefer exact match on uri/branch/revision. + ($res, $stdout, $stderr) = captureStdoutStderr(600, + ("git", "rev-parse", "$branch")); + die "Error getting revision number of Git branch '$branch' at `$uri':\n$stderr" unless $res; + + my ($revision) = split /\n/, $stdout; + die unless $revision =~ /^[0-9a-fA-F]+$/; + die "Error getting a well-formated revision number of Git branch '$branch' at `$uri':\n$stdout" unless $res; + + my $ref = "refs/heads/$branch"; + + # If deepClone is defined, then we look at the content of the repository + # to determine if this is a top-git branch. + if (defined $deepClone) { + + # Checkout the branch to look at its content. + ($res, $stdout, $stderr) = captureStdoutStderr(600, + ("git", "checkout", "$branch")); + die "Error checking out Git branch '$branch' at `$uri':\n$stderr" unless $res; + + if (-f ".topdeps") { + # This is a TopGit branch. Fetch all the topic branches so + # that builders can run "tg patch" and similar. + ($res, $stdout, $stderr) = captureStdoutStderr(600, + ("tg", "remote", "--populate", "origin")); + + print STDERR "Warning: `tg remote --populate origin' failed:\n$stderr" unless $res; + } + } + + # Some simple caching: don't check a uri/branch/revision more than once. + # TODO: Fix case where the branch is reset to a previous commit. my $cachedInput ; ($cachedInput) = $db->resultset('CachedGitInputs')->search( {uri => $uri, branch => $branch, revision => $revision}, @@ -371,25 +393,28 @@ sub fetchInputGit { $sha256 = $cachedInput->sha256hash; $revision = $cachedInput->revision; } else { - # Then download this revision into the store. 
- print STDERR "checking out Git input from $uri\n"; + print STDERR "checking out Git branch $branch from $uri\n"; $ENV{"NIX_HASH_ALGO"} = "sha256"; $ENV{"PRINT_PATH"} = "1"; - - # Checked out code often wants to be able to run `git - # describe', e.g., code that uses Gnulib's `git-version-gen' - # script. Thus, we leave `.git' in there. Same for - # Subversion (e.g., libgcrypt's build system uses that.) - $ENV{"NIX_PREFETCH_GIT_LEAVE_DOT_GIT"} = "1"; - - # Ask for a "deep clone" to allow "git describe" and similar - # tools to work. See - # http://thread.gmane.org/gmane.linux.distributions.nixos/3569 - # for a discussion. - $ENV{"NIX_PREFETCH_GIT_DEEP_CLONE"} = "1"; + $ENV{"NIX_PREFETCH_GIT_LEAVE_DOT_GIT"} = "0"; + $ENV{"NIX_PREFETCH_GIT_DEEP_CLONE"} = ""; - (my $res, $stdout, $stderr) = captureStdoutStderr(600, + if (defined $deepClone) { + # Checked out code often wants to be able to run `git + # describe', e.g., code that uses Gnulib's `git-version-gen' + # script. Thus, we leave `.git' in there. Same for + # Subversion (e.g., libgcrypt's build system uses that.) + $ENV{"NIX_PREFETCH_GIT_LEAVE_DOT_GIT"} = "1"; + + # Ask for a "deep clone" to allow "git describe" and similar + # tools to work. See + # http://thread.gmane.org/gmane.linux.distributions.nixos/3569 + # for a discussion. + $ENV{"NIX_PREFETCH_GIT_DEEP_CLONE"} = "1"; + } + + ($res, $stdout, $stderr) = captureStdoutStderr(600, ("nix-prefetch-git", $clonePath, $revision)); die "Cannot check out Git repository branch '$branch' at `$uri':\n$stderr" unless $res; @@ -509,7 +534,7 @@ sub fetchInputHg { # init local hg clone - my $stdout; my $stderr; + my $stdout = ""; my $stderr = ""; mkpath(scmPath); my $clonePath = scmPath . "/" . 
sha256_hex($uri); @@ -681,7 +706,7 @@ sub captureStdoutStderr { if ($@) { die unless $@ eq "timeout\n"; # propagate unexpected errors - return (undef, undef, undef); + return (undef, "", "timeout\n"); } else { return ($res, $stdout, $stderr); } diff --git a/src/script/hydra-build b/src/script/hydra-build index aeb70574..e5db81b9 100755 --- a/src/script/hydra-build +++ b/src/script/hydra-build @@ -371,7 +371,7 @@ sub doBuild { if ($thisBuildFailed) { $buildStatus = 1; } elsif ($someBuildFailed) { $buildStatus = 2; } else { $buildStatus = 3; } - } + } # Only store the output of running Nix if we have a miscellaneous error. $errormsg = undef unless $buildStatus == 3; diff --git a/src/script/nix-prefetch-git b/src/script/nix-prefetch-git index 979aa531..e0458819 100755 --- a/src/script/nix-prefetch-git +++ b/src/script/nix-prefetch-git @@ -63,8 +63,8 @@ Options: --url url Any url understand by 'git clone'. --rev ref Any sha1 or references (such as refs/heads/master) --hash h Expected hash. - --deepClone Clone submodules recursively. - --no-deepClone Do not clone submodules. + --deepClone Clone history until a tag is found as parent. + --no-deepClone Clone the minimum history. --leave-dotGit Keep the .git directories. --builder Clone as fetchgit does, but url, rev, and out option are mandatory. " @@ -117,7 +117,14 @@ checkout_ref(){ # allow "git describe" and similar tools to work. See # http://thread.gmane.org/gmane.linux.distributions.nixos/3569 # for a discussion. - return 1 + + # To make git describe work, we need to fetch all tags. + if git fetch -t ${builder:+--progress} --depth 1 origin; then + return 1 + else + # There is no tag; don't try to recover the git-describe mechanism. + deepClone=false + fi fi if test -z "$ref"; then @@ -209,7 +216,7 @@ clone_user_rev() { # Allow doing additional processing before .git removal eval "$NIX_PREFETCH_GIT_CHECKOUT_HOOK" if test -z "$leaveDotGit"; then - echo "removing \`.git'..." 
>&2 + test -n "$QUIET" || echo "removing \`.git'..." >&2 find $dir -name .git\* | xargs rm -rf fi } @@ -248,7 +255,7 @@ else # Compute the hash. hash=$(nix-hash --type $hashType $hashFormat $tmpFile) - if ! test -n "$QUIET"; then echo "hash is $hash" >&2; fi + test -n "$QUIET" || echo "hash is $hash" >&2; # Add the downloaded file to the Nix store. finalPath=$(nix-store --add-fixed --recursive "$hashType" $tmpFile) @@ -259,7 +266,7 @@ else fi fi - if ! test -n "$QUIET"; then echo "path is $finalPath" >&2; fi + test -n "$QUIET" || echo "path is $finalPath" >&2 echo $hash diff --git a/tests/Setup.pm b/tests/Setup.pm index 549066a5..551c1683 100644 --- a/tests/Setup.pm +++ b/tests/Setup.pm @@ -8,7 +8,7 @@ use Hydra::Helper::AddBuilds; use Cwd; our @ISA = qw(Exporter); -our @EXPORT = qw(hydra_setup nrBuildsForJobset queuedBuildsForJobset nrQueuedBuildsForJobset createBaseJobset createJobsetWithOneInput evalSucceeds runBuild); +our @EXPORT = qw(hydra_setup nrBuildsForJobset queuedBuildsForJobset nrQueuedBuildsForJobset createBaseJobset createJobsetWithOneInput evalSucceeds runBuild updateRepository); sub hydra_setup { my ($db) = @_; @@ -74,4 +74,12 @@ sub runBuild { return captureStdoutStderr(60, ("../src/script/hydra-build", $build->id)); } +sub updateRepository { + my ($scm, $update, $repo) = @_; + my ($res, $stdout, $stderr) = captureStdoutStderr(60, ($update, $repo)); + die "Unexpected update error with $scm: $stderr\n" unless $res; + print STDOUT "Update $scm repository: $stdout" if $stdout ne ""; + return $stdout ne ""; +} + 1; diff --git a/tests/evaluation-tests.pl b/tests/evaluation-tests.pl index 26510000..f01cb593 100755 --- a/tests/evaluation-tests.pl +++ b/tests/evaluation-tests.pl @@ -7,7 +7,7 @@ use Setup; my $db = Hydra::Model::DB->new; -use Test::Simple tests => 28; +use Test::Simple tests => 48; hydra_setup($db); @@ -57,7 +57,17 @@ my @scminputs = ("svn", "svn-checkout", "git", "bzr", "bzr-checkout", "hg"); foreach my $scm (@scminputs) { $jobset = 
createJobsetWithOneInput($scm, "$scm-input.nix", "src", $scm, "$jobsBaseUri/$scm-repo"); - ok(evalSucceeds($jobset), "Evaluating jobs/$scm-input.nix should exit with return code 0."); - ok(nrQueuedBuildsForJobset($jobset) == 1, "Evaluating jobs/$scm-input.nix should result in 1 build in queue"); -} + my $c = 1; + my $q = 1; + do { + # Verify that it can be fetched and queued. + ok(evalSucceeds($jobset), "$c Evaluating jobs/$scm-input.nix should exit with return code 0."); $c++; + ok(nrQueuedBuildsForJobset($jobset) == $q, "$c Evaluating jobs/$scm-input.nix should result in 1 build in queue"); $c++; + # Verify that it is deterministic and not queued again. + ok(evalSucceeds($jobset), "$c Evaluating jobs/$scm-input.nix should exit with return code 0."); $c++; + ok(nrQueuedBuildsForJobset($jobset) == $q, "$c Evaluating jobs/$scm-input.nix should result in $q build in queue"); $c++; + + $q++; + } while(updateRepository($scm, getcwd . "/jobs/$scm-update.sh", getcwd . "/$scm-repo/")); +} diff --git a/tests/jobs/bzr-checkout-update.sh b/tests/jobs/bzr-checkout-update.sh new file mode 100755 index 00000000..50d48845 --- /dev/null +++ b/tests/jobs/bzr-checkout-update.sh @@ -0,0 +1 @@ +#! /bin/sh diff --git a/tests/jobs/bzr-update.sh b/tests/jobs/bzr-update.sh new file mode 100755 index 00000000..50d48845 --- /dev/null +++ b/tests/jobs/bzr-update.sh @@ -0,0 +1 @@ +#! /bin/sh diff --git a/tests/jobs/git-update.sh b/tests/jobs/git-update.sh new file mode 100755 index 00000000..f5631a87 --- /dev/null +++ b/tests/jobs/git-update.sh @@ -0,0 +1,26 @@ +#! /bin/sh + +cd "$1" +STATE_FILE=.state +if test -e $STATE_FILE; then + state=$(cat $STATE_FILE) +else + state=0; +fi + +case $state in + (0) + echo "Add new file." + touch git-file-2 + git add git-file-2 >&2 + git commit -m "add git file 2" git-file-2 >&2 + ;; + (1) + echo "Rewrite commit." 
+ echo 1 > git-file-2 + git add git-file-2 >&2 + git commit --amend -m "add git file 2" git-file-2 >&2 + ;; +esac + +echo $(($state + 1)) > $STATE_FILE diff --git a/tests/jobs/hg-update.sh b/tests/jobs/hg-update.sh new file mode 100755 index 00000000..50d48845 --- /dev/null +++ b/tests/jobs/hg-update.sh @@ -0,0 +1 @@ +#! /bin/sh diff --git a/tests/jobs/svn-checkout-update.sh b/tests/jobs/svn-checkout-update.sh new file mode 100755 index 00000000..50d48845 --- /dev/null +++ b/tests/jobs/svn-checkout-update.sh @@ -0,0 +1 @@ +#! /bin/sh diff --git a/tests/jobs/svn-update.sh b/tests/jobs/svn-update.sh new file mode 100755 index 00000000..50d48845 --- /dev/null +++ b/tests/jobs/svn-update.sh @@ -0,0 +1 @@ +#! /bin/sh