From 0ab64729e98757d2893e9b29f6ee5996f302fb68 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 16 Oct 2019 00:12:40 +0200 Subject: [PATCH] Improve GitHub caching In particular, when building a flake lock file, inputs like 'nixpkgs' are now downloaded only once. Previously, it would fetch https://api.github.com/repos/<owner>/<repo>/tarball/<ref> and then later https://api.github.com/repos/<owner>/<repo>/tarball/<rev>, even though they produce the same result. Git and GitHub now also share a cache that maps revs to a store path and other info. --- src/libexpr/flake/flake.cc | 37 +------- src/libexpr/primops/fetchGit.cc | 157 +++++++++++++++++++++++++------- src/libexpr/primops/fetchGit.hh | 15 ++- 3 files changed, 136 insertions(+), 73 deletions(-) diff --git a/src/libexpr/flake/flake.cc b/src/libexpr/flake/flake.cc index e8eb353fb..5fb40fabd 100644 --- a/src/libexpr/flake/flake.cc +++ b/src/libexpr/flake/flake.cc @@ -170,43 +170,10 @@ static SourceInfo fetchInput(EvalState & state, const FlakeRef & resolvedRef) // This only downloads only one revision of the repo, not the entire history. if (auto refData = std::get_if(&resolvedRef.data)) { - - // FIXME: use regular /archive URLs instead? api.github.com - // might have stricter rate limits. - - auto url = fmt("https://api.github.com/repos/%s/%s/tarball/%s", - refData->owner, refData->repo, - resolvedRef.rev ? resolvedRef.rev->to_string(Base16, false) - : resolvedRef.ref ? *resolvedRef.ref : "master"); - - std::string accessToken = settings.githubAccessToken.get(); - if (accessToken != "") - url += "?access_token=" + accessToken; - - CachedDownloadRequest request(url); - request.unpack = true; - request.name = "source"; - request.ttl = resolvedRef.rev ? 
1000000000 : settings.tarballTtl; - request.getLastModified = true; - auto result = getDownloader()->downloadCached(state.store, request); - - if (!result.etag) - throw Error("did not receive an ETag header from '%s'", url); - - if (result.etag->size() != 42 || (*result.etag)[0] != '"' || (*result.etag)[41] != '"') - throw Error("ETag header '%s' from '%s' is not a Git revision", *result.etag, url); - - FlakeRef ref(resolvedRef.baseRef()); - ref.rev = Hash(std::string(*result.etag, 1, result.etag->size() - 2), htSHA1); - SourceInfo info(ref); - info.storePath = result.storePath; - info.narHash = state.store->queryPathInfo(info.storePath)->narHash; - info.lastModified = result.lastModified; - - return info; + return doGit(exportGitHub(state.store, refData->owner, refData->repo, resolvedRef.ref, resolvedRef.rev)); } - // This downloads the entire git history + // This downloads the entire git history. else if (auto refData = std::get_if(&resolvedRef.data)) { return doGit(exportGit(state.store, refData->uri, resolvedRef.ref, resolvedRef.rev, "source")); } diff --git a/src/libexpr/primops/fetchGit.cc b/src/libexpr/primops/fetchGit.cc index 21fa025c1..50277672c 100644 --- a/src/libexpr/primops/fetchGit.cc +++ b/src/libexpr/primops/fetchGit.cc @@ -18,6 +18,60 @@ namespace nix { extern std::regex revRegex; +static Path getCacheInfoPathFor(const std::string & name, const Hash & rev) +{ + Path cacheDir = getCacheDir() + "/nix/git-revs"; + std::string linkName = + name == "source" + ? 
rev.gitRev() + : hashString(htSHA512, name + std::string("\0"s) + rev.gitRev()).to_string(Base32, false); + return cacheDir + "/" + linkName + ".link"; +} + +static void cacheGitInfo(const std::string & name, const GitInfo & gitInfo) +{ + nlohmann::json json; + json["storePath"] = gitInfo.storePath; + json["name"] = name; + json["rev"] = gitInfo.rev.gitRev(); + if (gitInfo.revCount) + json["revCount"] = *gitInfo.revCount; + json["lastModified"] = gitInfo.lastModified; + + auto cacheInfoPath = getCacheInfoPathFor(name, gitInfo.rev); + createDirs(dirOf(cacheInfoPath)); + writeFile(cacheInfoPath, json.dump()); +} + +static std::optional lookupGitInfo( + ref store, + const std::string & name, + const Hash & rev) +{ + try { + auto json = nlohmann::json::parse(readFile(getCacheInfoPathFor(name, rev))); + + assert(json["name"] == name && Hash((std::string) json["rev"], htSHA1) == rev); + + Path storePath = json["storePath"]; + + if (store->isValidPath(storePath)) { + GitInfo gitInfo; + gitInfo.storePath = storePath; + gitInfo.rev = rev; + if (json.find("revCount") != json.end()) + gitInfo.revCount = json["revCount"]; + gitInfo.lastModified = json["lastModified"]; + return gitInfo; + } + + } catch (SysError & e) { + if (e.errNo != ENOENT) throw; + } + + return {}; +} + GitInfo exportGit(ref store, std::string uri, std::optional ref, std::optional rev, @@ -25,6 +79,17 @@ GitInfo exportGit(ref store, std::string uri, { assert(!rev || rev->type == htSHA1); + if (rev) { + if (auto gitInfo = lookupGitInfo(store, name, *rev)) { + // If this gitInfo was produced by exportGitHub, then it won't + // have a revCount. So we have to do a full clone. 
+ if (gitInfo->revCount) { + gitInfo->ref = ref; + return *gitInfo; + } + } + } + if (hasPrefix(uri, "git+")) uri = std::string(uri, 4); bool isLocal = hasPrefix(uri, "/") && pathExists(uri + "/.git"); @@ -100,9 +165,6 @@ GitInfo exportGit(ref store, std::string uri, isLocal = true; } - deletePath(getCacheDir() + "/nix/git"); - deletePath(getCacheDir() + "/nix/gitv2"); - Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, uri).to_string(Base32, false); Path repoDir; @@ -179,6 +241,13 @@ GitInfo exportGit(ref store, std::string uri, rev = Hash(chomp(readFile(localRefFile)), htSHA1); } + if (auto gitInfo = lookupGitInfo(store, name, *rev)) { + if (gitInfo->revCount) { + gitInfo->ref = ref; + return *gitInfo; + } + } + // FIXME: check whether rev is an ancestor of ref. GitInfo gitInfo; gitInfo.ref = *ref; @@ -186,29 +255,6 @@ GitInfo exportGit(ref store, std::string uri, printTalkative("using revision %s of repo '%s'", gitInfo.rev, uri); - std::string storeLinkName = hashString(htSHA512, - name + std::string("\0"s) + gitInfo.rev.gitRev()).to_string(Base32, false); - Path storeLink = cacheDir + "/" + storeLinkName + ".link"; - PathLocks storeLinkLock({storeLink}, fmt("waiting for lock on '%1%'...", storeLink)); // FIXME: broken - - try { - auto json = nlohmann::json::parse(readFile(storeLink)); - - assert(json["name"] == name && Hash((std::string) json["rev"], htSHA1) == gitInfo.rev); - - Path storePath = json["storePath"]; - - if (store->isValidPath(storePath)) { - gitInfo.storePath = storePath; - gitInfo.revCount = json["revCount"]; - gitInfo.lastModified = json["lastModified"]; - return gitInfo; - } - - } catch (SysError & e) { - if (e.errNo != ENOENT) throw; - } - // FIXME: should pipe this, or find some better way to extract a // revision. 
auto tar = runProgram("git", true, { "-C", repoDir, "archive", gitInfo.rev.gitRev() }); @@ -223,15 +269,55 @@ GitInfo exportGit(ref store, std::string uri, gitInfo.revCount = std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", gitInfo.rev.gitRev() })); gitInfo.lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "show", "-s", "--format=%ct", gitInfo.rev.gitRev() })); - nlohmann::json json; - json["storePath"] = gitInfo.storePath; - json["uri"] = uri; - json["name"] = name; - json["rev"] = gitInfo.rev.gitRev(); - json["revCount"] = gitInfo.revCount; - json["lastModified"] = gitInfo.lastModified; + cacheGitInfo(name, gitInfo); - writeFile(storeLink, json.dump()); + return gitInfo; +} + +GitInfo exportGitHub( + ref store, + const std::string & owner, + const std::string & repo, + std::optional ref, + std::optional rev) +{ + if (rev) { + if (auto gitInfo = lookupGitInfo(store, "source", *rev)) + return *gitInfo; + } + + // FIXME: use regular /archive URLs instead? api.github.com + // might have stricter rate limits. + + auto url = fmt("https://api.github.com/repos/%s/%s/tarball/%s", + owner, repo, rev ? rev->to_string(Base16, false) : ref ? *ref : "master"); + + std::string accessToken = settings.githubAccessToken.get(); + if (accessToken != "") + url += "?access_token=" + accessToken; + + CachedDownloadRequest request(url); + request.unpack = true; + request.name = "source"; + request.ttl = rev ? 
1000000000 : settings.tarballTtl; + request.getLastModified = true; + auto result = getDownloader()->downloadCached(store, request); + + if (!result.etag) + throw Error("did not receive an ETag header from '%s'", url); + + if (result.etag->size() != 42 || (*result.etag)[0] != '"' || (*result.etag)[41] != '"') + throw Error("ETag header '%s' from '%s' is not a Git revision", *result.etag, url); + + assert(result.lastModified); + + GitInfo gitInfo; + gitInfo.storePath = result.storePath; + gitInfo.rev = Hash(std::string(*result.etag, 1, result.etag->size() - 2), htSHA1); + gitInfo.lastModified = *result.lastModified; + + // FIXME: this can overwrite a cache file that contains a revCount. + cacheGitInfo("source", gitInfo); return gitInfo; } @@ -283,7 +369,8 @@ static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Va mkString(*state.allocAttr(v, state.sOutPath), gitInfo.storePath, PathSet({gitInfo.storePath})); mkString(*state.allocAttr(v, state.symbols.create("rev")), gitInfo.rev.gitRev()); mkString(*state.allocAttr(v, state.symbols.create("shortRev")), gitInfo.rev.gitShortRev()); - mkInt(*state.allocAttr(v, state.symbols.create("revCount")), gitInfo.revCount); + assert(gitInfo.revCount); + mkInt(*state.allocAttr(v, state.symbols.create("revCount")), *gitInfo.revCount); v.attrs->sort(); if (state.allowedPaths) diff --git a/src/libexpr/primops/fetchGit.hh b/src/libexpr/primops/fetchGit.hh index 006fa8b5f..fe2b49942 100644 --- a/src/libexpr/primops/fetchGit.hh +++ b/src/libexpr/primops/fetchGit.hh @@ -9,15 +9,24 @@ namespace nix { struct GitInfo { Path storePath; - std::string ref; + std::optional ref; Hash rev{htSHA1}; - uint64_t revCount; + std::optional revCount; time_t lastModified; }; -GitInfo exportGit(ref store, std::string uri, +GitInfo exportGit( + ref store, + std::string uri, std::optional ref, std::optional rev, const std::string & name); +GitInfo exportGitHub( + ref store, + const std::string & owner, + const std::string & repo, + 
std::optional ref, + std::optional rev); + }