Improve GitHub caching

In particular, when building a flake lock file, inputs like 'nixpkgs'
are now downloaded only once. Previously, Nix would first fetch
https://api.github.com/repos/<owner>/<repo>/tarball/<ref> and then
later fetch https://api.github.com/repos/<owner>/<repo>/tarball/<rev>,
even though both requests produce the same result.

Git and GitHub now also share a cache that maps revs to a store path
and other info.
This commit is contained in:
Eelco Dolstra 2019-10-16 00:12:40 +02:00
parent 14a89aa8cd
commit 0ab64729e9
3 changed files with 136 additions and 73 deletions

View file

@ -170,43 +170,10 @@ static SourceInfo fetchInput(EvalState & state, const FlakeRef & resolvedRef)
// This only downloads only one revision of the repo, not the entire history. // This only downloads only one revision of the repo, not the entire history.
if (auto refData = std::get_if<FlakeRef::IsGitHub>(&resolvedRef.data)) { if (auto refData = std::get_if<FlakeRef::IsGitHub>(&resolvedRef.data)) {
return doGit(exportGitHub(state.store, refData->owner, refData->repo, resolvedRef.ref, resolvedRef.rev));
// FIXME: use regular /archive URLs instead? api.github.com
// might have stricter rate limits.
auto url = fmt("https://api.github.com/repos/%s/%s/tarball/%s",
refData->owner, refData->repo,
resolvedRef.rev ? resolvedRef.rev->to_string(Base16, false)
: resolvedRef.ref ? *resolvedRef.ref : "master");
std::string accessToken = settings.githubAccessToken.get();
if (accessToken != "")
url += "?access_token=" + accessToken;
CachedDownloadRequest request(url);
request.unpack = true;
request.name = "source";
request.ttl = resolvedRef.rev ? 1000000000 : settings.tarballTtl;
request.getLastModified = true;
auto result = getDownloader()->downloadCached(state.store, request);
if (!result.etag)
throw Error("did not receive an ETag header from '%s'", url);
if (result.etag->size() != 42 || (*result.etag)[0] != '"' || (*result.etag)[41] != '"')
throw Error("ETag header '%s' from '%s' is not a Git revision", *result.etag, url);
FlakeRef ref(resolvedRef.baseRef());
ref.rev = Hash(std::string(*result.etag, 1, result.etag->size() - 2), htSHA1);
SourceInfo info(ref);
info.storePath = result.storePath;
info.narHash = state.store->queryPathInfo(info.storePath)->narHash;
info.lastModified = result.lastModified;
return info;
} }
// This downloads the entire git history // This downloads the entire git history.
else if (auto refData = std::get_if<FlakeRef::IsGit>(&resolvedRef.data)) { else if (auto refData = std::get_if<FlakeRef::IsGit>(&resolvedRef.data)) {
return doGit(exportGit(state.store, refData->uri, resolvedRef.ref, resolvedRef.rev, "source")); return doGit(exportGit(state.store, refData->uri, resolvedRef.ref, resolvedRef.rev, "source"));
} }

View file

@ -18,6 +18,60 @@ namespace nix {
extern std::regex revRegex; extern std::regex revRegex;
/* Return the path of the cache file describing revision `rev` as
   fetched under the store name `name`.

   All cache files live in `<cache dir>/nix/git-revs` and carry a
   `.link` suffix. For the name "source" the file is named after the
   Git revision itself; for any other name it is named after the
   Base32 SHA-512 hash of the name, a NUL byte and the revision. */
static Path getCacheInfoPathFor(const std::string & name, const Hash & rev)
{
    const Path revCacheDir = getCacheDir() + "/nix/git-revs";

    std::string baseName;
    if (name == "source")
        baseName = rev.gitRev();
    else
        baseName = hashString(htSHA512, name + std::string("\0"s) + rev.gitRev()).to_string(Base32, false);

    return revCacheDir + "/" + baseName + ".link";
}
static void cacheGitInfo(const std::string & name, const GitInfo & gitInfo)
{
nlohmann::json json;
json["storePath"] = gitInfo.storePath;
json["name"] = name;
json["rev"] = gitInfo.rev.gitRev();
if (gitInfo.revCount)
json["revCount"] = *gitInfo.revCount;
json["lastModified"] = gitInfo.lastModified;
auto cacheInfoPath = getCacheInfoPathFor(name, gitInfo.rev);
createDirs(dirOf(cacheInfoPath));
writeFile(cacheInfoPath, json.dump());
}
/* Look up cached information about revision `rev` fetched under the
   store name `name`.

   Returns the cached GitInfo when the cache file selected by
   getCacheInfoPathFor(name, rev) exists, is well-formed, and refers to
   a store path that is still valid in `store`. Returns an empty
   optional when the cache file does not exist, when it cannot be
   interpreted as a cache record, or when its store path is no longer
   valid.

   The returned GitInfo has `storePath`, `rev` and `lastModified` set;
   `revCount` is set only if the record contains it. `ref` is not
   filled in here.

   A SysError other than ENOENT raised while reading the file is
   propagated to the caller. */
static std::optional<GitInfo> lookupGitInfo(
    ref<Store> store,
    const std::string & name,
    const Hash & rev)
{
    try {
        auto json = nlohmann::json::parse(readFile(getCacheInfoPathFor(name, rev)));

        // Use checked access (at) rather than operator[] so that a record
        // lacking a key raises a JSON exception instead of silently
        // inserting a null value.
        assert(json.at("name") == name
            && Hash(json.at("rev").get<std::string>(), htSHA1) == rev);

        Path storePath = json.at("storePath").get<std::string>();

        if (store->isValidPath(storePath)) {
            GitInfo gitInfo;
            gitInfo.storePath = storePath;
            gitInfo.rev = rev;
            auto revCount = json.find("revCount");
            if (revCount != json.end())
                gitInfo.revCount = revCount->get<uint64_t>();
            gitInfo.lastModified = json.at("lastModified").get<time_t>();
            return gitInfo;
        }

    } catch (SysError & e) {
        // A missing cache file simply means there is no cached entry.
        if (e.errNo != ENOENT) throw;
    } catch (nlohmann::json::exception &) {
        // The cache file is truncated or otherwise malformed (it is
        // written without a lock). Treat this as a cache miss so that
        // the caller re-fetches and rewrites the entry.
    }

    return {};
}
GitInfo exportGit(ref<Store> store, std::string uri, GitInfo exportGit(ref<Store> store, std::string uri,
std::optional<std::string> ref, std::optional<std::string> ref,
std::optional<Hash> rev, std::optional<Hash> rev,
@ -25,6 +79,17 @@ GitInfo exportGit(ref<Store> store, std::string uri,
{ {
assert(!rev || rev->type == htSHA1); assert(!rev || rev->type == htSHA1);
if (rev) {
if (auto gitInfo = lookupGitInfo(store, name, *rev)) {
// If this gitInfo was produced by exportGitHub, then it won't
// have a revCount. So we have to do a full clone.
if (gitInfo->revCount) {
gitInfo->ref = ref;
return *gitInfo;
}
}
}
if (hasPrefix(uri, "git+")) uri = std::string(uri, 4); if (hasPrefix(uri, "git+")) uri = std::string(uri, 4);
bool isLocal = hasPrefix(uri, "/") && pathExists(uri + "/.git"); bool isLocal = hasPrefix(uri, "/") && pathExists(uri + "/.git");
@ -100,9 +165,6 @@ GitInfo exportGit(ref<Store> store, std::string uri,
isLocal = true; isLocal = true;
} }
deletePath(getCacheDir() + "/nix/git");
deletePath(getCacheDir() + "/nix/gitv2");
Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, uri).to_string(Base32, false); Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, uri).to_string(Base32, false);
Path repoDir; Path repoDir;
@ -179,6 +241,13 @@ GitInfo exportGit(ref<Store> store, std::string uri,
rev = Hash(chomp(readFile(localRefFile)), htSHA1); rev = Hash(chomp(readFile(localRefFile)), htSHA1);
} }
if (auto gitInfo = lookupGitInfo(store, name, *rev)) {
if (gitInfo->revCount) {
gitInfo->ref = ref;
return *gitInfo;
}
}
// FIXME: check whether rev is an ancestor of ref. // FIXME: check whether rev is an ancestor of ref.
GitInfo gitInfo; GitInfo gitInfo;
gitInfo.ref = *ref; gitInfo.ref = *ref;
@ -186,29 +255,6 @@ GitInfo exportGit(ref<Store> store, std::string uri,
printTalkative("using revision %s of repo '%s'", gitInfo.rev, uri); printTalkative("using revision %s of repo '%s'", gitInfo.rev, uri);
std::string storeLinkName = hashString(htSHA512,
name + std::string("\0"s) + gitInfo.rev.gitRev()).to_string(Base32, false);
Path storeLink = cacheDir + "/" + storeLinkName + ".link";
PathLocks storeLinkLock({storeLink}, fmt("waiting for lock on '%1%'...", storeLink)); // FIXME: broken
try {
auto json = nlohmann::json::parse(readFile(storeLink));
assert(json["name"] == name && Hash((std::string) json["rev"], htSHA1) == gitInfo.rev);
Path storePath = json["storePath"];
if (store->isValidPath(storePath)) {
gitInfo.storePath = storePath;
gitInfo.revCount = json["revCount"];
gitInfo.lastModified = json["lastModified"];
return gitInfo;
}
} catch (SysError & e) {
if (e.errNo != ENOENT) throw;
}
// FIXME: should pipe this, or find some better way to extract a // FIXME: should pipe this, or find some better way to extract a
// revision. // revision.
auto tar = runProgram("git", true, { "-C", repoDir, "archive", gitInfo.rev.gitRev() }); auto tar = runProgram("git", true, { "-C", repoDir, "archive", gitInfo.rev.gitRev() });
@ -223,15 +269,55 @@ GitInfo exportGit(ref<Store> store, std::string uri,
gitInfo.revCount = std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", gitInfo.rev.gitRev() })); gitInfo.revCount = std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", gitInfo.rev.gitRev() }));
gitInfo.lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "show", "-s", "--format=%ct", gitInfo.rev.gitRev() })); gitInfo.lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "show", "-s", "--format=%ct", gitInfo.rev.gitRev() }));
nlohmann::json json; cacheGitInfo(name, gitInfo);
json["storePath"] = gitInfo.storePath;
json["uri"] = uri;
json["name"] = name;
json["rev"] = gitInfo.rev.gitRev();
json["revCount"] = gitInfo.revCount;
json["lastModified"] = gitInfo.lastModified;
writeFile(storeLink, json.dump()); return gitInfo;
}
// Fetch a single revision of the GitHub repository `owner`/`repo` as an
// unpacked tarball and return its store path, revision and
// last-modified time.
//
// `rev`, if given, selects the exact revision; otherwise `ref` is used,
// and if neither is given the tarball for "master" is requested.
//
// The returned GitInfo never gets a revCount from this function's own
// download path; only storePath, rev and lastModified are filled in.
//
// Throws Error if the response carries no ETag header or if the ETag is
// not a 40-character value wrapped in double quotes.
GitInfo exportGitHub(
ref<Store> store,
const std::string & owner,
const std::string & repo,
std::optional<std::string> ref,
std::optional<Hash> rev)
{
// When the exact revision is known, try the rev cache first (under the
// name "source") and skip the download entirely on a hit.
if (rev) {
if (auto gitInfo = lookupGitInfo(store, "source", *rev))
return *gitInfo;
}
// FIXME: use regular /archive URLs instead? api.github.com
// might have stricter rate limits.
auto url = fmt("https://api.github.com/repos/%s/%s/tarball/%s",
owner, repo, rev ? rev->to_string(Base16, false) : ref ? *ref : "master");
// Append the configured GitHub access token, if any, as a query parameter.
std::string accessToken = settings.githubAccessToken.get();
if (accessToken != "")
url += "?access_token=" + accessToken;
CachedDownloadRequest request(url);
request.unpack = true;
request.name = "source";
// A request for a specific revision gets a very large TTL; a request
// by ref uses the configured tarball TTL.
request.ttl = rev ? 1000000000 : settings.tarballTtl;
request.getLastModified = true;
auto result = getDownloader()->downloadCached(store, request);
if (!result.etag)
throw Error("did not receive an ETag header from '%s'", url);
// The ETag is expected to be the revision wrapped in double quotes:
// 40 characters plus the two quote characters.
if (result.etag->size() != 42 || (*result.etag)[0] != '"' || (*result.etag)[41] != '"')
throw Error("ETag header '%s' from '%s' is not a Git revision", *result.etag, url);
// NOTE(review): this asserts on a value derived from the download
// result; presumably getLastModified guarantees it is set — confirm.
assert(result.lastModified);
GitInfo gitInfo;
gitInfo.storePath = result.storePath;
// Strip the surrounding quotes from the ETag and parse the rest as a SHA-1 revision.
gitInfo.rev = Hash(std::string(*result.etag, 1, result.etag->size() - 2), htSHA1);
gitInfo.lastModified = *result.lastModified;
// FIXME: this can overwrite a cache file that contains a revCount.
cacheGitInfo("source", gitInfo);
return gitInfo; return gitInfo;
} }
@ -283,7 +369,8 @@ static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Va
mkString(*state.allocAttr(v, state.sOutPath), gitInfo.storePath, PathSet({gitInfo.storePath})); mkString(*state.allocAttr(v, state.sOutPath), gitInfo.storePath, PathSet({gitInfo.storePath}));
mkString(*state.allocAttr(v, state.symbols.create("rev")), gitInfo.rev.gitRev()); mkString(*state.allocAttr(v, state.symbols.create("rev")), gitInfo.rev.gitRev());
mkString(*state.allocAttr(v, state.symbols.create("shortRev")), gitInfo.rev.gitShortRev()); mkString(*state.allocAttr(v, state.symbols.create("shortRev")), gitInfo.rev.gitShortRev());
mkInt(*state.allocAttr(v, state.symbols.create("revCount")), gitInfo.revCount); assert(gitInfo.revCount);
mkInt(*state.allocAttr(v, state.symbols.create("revCount")), *gitInfo.revCount);
v.attrs->sort(); v.attrs->sort();
if (state.allowedPaths) if (state.allowedPaths)

View file

@ -9,15 +9,24 @@ namespace nix {
struct GitInfo struct GitInfo
{ {
Path storePath; Path storePath;
std::string ref; std::optional<std::string> ref;
Hash rev{htSHA1}; Hash rev{htSHA1};
uint64_t revCount; std::optional<uint64_t> revCount;
time_t lastModified; time_t lastModified;
}; };
GitInfo exportGit(ref<Store> store, std::string uri, GitInfo exportGit(
ref<Store> store,
std::string uri,
std::optional<std::string> ref, std::optional<std::string> ref,
std::optional<Hash> rev, std::optional<Hash> rev,
const std::string & name); const std::string & name);
GitInfo exportGitHub(
ref<Store> store,
const std::string & owner,
const std::string & repo,
std::optional<std::string> ref,
std::optional<Hash> rev);
} }