Improve GitHub caching
In particular, when building a flake lock file, inputs like 'nixpkgs' are now downloaded only once. Previously, Nix would first fetch https://api.github.com/repos/<owner>/<repo>/tarball/<ref> and then later fetch https://api.github.com/repos/<owner>/<repo>/tarball/<rev>, even though both URLs produce the same result. Git and GitHub now also share a cache that maps revs to a store path and other info.
This commit is contained in:
parent
14a89aa8cd
commit
0ab64729e9
|
@ -170,43 +170,10 @@ static SourceInfo fetchInput(EvalState & state, const FlakeRef & resolvedRef)
|
||||||
|
|
||||||
// This only downloads only one revision of the repo, not the entire history.
|
// This only downloads only one revision of the repo, not the entire history.
|
||||||
if (auto refData = std::get_if<FlakeRef::IsGitHub>(&resolvedRef.data)) {
|
if (auto refData = std::get_if<FlakeRef::IsGitHub>(&resolvedRef.data)) {
|
||||||
|
return doGit(exportGitHub(state.store, refData->owner, refData->repo, resolvedRef.ref, resolvedRef.rev));
|
||||||
// FIXME: use regular /archive URLs instead? api.github.com
|
|
||||||
// might have stricter rate limits.
|
|
||||||
|
|
||||||
auto url = fmt("https://api.github.com/repos/%s/%s/tarball/%s",
|
|
||||||
refData->owner, refData->repo,
|
|
||||||
resolvedRef.rev ? resolvedRef.rev->to_string(Base16, false)
|
|
||||||
: resolvedRef.ref ? *resolvedRef.ref : "master");
|
|
||||||
|
|
||||||
std::string accessToken = settings.githubAccessToken.get();
|
|
||||||
if (accessToken != "")
|
|
||||||
url += "?access_token=" + accessToken;
|
|
||||||
|
|
||||||
CachedDownloadRequest request(url);
|
|
||||||
request.unpack = true;
|
|
||||||
request.name = "source";
|
|
||||||
request.ttl = resolvedRef.rev ? 1000000000 : settings.tarballTtl;
|
|
||||||
request.getLastModified = true;
|
|
||||||
auto result = getDownloader()->downloadCached(state.store, request);
|
|
||||||
|
|
||||||
if (!result.etag)
|
|
||||||
throw Error("did not receive an ETag header from '%s'", url);
|
|
||||||
|
|
||||||
if (result.etag->size() != 42 || (*result.etag)[0] != '"' || (*result.etag)[41] != '"')
|
|
||||||
throw Error("ETag header '%s' from '%s' is not a Git revision", *result.etag, url);
|
|
||||||
|
|
||||||
FlakeRef ref(resolvedRef.baseRef());
|
|
||||||
ref.rev = Hash(std::string(*result.etag, 1, result.etag->size() - 2), htSHA1);
|
|
||||||
SourceInfo info(ref);
|
|
||||||
info.storePath = result.storePath;
|
|
||||||
info.narHash = state.store->queryPathInfo(info.storePath)->narHash;
|
|
||||||
info.lastModified = result.lastModified;
|
|
||||||
|
|
||||||
return info;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// This downloads the entire git history
|
// This downloads the entire git history.
|
||||||
else if (auto refData = std::get_if<FlakeRef::IsGit>(&resolvedRef.data)) {
|
else if (auto refData = std::get_if<FlakeRef::IsGit>(&resolvedRef.data)) {
|
||||||
return doGit(exportGit(state.store, refData->uri, resolvedRef.ref, resolvedRef.rev, "source"));
|
return doGit(exportGit(state.store, refData->uri, resolvedRef.ref, resolvedRef.rev, "source"));
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,60 @@ namespace nix {
|
||||||
|
|
||||||
extern std::regex revRegex;
|
extern std::regex revRegex;
|
||||||
|
|
||||||
|
/* Return the path of the cache file that records information about
   revision `rev` of the source called `name`.

   All files live under `<cache dir>/nix/git-revs` and end in `.link`.
   For the name "source" the file is named after the Git revision
   itself; for any other name it is named after the Base32 SHA-512
   hash of `name`, a NUL byte and the revision. */
static Path getCacheInfoPathFor(const std::string & name, const Hash & rev)
{
    Path revsDir = getCacheDir() + "/nix/git-revs";

    std::string stem;
    if (name == "source")
        stem = rev.gitRev();
    else {
        // The NUL separator keeps the (name, rev) pair unambiguous
        // when the two strings are concatenated.
        auto key = name + std::string("\0"s) + rev.gitRev();
        stem = hashString(htSHA512, key).to_string(Base32, false);
    }

    return revsDir + "/" + stem + ".link";
}
|
||||||
|
|
||||||
|
static void cacheGitInfo(const std::string & name, const GitInfo & gitInfo)
|
||||||
|
{
|
||||||
|
nlohmann::json json;
|
||||||
|
json["storePath"] = gitInfo.storePath;
|
||||||
|
json["name"] = name;
|
||||||
|
json["rev"] = gitInfo.rev.gitRev();
|
||||||
|
if (gitInfo.revCount)
|
||||||
|
json["revCount"] = *gitInfo.revCount;
|
||||||
|
json["lastModified"] = gitInfo.lastModified;
|
||||||
|
|
||||||
|
auto cacheInfoPath = getCacheInfoPathFor(name, gitInfo.rev);
|
||||||
|
createDirs(dirOf(cacheInfoPath));
|
||||||
|
writeFile(cacheInfoPath, json.dump());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Look up the cached record for revision `rev` of the source called
   `name`.

   Returns a GitInfo (storePath, rev, lastModified and, if recorded,
   revCount) when a cache file exists, matches `name` and `rev`, and
   its store path is still valid in `store`. Returns an empty optional
   when there is no cache file, when the recorded store path is no
   longer valid, or when the cache file is malformed or belongs to a
   different name/revision.

   A SysError other than ENOENT raised while reading the file is
   rethrown. */
static std::optional<GitInfo> lookupGitInfo(
    ref<Store> store,
    const std::string & name,
    const Hash & rev)
{
    auto cacheInfoPath = getCacheInfoPathFor(name, rev);

    try {
        auto info = nlohmann::json::parse(readFile(cacheInfoPath));

        // The cache file is external data: a mismatch is treated as a
        // cache miss rather than as a programming error. The revision
        // is compared in the textual form that cacheGitInfo() writes.
        std::string cachedName = info["name"];
        std::string cachedRev = info["rev"];
        if (cachedName != name || cachedRev != rev.gitRev()) {
            printTalkative("ignoring Git cache file '%s' because it does not match the requested name/revision", cacheInfoPath);
            return {};
        }

        Path storePath = info["storePath"];

        if (store->isValidPath(storePath)) {
            GitInfo gitInfo;
            gitInfo.storePath = storePath;
            gitInfo.rev = rev;
            // "revCount" is only present in some records.
            if (info.find("revCount") != info.end())
                gitInfo.revCount = info["revCount"];
            gitInfo.lastModified = info["lastModified"];
            return gitInfo;
        }

    } catch (SysError & e) {
        // A missing cache file simply means "not cached".
        if (e.errNo != ENOENT) throw;
    } catch (nlohmann::json::exception & e) {
        // Truncated or otherwise malformed cache file: fall through to
        // a cache miss so the caller re-fetches and rewrites it.
        printTalkative("ignoring corrupt Git cache file '%s': %s", cacheInfoPath, e.what());
    }

    return {};
}
|
||||||
|
|
||||||
GitInfo exportGit(ref<Store> store, std::string uri,
|
GitInfo exportGit(ref<Store> store, std::string uri,
|
||||||
std::optional<std::string> ref,
|
std::optional<std::string> ref,
|
||||||
std::optional<Hash> rev,
|
std::optional<Hash> rev,
|
||||||
|
@ -25,6 +79,17 @@ GitInfo exportGit(ref<Store> store, std::string uri,
|
||||||
{
|
{
|
||||||
assert(!rev || rev->type == htSHA1);
|
assert(!rev || rev->type == htSHA1);
|
||||||
|
|
||||||
|
if (rev) {
|
||||||
|
if (auto gitInfo = lookupGitInfo(store, name, *rev)) {
|
||||||
|
// If this gitInfo was produced by exportGitHub, then it won't
|
||||||
|
// have a revCount. So we have to do a full clone.
|
||||||
|
if (gitInfo->revCount) {
|
||||||
|
gitInfo->ref = ref;
|
||||||
|
return *gitInfo;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (hasPrefix(uri, "git+")) uri = std::string(uri, 4);
|
if (hasPrefix(uri, "git+")) uri = std::string(uri, 4);
|
||||||
|
|
||||||
bool isLocal = hasPrefix(uri, "/") && pathExists(uri + "/.git");
|
bool isLocal = hasPrefix(uri, "/") && pathExists(uri + "/.git");
|
||||||
|
@ -100,9 +165,6 @@ GitInfo exportGit(ref<Store> store, std::string uri,
|
||||||
isLocal = true;
|
isLocal = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
deletePath(getCacheDir() + "/nix/git");
|
|
||||||
deletePath(getCacheDir() + "/nix/gitv2");
|
|
||||||
|
|
||||||
Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, uri).to_string(Base32, false);
|
Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, uri).to_string(Base32, false);
|
||||||
Path repoDir;
|
Path repoDir;
|
||||||
|
|
||||||
|
@ -179,6 +241,13 @@ GitInfo exportGit(ref<Store> store, std::string uri,
|
||||||
rev = Hash(chomp(readFile(localRefFile)), htSHA1);
|
rev = Hash(chomp(readFile(localRefFile)), htSHA1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (auto gitInfo = lookupGitInfo(store, name, *rev)) {
|
||||||
|
if (gitInfo->revCount) {
|
||||||
|
gitInfo->ref = ref;
|
||||||
|
return *gitInfo;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// FIXME: check whether rev is an ancestor of ref.
|
// FIXME: check whether rev is an ancestor of ref.
|
||||||
GitInfo gitInfo;
|
GitInfo gitInfo;
|
||||||
gitInfo.ref = *ref;
|
gitInfo.ref = *ref;
|
||||||
|
@ -186,29 +255,6 @@ GitInfo exportGit(ref<Store> store, std::string uri,
|
||||||
|
|
||||||
printTalkative("using revision %s of repo '%s'", gitInfo.rev, uri);
|
printTalkative("using revision %s of repo '%s'", gitInfo.rev, uri);
|
||||||
|
|
||||||
std::string storeLinkName = hashString(htSHA512,
|
|
||||||
name + std::string("\0"s) + gitInfo.rev.gitRev()).to_string(Base32, false);
|
|
||||||
Path storeLink = cacheDir + "/" + storeLinkName + ".link";
|
|
||||||
PathLocks storeLinkLock({storeLink}, fmt("waiting for lock on '%1%'...", storeLink)); // FIXME: broken
|
|
||||||
|
|
||||||
try {
|
|
||||||
auto json = nlohmann::json::parse(readFile(storeLink));
|
|
||||||
|
|
||||||
assert(json["name"] == name && Hash((std::string) json["rev"], htSHA1) == gitInfo.rev);
|
|
||||||
|
|
||||||
Path storePath = json["storePath"];
|
|
||||||
|
|
||||||
if (store->isValidPath(storePath)) {
|
|
||||||
gitInfo.storePath = storePath;
|
|
||||||
gitInfo.revCount = json["revCount"];
|
|
||||||
gitInfo.lastModified = json["lastModified"];
|
|
||||||
return gitInfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (SysError & e) {
|
|
||||||
if (e.errNo != ENOENT) throw;
|
|
||||||
}
|
|
||||||
|
|
||||||
// FIXME: should pipe this, or find some better way to extract a
|
// FIXME: should pipe this, or find some better way to extract a
|
||||||
// revision.
|
// revision.
|
||||||
auto tar = runProgram("git", true, { "-C", repoDir, "archive", gitInfo.rev.gitRev() });
|
auto tar = runProgram("git", true, { "-C", repoDir, "archive", gitInfo.rev.gitRev() });
|
||||||
|
@ -223,15 +269,55 @@ GitInfo exportGit(ref<Store> store, std::string uri,
|
||||||
gitInfo.revCount = std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", gitInfo.rev.gitRev() }));
|
gitInfo.revCount = std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", gitInfo.rev.gitRev() }));
|
||||||
gitInfo.lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "show", "-s", "--format=%ct", gitInfo.rev.gitRev() }));
|
gitInfo.lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "show", "-s", "--format=%ct", gitInfo.rev.gitRev() }));
|
||||||
|
|
||||||
nlohmann::json json;
|
cacheGitInfo(name, gitInfo);
|
||||||
json["storePath"] = gitInfo.storePath;
|
|
||||||
json["uri"] = uri;
|
|
||||||
json["name"] = name;
|
|
||||||
json["rev"] = gitInfo.rev.gitRev();
|
|
||||||
json["revCount"] = gitInfo.revCount;
|
|
||||||
json["lastModified"] = gitInfo.lastModified;
|
|
||||||
|
|
||||||
writeFile(storeLink, json.dump());
|
return gitInfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fetch one revision of the GitHub repository `owner`/`repo` as an
   unpacked tarball from api.github.com and return its GitInfo
   (storePath, rev and lastModified; no revCount).

   If `rev` is given and a cache record for it exists under the name
   "source", that record is returned without downloading. Otherwise
   the tarball for `rev`, or for `ref`, or for "master" (in that order
   of preference) is downloaded. The revision is taken from the ETag
   header of the response, which must be a 40-character value wrapped
   in double quotes.

   Throws Error if no ETag header was received or if it does not have
   that shape. */
GitInfo exportGitHub(
    ref<Store> store,
    const std::string & owner,
    const std::string & repo,
    std::optional<std::string> ref,
    std::optional<Hash> rev)
{
    if (rev) {
        if (auto gitInfo = lookupGitInfo(store, "source", *rev))
            return *gitInfo;
    }

    // FIXME: use regular /archive URLs instead? api.github.com
    // might have stricter rate limits.

    // `url` never contains the access token, so it is safe to show in
    // error messages; the token is only added to `requestUrl`.
    auto url = fmt("https://api.github.com/repos/%s/%s/tarball/%s",
        owner, repo, rev ? rev->to_string(Base16, false) : ref ? *ref : "master");

    auto requestUrl = url;
    std::string accessToken = settings.githubAccessToken.get();
    if (accessToken != "")
        requestUrl += "?access_token=" + accessToken;

    CachedDownloadRequest request(requestUrl);
    request.unpack = true;
    request.name = "source";
    // A download pinned to a revision gets a very long TTL; one that
    // follows a ref uses the configured tarball TTL.
    request.ttl = rev ? 1000000000 : settings.tarballTtl;
    request.getLastModified = true;
    auto result = getDownloader()->downloadCached(store, request);

    if (!result.etag)
        throw Error("did not receive an ETag header from '%s'", url);

    // Expect 40 characters between a pair of double quotes.
    if (result.etag->size() != 42 || (*result.etag)[0] != '"' || (*result.etag)[41] != '"')
        throw Error("ETag header '%s' from '%s' is not a Git revision", *result.etag, url);

    assert(result.lastModified);

    GitInfo gitInfo;
    gitInfo.storePath = result.storePath;
    gitInfo.rev = Hash(std::string(*result.etag, 1, result.etag->size() - 2), htSHA1);
    gitInfo.lastModified = *result.lastModified;

    // Do not replace a valid cache record that already carries a
    // revCount: this function cannot supply one, and exportGit() only
    // reuses records that have it.
    auto existing = lookupGitInfo(store, "source", gitInfo.rev);
    if (!existing || !existing->revCount)
        cacheGitInfo("source", gitInfo);

    return gitInfo;
}
|
||||||
|
@ -283,7 +369,8 @@ static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Va
|
||||||
mkString(*state.allocAttr(v, state.sOutPath), gitInfo.storePath, PathSet({gitInfo.storePath}));
|
mkString(*state.allocAttr(v, state.sOutPath), gitInfo.storePath, PathSet({gitInfo.storePath}));
|
||||||
mkString(*state.allocAttr(v, state.symbols.create("rev")), gitInfo.rev.gitRev());
|
mkString(*state.allocAttr(v, state.symbols.create("rev")), gitInfo.rev.gitRev());
|
||||||
mkString(*state.allocAttr(v, state.symbols.create("shortRev")), gitInfo.rev.gitShortRev());
|
mkString(*state.allocAttr(v, state.symbols.create("shortRev")), gitInfo.rev.gitShortRev());
|
||||||
mkInt(*state.allocAttr(v, state.symbols.create("revCount")), gitInfo.revCount);
|
assert(gitInfo.revCount);
|
||||||
|
mkInt(*state.allocAttr(v, state.symbols.create("revCount")), *gitInfo.revCount);
|
||||||
v.attrs->sort();
|
v.attrs->sort();
|
||||||
|
|
||||||
if (state.allowedPaths)
|
if (state.allowedPaths)
|
||||||
|
|
|
@ -9,15 +9,24 @@ namespace nix {
|
||||||
struct GitInfo
|
struct GitInfo
|
||||||
{
|
{
|
||||||
Path storePath;
|
Path storePath;
|
||||||
std::string ref;
|
std::optional<std::string> ref;
|
||||||
Hash rev{htSHA1};
|
Hash rev{htSHA1};
|
||||||
uint64_t revCount;
|
std::optional<uint64_t> revCount;
|
||||||
time_t lastModified;
|
time_t lastModified;
|
||||||
};
|
};
|
||||||
|
|
||||||
GitInfo exportGit(ref<Store> store, std::string uri,
|
GitInfo exportGit(
|
||||||
|
ref<Store> store,
|
||||||
|
std::string uri,
|
||||||
std::optional<std::string> ref,
|
std::optional<std::string> ref,
|
||||||
std::optional<Hash> rev,
|
std::optional<Hash> rev,
|
||||||
const std::string & name);
|
const std::string & name);
|
||||||
|
|
||||||
|
GitInfo exportGitHub(
|
||||||
|
ref<Store> store,
|
||||||
|
const std::string & owner,
|
||||||
|
const std::string & repo,
|
||||||
|
std::optional<std::string> ref,
|
||||||
|
std::optional<Hash> rev);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue