forked from lix-project/lix
Improve GitHub caching
In particular, when building a flake lock file, inputs like 'nixpkgs' are now downloaded only once. Previously, Nix would first fetch https://api.github.com/repos/<owner>/<repo>/tarball/<ref> and then later https://api.github.com/repos/<owner>/<repo>/tarball/<rev>, even though both URLs produce the same result. Git and GitHub now also share a cache that maps revs to a store path and other info.
This commit is contained in:
parent
14a89aa8cd
commit
0ab64729e9
3 changed files with 136 additions and 73 deletions
|
@ -170,43 +170,10 @@ static SourceInfo fetchInput(EvalState & state, const FlakeRef & resolvedRef)
|
|||
|
||||
// This downloads only one revision of the repo, not the entire history.
|
||||
if (auto refData = std::get_if<FlakeRef::IsGitHub>(&resolvedRef.data)) {
|
||||
|
||||
// FIXME: use regular /archive URLs instead? api.github.com
|
||||
// might have stricter rate limits.
|
||||
|
||||
auto url = fmt("https://api.github.com/repos/%s/%s/tarball/%s",
|
||||
refData->owner, refData->repo,
|
||||
resolvedRef.rev ? resolvedRef.rev->to_string(Base16, false)
|
||||
: resolvedRef.ref ? *resolvedRef.ref : "master");
|
||||
|
||||
std::string accessToken = settings.githubAccessToken.get();
|
||||
if (accessToken != "")
|
||||
url += "?access_token=" + accessToken;
|
||||
|
||||
CachedDownloadRequest request(url);
|
||||
request.unpack = true;
|
||||
request.name = "source";
|
||||
request.ttl = resolvedRef.rev ? 1000000000 : settings.tarballTtl;
|
||||
request.getLastModified = true;
|
||||
auto result = getDownloader()->downloadCached(state.store, request);
|
||||
|
||||
if (!result.etag)
|
||||
throw Error("did not receive an ETag header from '%s'", url);
|
||||
|
||||
if (result.etag->size() != 42 || (*result.etag)[0] != '"' || (*result.etag)[41] != '"')
|
||||
throw Error("ETag header '%s' from '%s' is not a Git revision", *result.etag, url);
|
||||
|
||||
FlakeRef ref(resolvedRef.baseRef());
|
||||
ref.rev = Hash(std::string(*result.etag, 1, result.etag->size() - 2), htSHA1);
|
||||
SourceInfo info(ref);
|
||||
info.storePath = result.storePath;
|
||||
info.narHash = state.store->queryPathInfo(info.storePath)->narHash;
|
||||
info.lastModified = result.lastModified;
|
||||
|
||||
return info;
|
||||
return doGit(exportGitHub(state.store, refData->owner, refData->repo, resolvedRef.ref, resolvedRef.rev));
|
||||
}
|
||||
|
||||
// This downloads the entire git history
|
||||
// This downloads the entire git history.
|
||||
else if (auto refData = std::get_if<FlakeRef::IsGit>(&resolvedRef.data)) {
|
||||
return doGit(exportGit(state.store, refData->uri, resolvedRef.ref, resolvedRef.rev, "source"));
|
||||
}
|
||||
|
|
|
@ -18,6 +18,60 @@ namespace nix {
|
|||
|
||||
extern std::regex revRegex;
|
||||
|
||||
/* Compute the location of the cache file describing revision `rev`
   exported under the store name `name`.

   All cache files live in "<cache dir>/nix/git-revs" and end in ".link".
   For the name "source" the file is named after the Git revision itself;
   for any other name it is named after the SHA-512 hash of the name, a
   NUL byte and the revision, so that different names never share a file. */
static Path getCacheInfoPathFor(const std::string & name, const Hash & rev)
{
    const Path revsDir = getCacheDir() + "/nix/git-revs";

    std::string baseName;
    if (name == "source")
        baseName = rev.gitRev();
    else
        baseName = hashString(htSHA512, name + std::string("\0"s) + rev.gitRev()).to_string(Base32, false);

    return revsDir + "/" + baseName + ".link";
}
|
||||
|
||||
static void cacheGitInfo(const std::string & name, const GitInfo & gitInfo)
|
||||
{
|
||||
nlohmann::json json;
|
||||
json["storePath"] = gitInfo.storePath;
|
||||
json["name"] = name;
|
||||
json["rev"] = gitInfo.rev.gitRev();
|
||||
if (gitInfo.revCount)
|
||||
json["revCount"] = *gitInfo.revCount;
|
||||
json["lastModified"] = gitInfo.lastModified;
|
||||
|
||||
auto cacheInfoPath = getCacheInfoPathFor(name, gitInfo.rev);
|
||||
createDirs(dirOf(cacheInfoPath));
|
||||
writeFile(cacheInfoPath, json.dump());
|
||||
}
|
||||
|
||||
/* Look up the cached export of revision `rev` under the store name `name`.

   Reads the JSON file at getCacheInfoPathFor(name, rev) (the file that
   cacheGitInfo() writes) and, if the store path recorded there is still
   valid in `store`, returns a GitInfo filled in with that store path,
   `rev`, the last-modified time and, when present in the file, the
   revision count. The `ref` field is not set by this function.

   Returns std::nullopt on a cache miss, i.e. when
     - the cache file does not exist,
     - the file is not valid JSON or lacks a required field,
     - the file describes a different name or revision, or
     - the recorded store path is no longer valid.

   A SysError other than ENOENT raised while reading the file is
   rethrown. */
static std::optional<GitInfo> lookupGitInfo(
    ref<Store> store,
    const std::string & name,
    const Hash & rev)
{
    try {
        auto json = nlohmann::json::parse(readFile(getCacheInfoPathFor(name, rev)));

        // The cache file is on-disk state that may be stale, truncated or
        // edited by hand, so an entry that does not match the request is
        // treated as a miss instead of being asserted on.
        if (!(json["name"] == name && Hash((std::string) json["rev"], htSHA1) == rev))
            return {};

        Path storePath = json["storePath"];

        if (store->isValidPath(storePath)) {
            GitInfo gitInfo;
            gitInfo.storePath = storePath;
            gitInfo.rev = rev;
            // Entries written for GitHub tarballs carry no revCount.
            if (json.find("revCount") != json.end())
                gitInfo.revCount = json["revCount"];
            gitInfo.lastModified = json["lastModified"];
            return gitInfo;
        }

    } catch (SysError & e) {
        // A missing cache file is the ordinary "not cached yet" case.
        if (e.errNo != ENOENT) throw;
    } catch (nlohmann::json::exception &) {
        // Unparseable JSON, or a field that is missing or has the wrong
        // type: ignore the entry so the caller re-fetches and rewrites it.
    }

    return {};
}
|
||||
|
||||
GitInfo exportGit(ref<Store> store, std::string uri,
|
||||
std::optional<std::string> ref,
|
||||
std::optional<Hash> rev,
|
||||
|
@ -25,6 +79,17 @@ GitInfo exportGit(ref<Store> store, std::string uri,
|
|||
{
|
||||
assert(!rev || rev->type == htSHA1);
|
||||
|
||||
if (rev) {
|
||||
if (auto gitInfo = lookupGitInfo(store, name, *rev)) {
|
||||
// If this gitInfo was produced by exportGitHub, then it won't
|
||||
// have a revCount. So we have to do a full clone.
|
||||
if (gitInfo->revCount) {
|
||||
gitInfo->ref = ref;
|
||||
return *gitInfo;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (hasPrefix(uri, "git+")) uri = std::string(uri, 4);
|
||||
|
||||
bool isLocal = hasPrefix(uri, "/") && pathExists(uri + "/.git");
|
||||
|
@ -100,9 +165,6 @@ GitInfo exportGit(ref<Store> store, std::string uri,
|
|||
isLocal = true;
|
||||
}
|
||||
|
||||
deletePath(getCacheDir() + "/nix/git");
|
||||
deletePath(getCacheDir() + "/nix/gitv2");
|
||||
|
||||
Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, uri).to_string(Base32, false);
|
||||
Path repoDir;
|
||||
|
||||
|
@ -179,6 +241,13 @@ GitInfo exportGit(ref<Store> store, std::string uri,
|
|||
rev = Hash(chomp(readFile(localRefFile)), htSHA1);
|
||||
}
|
||||
|
||||
if (auto gitInfo = lookupGitInfo(store, name, *rev)) {
|
||||
if (gitInfo->revCount) {
|
||||
gitInfo->ref = ref;
|
||||
return *gitInfo;
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: check whether rev is an ancestor of ref.
|
||||
GitInfo gitInfo;
|
||||
gitInfo.ref = *ref;
|
||||
|
@ -186,29 +255,6 @@ GitInfo exportGit(ref<Store> store, std::string uri,
|
|||
|
||||
printTalkative("using revision %s of repo '%s'", gitInfo.rev, uri);
|
||||
|
||||
std::string storeLinkName = hashString(htSHA512,
|
||||
name + std::string("\0"s) + gitInfo.rev.gitRev()).to_string(Base32, false);
|
||||
Path storeLink = cacheDir + "/" + storeLinkName + ".link";
|
||||
PathLocks storeLinkLock({storeLink}, fmt("waiting for lock on '%1%'...", storeLink)); // FIXME: broken
|
||||
|
||||
try {
|
||||
auto json = nlohmann::json::parse(readFile(storeLink));
|
||||
|
||||
assert(json["name"] == name && Hash((std::string) json["rev"], htSHA1) == gitInfo.rev);
|
||||
|
||||
Path storePath = json["storePath"];
|
||||
|
||||
if (store->isValidPath(storePath)) {
|
||||
gitInfo.storePath = storePath;
|
||||
gitInfo.revCount = json["revCount"];
|
||||
gitInfo.lastModified = json["lastModified"];
|
||||
return gitInfo;
|
||||
}
|
||||
|
||||
} catch (SysError & e) {
|
||||
if (e.errNo != ENOENT) throw;
|
||||
}
|
||||
|
||||
// FIXME: should pipe this, or find some better way to extract a
|
||||
// revision.
|
||||
auto tar = runProgram("git", true, { "-C", repoDir, "archive", gitInfo.rev.gitRev() });
|
||||
|
@ -223,15 +269,55 @@ GitInfo exportGit(ref<Store> store, std::string uri,
|
|||
gitInfo.revCount = std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", gitInfo.rev.gitRev() }));
|
||||
gitInfo.lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "show", "-s", "--format=%ct", gitInfo.rev.gitRev() }));
|
||||
|
||||
nlohmann::json json;
|
||||
json["storePath"] = gitInfo.storePath;
|
||||
json["uri"] = uri;
|
||||
json["name"] = name;
|
||||
json["rev"] = gitInfo.rev.gitRev();
|
||||
json["revCount"] = gitInfo.revCount;
|
||||
json["lastModified"] = gitInfo.lastModified;
|
||||
cacheGitInfo(name, gitInfo);
|
||||
|
||||
writeFile(storeLink, json.dump());
|
||||
return gitInfo;
|
||||
}
|
||||
|
||||
/* Fetch one revision of the GitHub repository `owner`/`repo` as an
   unpacked tarball and return information about it.

   If `rev` is given and the revision cache already has a usable entry
   for it under the name "source", that entry is returned without any
   download. Otherwise the tarball for `rev`, or else for `ref`, or else
   for "master", is requested from api.github.com (appending the
   configured GitHub access token, if any) through the cached downloader.
   Requests for a fixed `rev` use a very long TTL; requests for a ref use
   settings.tarballTtl.

   The Git revision of the result is taken from the response's ETag
   header, which must be a 40-character value enclosed in double quotes.

   The returned GitInfo has storePath, rev and lastModified set; revCount
   and ref are not set by this function.

   Throws Error if no ETag header was received or if it does not have
   the expected form. */
GitInfo exportGitHub(
    ref<Store> store,
    const std::string & owner,
    const std::string & repo,
    std::optional<std::string> ref,
    std::optional<Hash> rev)
{
    if (rev) {
        if (auto gitInfo = lookupGitInfo(store, "source", *rev))
            return *gitInfo;
    }

    // FIXME: use regular /archive URLs instead? api.github.com
    // might have stricter rate limits.

    auto url = fmt("https://api.github.com/repos/%s/%s/tarball/%s",
        owner, repo, rev ? rev->to_string(Base16, false) : ref ? *ref : "master");

    std::string accessToken = settings.githubAccessToken.get();
    if (accessToken != "")
        url += "?access_token=" + accessToken;

    CachedDownloadRequest request(url);
    request.unpack = true;
    request.name = "source";
    request.ttl = rev ? 1000000000 : settings.tarballTtl;
    request.getLastModified = true;
    auto result = getDownloader()->downloadCached(store, request);

    if (!result.etag)
        throw Error("did not receive an ETag header from '%s'", url);

    // Expect exactly: '"' + 40 characters + '"'.
    if (result.etag->size() != 42 || (*result.etag)[0] != '"' || (*result.etag)[41] != '"')
        throw Error("ETag header '%s' from '%s' is not a Git revision", *result.etag, url);

    assert(result.lastModified);

    GitInfo gitInfo;
    gitInfo.storePath = result.storePath;
    // Strip the surrounding quotes from the ETag to obtain the revision.
    gitInfo.rev = Hash(std::string(*result.etag, 1, result.etag->size() - 2), htSHA1);
    gitInfo.lastModified = *result.lastModified;

    // exportGit() may already have cached this revision under "source"
    // together with a revCount, which a tarball download cannot provide.
    // Keep that richer entry rather than replacing it, so that a later
    // exportGit() of this revision does not have to redo a full clone.
    auto existing = lookupGitInfo(store, "source", gitInfo.rev);
    if (!(existing && existing->revCount))
        cacheGitInfo("source", gitInfo);

    return gitInfo;
}
|
||||
|
@ -283,7 +369,8 @@ static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Va
|
|||
mkString(*state.allocAttr(v, state.sOutPath), gitInfo.storePath, PathSet({gitInfo.storePath}));
|
||||
mkString(*state.allocAttr(v, state.symbols.create("rev")), gitInfo.rev.gitRev());
|
||||
mkString(*state.allocAttr(v, state.symbols.create("shortRev")), gitInfo.rev.gitShortRev());
|
||||
mkInt(*state.allocAttr(v, state.symbols.create("revCount")), gitInfo.revCount);
|
||||
assert(gitInfo.revCount);
|
||||
mkInt(*state.allocAttr(v, state.symbols.create("revCount")), *gitInfo.revCount);
|
||||
v.attrs->sort();
|
||||
|
||||
if (state.allowedPaths)
|
||||
|
|
|
@ -9,15 +9,24 @@ namespace nix {
|
|||
struct GitInfo
|
||||
{
|
||||
Path storePath;
|
||||
std::string ref;
|
||||
std::optional<std::string> ref;
|
||||
Hash rev{htSHA1};
|
||||
uint64_t revCount;
|
||||
std::optional<uint64_t> revCount;
|
||||
time_t lastModified;
|
||||
};
|
||||
|
||||
GitInfo exportGit(ref<Store> store, std::string uri,
|
||||
GitInfo exportGit(
|
||||
ref<Store> store,
|
||||
std::string uri,
|
||||
std::optional<std::string> ref,
|
||||
std::optional<Hash> rev,
|
||||
const std::string & name);
|
||||
|
||||
GitInfo exportGitHub(
|
||||
ref<Store> store,
|
||||
const std::string & owner,
|
||||
const std::string & repo,
|
||||
std::optional<std::string> ref,
|
||||
std::optional<Hash> rev);
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue