From 99d5204baaef211234d50f20610fa43d304888ce Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 20 Nov 2023 20:04:37 +0100 Subject: [PATCH 1/3] Persistently cache InputAccessor::fetchToStore() This avoids repeated copying of the same source tree between Nix invocations. It requires the accessor to have a "fingerprint" (e.g. a Git revision) that uniquely determines its contents. --- src/libfetchers/fetchers.cc | 5 +++++ src/libfetchers/fetchers.hh | 9 +++++++++ src/libfetchers/git.cc | 14 +++++++++++++- src/libfetchers/github.cc | 8 ++++++++ src/libfetchers/input-accessor.cc | 30 ++++++++++++++++++++++++++++++ src/libfetchers/input-accessor.hh | 2 ++ src/libfetchers/mercurial.cc | 8 ++++++++ src/libstore/content-address.hh | 4 ++-- 8 files changed, 77 insertions(+), 3 deletions(-) diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc index c2513e076..60208619e 100644 --- a/src/libfetchers/fetchers.cc +++ b/src/libfetchers/fetchers.cc @@ -108,6 +108,11 @@ Input Input::fromAttrs(Attrs && attrs) return std::move(*res); } +std::optional<std::string> Input::getFingerprint(ref<Store> store) const +{ + return scheme ? scheme->getFingerprint(store, *this) : std::nullopt; +} + ParsedURL Input::toURL() const { if (!scheme) diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index ce5aa4c69..5f3254b6d 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -113,6 +113,12 @@ public: std::optional<Hash> getRev() const; std::optional<uint64_t> getRevCount() const; std::optional<time_t> getLastModified() const; + + /** + * For locked inputs, return a string that uniquely specifies the + * content of the input (typically a commit hash or content hash). 
+ */ + std::optional<std::string> getFingerprint(ref<Store> store) const; }; @@ -180,6 +186,9 @@ struct InputScheme virtual bool isDirect(const Input & input) const { return true; } + + virtual std::optional<std::string> getFingerprint(ref<Store> store, const Input & input) const + { return std::nullopt; } }; void registerInputScheme(std::shared_ptr<InputScheme> && fetcher); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 7208a0b6d..6b461499b 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -700,10 +700,22 @@ struct GitInputScheme : InputScheme auto repoInfo = getRepoInfo(input); - return + auto [accessor, final] = input.getRef() || input.getRev() || !repoInfo.isLocal ? getAccessorFromCommit(store, repoInfo, std::move(input)) : getAccessorFromWorkdir(store, repoInfo, std::move(input)); + + accessor->fingerprint = final.getFingerprint(store); + + return {accessor, std::move(final)}; + } + + std::optional<std::string> getFingerprint(ref<Store> store, const Input & input) const override + { + if (auto rev = input.getRev()) + return rev->gitRev() + (getSubmodulesAttr(input) ? 
";s" : ""); + else + return std::nullopt; } }; diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 6c9b29721..661ad4884 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -229,6 +229,14 @@ struct GitArchiveInputScheme : InputScheme { return Xp::Flakes; } + + std::optional<std::string> getFingerprint(ref<Store> store, const Input & input) const override + { + if (auto rev = input.getRev()) + return rev->gitRev(); + else + return std::nullopt; + } }; struct GitHubInputScheme : GitArchiveInputScheme diff --git a/src/libfetchers/input-accessor.cc b/src/libfetchers/input-accessor.cc index d1d450cf7..53502c621 100644 --- a/src/libfetchers/input-accessor.cc +++ b/src/libfetchers/input-accessor.cc @@ -1,5 +1,6 @@ #include "input-accessor.hh" #include "store-api.hh" +#include "cache.hh" namespace nix { @@ -11,6 +12,30 @@ StorePath InputAccessor::fetchToStore( PathFilter * filter, RepairFlag repair) { + // FIXME: add an optimisation for the case where the accessor is + // an FSInputAccessor pointing to a store path. + + std::optional<fetchers::Attrs> cacheKey; + + if (!filter && fingerprint) { + cacheKey = fetchers::Attrs{ + {"_what", "fetchToStore"}, + {"store", store->storeDir}, + {"name", std::string(name)}, + {"fingerprint", *fingerprint}, + {"method", (uint8_t) method}, + {"path", path.abs()} + }; + if (auto res = fetchers::getCache()->lookup(*cacheKey)) { + StorePath storePath(fetchers::getStrAttr(*res, "storePath")); + if (store->isValidPath(storePath)) { + debug("store path cache hit for '%s'", showPath(path)); + return storePath; + } + } + } else + debug("source path '%s' is uncacheable", showPath(path)); + Activity act(*logger, lvlChatty, actUnknown, fmt("copying '%s' to the store", showPath(path))); auto source = sinkToSource([&](Sink & sink) { @@ -25,6 +50,11 @@ StorePath InputAccessor::fetchToStore( ? 
store->computeStorePathFromDump(*source, name, method, htSHA256).first : store->addToStoreFromDump(*source, name, method, htSHA256, repair); + if (cacheKey) + fetchers::getCache()->upsert( + *cacheKey, + fetchers::Attrs{{"storePath", std::string(storePath.to_string())}}); + return storePath; } diff --git a/src/libfetchers/input-accessor.hh b/src/libfetchers/input-accessor.hh index 9c688a234..26d17f064 100644 --- a/src/libfetchers/input-accessor.hh +++ b/src/libfetchers/input-accessor.hh @@ -18,6 +18,8 @@ class Store; struct InputAccessor : virtual SourceAccessor, std::enable_shared_from_this<InputAccessor> { + std::optional<std::string> fingerprint; + /** * Return the maximum last-modified time of the files in this * tree, if available. diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc index 9244acf39..aa991a75d 100644 --- a/src/libfetchers/mercurial.cc +++ b/src/libfetchers/mercurial.cc @@ -339,6 +339,14 @@ struct MercurialInputScheme : InputScheme return makeResult(infoAttrs, std::move(storePath)); } + + std::optional<std::string> getFingerprint(ref<Store> store, const Input & input) const override + { + if (auto rev = input.getRev()) + return rev->gitRev(); + else + return std::nullopt; + } }; static auto rMercurialInputScheme = OnStartup([] { registerInputScheme(std::make_unique<MercurialInputScheme>()); }); diff --git a/src/libstore/content-address.hh b/src/libstore/content-address.hh index c4d619bdc..bdb558907 100644 --- a/src/libstore/content-address.hh +++ b/src/libstore/content-address.hh @@ -39,12 +39,12 @@ enum struct FileIngestionMethod : uint8_t { /** * Flat-file hashing. Directly ingest the contents of a single file */ - Flat = false, + Flat = 0, /** * Recursive (or NAR) hashing. 
Serializes the file-system object in Nix * Archive format and ingest that */ - Recursive = true + Recursive = 1 }; /** From 61b76f5f34db7f863a6f22bd9083f677b339fcf6 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 22 Nov 2023 11:26:12 +0100 Subject: [PATCH 2/3] Apply suggestion Co-authored-by: John Ericson --- src/libfetchers/input-accessor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libfetchers/input-accessor.cc b/src/libfetchers/input-accessor.cc index 53502c621..8e10cf2e2 100644 --- a/src/libfetchers/input-accessor.cc +++ b/src/libfetchers/input-accessor.cc @@ -27,7 +27,7 @@ StorePath InputAccessor::fetchToStore( {"path", path.abs()} }; if (auto res = fetchers::getCache()->lookup(*cacheKey)) { - StorePath storePath(fetchers::getStrAttr(*res, "storePath")); + StorePath storePath{fetchers::getStrAttr(*res, "storePath")}; if (store->isValidPath(storePath)) { debug("store path cache hit for '%s'", showPath(path)); return storePath; From b1ab592f28f08da5dc7c060e5c3b19dc66dbc111 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 22 Nov 2023 11:44:02 +0100 Subject: [PATCH 3/3] Use the StorePath-based cache interface --- src/libfetchers/input-accessor.cc | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/libfetchers/input-accessor.cc b/src/libfetchers/input-accessor.cc index 8e10cf2e2..85dc4609f 100644 --- a/src/libfetchers/input-accessor.cc +++ b/src/libfetchers/input-accessor.cc @@ -26,12 +26,9 @@ StorePath InputAccessor::fetchToStore( {"method", (uint8_t) method}, {"path", path.abs()} }; - if (auto res = fetchers::getCache()->lookup(*cacheKey)) { - StorePath storePath{fetchers::getStrAttr(*res, "storePath")}; - if (store->isValidPath(storePath)) { - debug("store path cache hit for '%s'", showPath(path)); - return storePath; - } + if (auto res = fetchers::getCache()->lookup(store, *cacheKey)) { + debug("store path cache hit for '%s'", showPath(path)); + return res->second; } } else debug("source 
path '%s' is uncacheable", showPath(path)); @@ -51,9 +48,7 @@ StorePath InputAccessor::fetchToStore( : store->addToStoreFromDump(*source, name, method, htSHA256, repair); if (cacheKey) - fetchers::getCache()->upsert( - *cacheKey, - fetchers::Attrs{{"storePath", std::string(storePath.to_string())}}); + fetchers::getCache()->add(store, *cacheKey, {}, storePath, true); return storePath; }