From 3ebe1341abe1b0ad59bd4925517af18d9200f818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?= Date: Fri, 17 Mar 2023 15:51:08 +0100 Subject: [PATCH 1/4] Make `RewritingSink` accept a map of rewrites Giving it the same semantics as `rewriteStrings`. Also add some tests for it --- src/libexpr/primops.cc | 2 +- src/libstore/build/local-derivation-goal.cc | 2 +- src/libstore/path-references.cc | 73 +++++++++++++++++ src/libstore/path-references.hh | 25 ++++++ src/{libstore => libutil}/references.cc | 86 +++++---------------- src/{libstore => libutil}/references.hh | 23 +----- src/libutil/tests/references.cc | 46 +++++++++++ 7 files changed, 169 insertions(+), 88 deletions(-) create mode 100644 src/libstore/path-references.cc create mode 100644 src/libstore/path-references.hh rename src/{libstore => libutil}/references.cc (61%) rename src/{libstore => libutil}/references.hh (61%) create mode 100644 src/libutil/tests/references.cc diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index cfae1e5f8..572e76ec6 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -6,7 +6,7 @@ #include "globals.hh" #include "json-to-value.hh" #include "names.hh" -#include "references.hh" +#include "path-references.hh" #include "store-api.hh" #include "util.hh" #include "value-to-json.hh" diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 05d6685da..6f7ab8a3d 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -4,7 +4,7 @@ #include "worker.hh" #include "builtins.hh" #include "builtins/buildenv.hh" -#include "references.hh" +#include "path-references.hh" #include "finally.hh" #include "util.hh" #include "archive.hh" diff --git a/src/libstore/path-references.cc b/src/libstore/path-references.cc new file mode 100644 index 000000000..33cf66ce3 --- /dev/null +++ b/src/libstore/path-references.cc @@ -0,0 +1,73 @@ +#include "path-references.hh" +#include "hash.hh" +#include "util.hh" +#include "archive.hh" + +#include +#include +#include +#include + + +namespace nix { + + +PathRefScanSink::PathRefScanSink(StringSet && hashes, std::map && backMap) + : RefScanSink(std::move(hashes)) + , backMap(std::move(backMap)) +{ } + +PathRefScanSink PathRefScanSink::fromPaths(const StorePathSet & refs) +{ + StringSet hashes; + std::map backMap; + + for (auto & i : refs) { + std::string hashPart(i.hashPart()); + auto inserted = backMap.emplace(hashPart, i).second; + assert(inserted); + hashes.insert(hashPart); + } + + return PathRefScanSink(std::move(hashes), std::move(backMap)); +} + +StorePathSet PathRefScanSink::getResultPaths() +{ + /* Map the hashes found back to their store paths. */ + StorePathSet found; + for (auto & i : getResult()) { + auto j = backMap.find(i); + assert(j != backMap.end()); + found.insert(j->second); + } + + return found; +} + + +std::pair scanForReferences( + const std::string & path, + const StorePathSet & refs) +{ + HashSink hashSink { htSHA256 }; + auto found = scanForReferences(hashSink, path, refs); + auto hash = hashSink.finish(); + return std::pair(found, hash); +} + +StorePathSet scanForReferences( + Sink & toTee, + const Path & path, + const StorePathSet & refs) +{ + PathRefScanSink refsSink = PathRefScanSink::fromPaths(refs); + TeeSink sink { refsSink, toTee }; + + /* Look for the hashes in the NAR dump of the path. */ + dumpPath(path, sink); + + return refsSink.getResultPaths(); +} + +} diff --git a/src/libstore/path-references.hh b/src/libstore/path-references.hh new file mode 100644 index 000000000..7b44e3261 --- /dev/null +++ b/src/libstore/path-references.hh @@ -0,0 +1,25 @@ +#pragma once + +#include "references.hh" +#include "path.hh" + +namespace nix { + +std::pair scanForReferences(const Path & path, const StorePathSet & refs); + +StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathSet & refs); + +class PathRefScanSink : public RefScanSink +{ + std::map backMap; + + PathRefScanSink(StringSet && hashes, std::map && backMap); + +public: + + static PathRefScanSink fromPaths(const StorePathSet & refs); + + StorePathSet getResultPaths(); +}; + +} diff --git a/src/libstore/references.cc b/src/libutil/references.cc similarity index 61% rename from src/libstore/references.cc rename to src/libutil/references.cc index 345f4528b..74003584a 100644 --- a/src/libstore/references.cc +++ b/src/libutil/references.cc @@ -6,6 +6,7 @@ #include #include #include +#include namespace nix { @@ -66,69 +67,20 @@ void RefScanSink::operator () (std::string_view data) } -PathRefScanSink::PathRefScanSink(StringSet && hashes, std::map && backMap) - : RefScanSink(std::move(hashes)) - , backMap(std::move(backMap)) -{ } - -PathRefScanSink PathRefScanSink::fromPaths(const StorePathSet & refs) -{ - StringSet hashes; - std::map backMap; - - for (auto & i : refs) { - std::string hashPart(i.hashPart()); - auto inserted = backMap.emplace(hashPart, i).second; - assert(inserted); - hashes.insert(hashPart); - } - - return PathRefScanSink(std::move(hashes), std::move(backMap)); -} - -StorePathSet PathRefScanSink::getResultPaths() -{ - /* Map the hashes found back to their store paths. */ - StorePathSet found; - for (auto & i : getResult()) { - auto j = backMap.find(i); - assert(j != backMap.end()); - found.insert(j->second); - } - - return found; -} - - -std::pair scanForReferences( - const std::string & path, - const StorePathSet & refs) -{ - HashSink hashSink { htSHA256 }; - auto found = scanForReferences(hashSink, path, refs); - auto hash = hashSink.finish(); - return std::pair(found, hash); -} - -StorePathSet scanForReferences( - Sink & toTee, - const Path & path, - const StorePathSet & refs) -{ - PathRefScanSink refsSink = PathRefScanSink::fromPaths(refs); - TeeSink sink { refsSink, toTee }; - - /* Look for the hashes in the NAR dump of the path. */ - dumpPath(path, sink); - - return refsSink.getResultPaths(); -} - - RewritingSink::RewritingSink(const std::string & from, const std::string & to, Sink & nextSink) - : from(from), to(to), nextSink(nextSink) + : RewritingSink({{from, to}}, nextSink) { - assert(from.size() == to.size()); +} + +RewritingSink::RewritingSink(const StringMap & rewrites, Sink & nextSink) + : rewrites(rewrites), nextSink(nextSink) +{ + long unsigned int maxRewriteSize = 0; + for (auto & [from, to] : rewrites) { + assert(from.size() == to.size()); + maxRewriteSize = std::max(maxRewriteSize, from.size()); + } + this->maxRewriteSize = maxRewriteSize; } void RewritingSink::operator () (std::string_view data) @@ -136,13 +88,13 @@ void RewritingSink::operator () (std::string_view data) std::string s(prev); s.append(data); - size_t j = 0; - while ((j = s.find(from, j)) != std::string::npos) { - matches.push_back(pos + j); - s.replace(j, from.size(), to); - } + s = rewriteStrings(s, rewrites); - prev = s.size() < from.size() ? s : std::string(s, s.size() - from.size() + 1, from.size() - 1); + prev = s.size() < maxRewriteSize + ? s + : maxRewriteSize == 0 + ? "" + : std::string(s, s.size() - maxRewriteSize + 1, maxRewriteSize - 1); auto consumed = s.size() - prev.size(); diff --git a/src/libstore/references.hh b/src/libutil/references.hh similarity index 61% rename from src/libstore/references.hh rename to src/libutil/references.hh index 52d71b333..ffd730e7b 100644 --- a/src/libstore/references.hh +++ b/src/libutil/references.hh @@ -2,14 +2,9 @@ ///@file #include "hash.hh" -#include "path.hh" namespace nix { -std::pair scanForReferences(const Path & path, const StorePathSet & refs); - -StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathSet & refs); - class RefScanSink : public Sink { StringSet hashes; @@ -28,28 +23,18 @@ public: void operator () (std::string_view data) override; }; -class PathRefScanSink : public RefScanSink -{ - std::map backMap; - - PathRefScanSink(StringSet && hashes, std::map && backMap); - -public: - - static PathRefScanSink fromPaths(const StorePathSet & refs); - - StorePathSet getResultPaths(); -}; - struct RewritingSink : Sink { - std::string from, to, prev; + const StringMap rewrites; + long unsigned int maxRewriteSize; + std::string prev; Sink & nextSink; uint64_t pos = 0; std::vector matches; RewritingSink(const std::string & from, const std::string & to, Sink & nextSink); + RewritingSink(const StringMap & rewrites, Sink & nextSink); void operator () (std::string_view data) override; diff --git a/src/libutil/tests/references.cc b/src/libutil/tests/references.cc new file mode 100644 index 000000000..a517d9aa1 --- /dev/null +++ b/src/libutil/tests/references.cc @@ -0,0 +1,46 @@ +#include "references.hh" +#include + +namespace nix { + +using std::string; + +struct RewriteParams { + string originalString, finalString; + StringMap rewrites; + + friend std::ostream& operator<<(std::ostream& os, const RewriteParams& bar) { + StringSet strRewrites; + for (auto & [from, to] : bar.rewrites) + strRewrites.insert(from + "->" + to); + return os << + "OriginalString: " << bar.originalString << std::endl << + "Rewrites: " << concatStringsSep(",", strRewrites) << std::endl << + "Expected result: " << bar.finalString; + } +}; + +class RewriteTest : public ::testing::TestWithParam { +}; + +TEST_P(RewriteTest, IdentityRewriteIsIdentity) { + RewriteParams param = GetParam(); + StringSink rewritten; + auto rewriter = RewritingSink(param.rewrites, rewritten); + rewriter(param.originalString); + rewriter.flush(); + ASSERT_EQ(rewritten.s, param.finalString); +} + +INSTANTIATE_TEST_CASE_P( + references, + RewriteTest, + ::testing::Values( + RewriteParams{ "foooo", "baroo", {{"foo", "bar"}, {"bar", "baz"}}}, + RewriteParams{ "foooo", "bazoo", {{"fou", "bar"}, {"foo", "baz"}}}, + RewriteParams{ "foooo", "foooo", {}} + ) +); + +} + From a917fb0d53544f7fd29f24d5984702296d457347 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?= Date: Fri, 17 Mar 2023 15:51:08 +0100 Subject: [PATCH 2/4] Use a RewritingSink in derivation goal Possibly this will make it stream --- src/libstore/build/local-derivation-goal.cc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 6f7ab8a3d..4650b8bae 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -2384,13 +2384,16 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs() if (!outputRewrites.empty()) { debug("rewriting hashes in '%1%'; cross fingers", actualPath); - /* FIXME: this is in-memory. */ - StringSink sink; - dumpPath(actualPath, sink); + /* FIXME: Is this actually streaming? */ + auto source = sinkToSource([&](Sink & nextSink) { + RewritingSink rsink(outputRewrites, nextSink); + dumpPath(actualPath, rsink); + rsink.flush(); + }); + Path tmpPath = actualPath + ".tmp"; + restorePath(tmpPath, *source); deletePath(actualPath); - sink.s = rewriteStrings(sink.s, outputRewrites); - StringSource source(sink.s); - restorePath(actualPath, source); + movePath(tmpPath, actualPath); /* FIXME: set proper permissions in restorePath() so we don't have to do another traversal. */ From 34e1b464f0054623334355aac94cb1a3e4c35a96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?= Date: Fri, 17 Mar 2023 15:51:08 +0100 Subject: [PATCH 3/4] Normalize the hash-rewriting process when building derivations --- src/libstore/build/local-derivation-goal.cc | 28 ++++++++++----------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 4650b8bae..e9fe1eb73 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -2379,14 +2379,14 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs() continue; auto references = *referencesOpt; - auto rewriteOutput = [&]() { + auto rewriteOutput = [&](const StringMap & rewrites) { /* Apply hash rewriting if necessary. */ - if (!outputRewrites.empty()) { + if (!rewrites.empty()) { debug("rewriting hashes in '%1%'; cross fingers", actualPath); /* FIXME: Is this actually streaming? */ auto source = sinkToSource([&](Sink & nextSink) { - RewritingSink rsink(outputRewrites, nextSink); + RewritingSink rsink(rewrites, nextSink); dumpPath(actualPath, rsink); rsink.flush(); }); @@ -2442,7 +2442,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs() "since recursive hashing is not enabled (one of outputHashMode={flat,text} is true)", actualPath); } - rewriteOutput(); + rewriteOutput(outputRewrites); /* FIXME optimize and deduplicate with addToStore */ std::string oldHashPart { scratchPath->hashPart() }; HashModuloSink caSink { outputHash.hashType, oldHashPart }; @@ -2480,16 +2480,14 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs() Hash::dummy, }; if (*scratchPath != newInfo0.path) { - // Also rewrite the output path - auto source = sinkToSource([&](Sink & nextSink) { - RewritingSink rsink2(oldHashPart, std::string(newInfo0.path.hashPart()), nextSink); - dumpPath(actualPath, rsink2); - rsink2.flush(); - }); - Path tmpPath = actualPath + ".tmp"; - restorePath(tmpPath, *source); - deletePath(actualPath); - movePath(tmpPath, actualPath); + // If the path has some self-references, we need to rewrite + // them. + // (note that this doesn't invalidate the ca hash we calculated + // above because it's computed *modulo the self-references*, so + // it already takes this rewrite into account). + rewriteOutput( + StringMap{{oldHashPart, + std::string(newInfo0.path.hashPart())}}); } HashResult narHashAndSize = hashPath(htSHA256, actualPath); @@ -2511,7 +2509,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs() outputRewrites.insert_or_assign( std::string { scratchPath->hashPart() }, std::string { requiredFinalPath.hashPart() }); - rewriteOutput(); + rewriteOutput(outputRewrites); auto narHashAndSize = hashPath(htSHA256, actualPath); ValidPathInfo newInfo0 { requiredFinalPath, narHashAndSize.first }; newInfo0.narSize = narHashAndSize.second; From d0cecbe87708bd61548afa51fea78ec927403c5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?= Date: Wed, 24 May 2023 15:35:46 +0200 Subject: [PATCH 4/4] Disable the fetchClosure test for old daemons Broken because of the change introduced by #4282 --- tests/fetchClosure.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/fetchClosure.sh b/tests/fetchClosure.sh index a207f647c..21650eb06 100644 --- a/tests/fetchClosure.sh +++ b/tests/fetchClosure.sh @@ -5,6 +5,12 @@ enableFeatures "fetch-closure" clearStore clearCacheCache +# Old daemons don't properly zero out the self-references when +# calculating the CA hashes, so this breaks `nix store +# make-content-addressed` which expects the client and the daemon to +# compute the same hash +requireDaemonNewerThan "2.16.0pre20230524" + # Initialize binary cache. nonCaPath=$(nix build --json --file ./dependencies.nix --no-link | jq -r .[].outputs.out) caPath=$(nix store make-content-addressed --json $nonCaPath | jq -r '.rewrites | map(.) | .[]')