From e7ed9ae0c711c4efd83756b16379549ecff52355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Na=C3=AFm=20Favier?= Date: Fri, 4 Nov 2022 14:19:31 +0100 Subject: [PATCH 1/3] Restrict `readFile` context to references that appear in the string When calling `builtins.readFile` on a store path, the references of that path are currently added to the resulting string's context. This change makes those references the *possible* context of the string, but filters them to keep only the references whose hash actually appears in the string, similarly to what is done for determining the runtime references of a path. --- src/libexpr/primops.cc | 5 ++++ src/libstore/references.cc | 57 +++++++++++++++++++++++++------------- src/libstore/references.hh | 13 +++++++++ 3 files changed, 55 insertions(+), 20 deletions(-) diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 28b998474..ff620ca63 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -5,6 +5,7 @@ #include "globals.hh" #include "json-to-value.hh" #include "names.hh" +#include "references.hh" #include "store-api.hh" #include "util.hh" #include "json.hh" @@ -1542,6 +1543,10 @@ static void prim_readFile(EvalState & state, const PosIdx pos, Value * * args, V refs = state.store->queryPathInfo(state.store->toStorePath(path).first)->references; } catch (Error &) { // FIXME: should be InvalidPathError } + // Re-scan references to filter down to just the ones that actually occur in the file. + auto refsSink = PathRefScanSink::fromPaths(refs); + refsSink << s; + refs = refsSink.getResultPaths(); } auto context = state.store->printStorePathSet(refs); v.mkString(s, context); diff --git a/src/libstore/references.cc b/src/libstore/references.cc index 34dce092c..3bb297fc8 100644 --- a/src/libstore/references.cc +++ b/src/libstore/references.cc @@ -67,6 +67,40 @@ void RefScanSink::operator () (std::string_view data) } +PathRefScanSink::PathRefScanSink(StringSet && hashes, std::map && backMap) + : RefScanSink(std::move(hashes)) + , backMap(std::move(backMap)) +{ } + +PathRefScanSink PathRefScanSink::fromPaths(const StorePathSet & refs) +{ + StringSet hashes; + std::map backMap; + + for (auto & i : refs) { + std::string hashPart(i.hashPart()); + auto inserted = backMap.emplace(hashPart, i).second; + assert(inserted); + hashes.insert(hashPart); + } + + return PathRefScanSink(std::move(hashes), std::move(backMap)); +} + +StorePathSet PathRefScanSink::getResultPaths() +{ + /* Map the hashes found back to their store paths. */ + StorePathSet found; + for (auto & i : getResult()) { + auto j = backMap.find(i); + assert(j != backMap.end()); + found.insert(j->second); + } + + return found; +} + + std::pair scanForReferences( const std::string & path, const StorePathSet & refs) @@ -82,30 +116,13 @@ StorePathSet scanForReferences( const Path & path, const StorePathSet & refs) { - StringSet hashes; - std::map backMap; - - for (auto & i : refs) { - std::string hashPart(i.hashPart()); - auto inserted = backMap.emplace(hashPart, i).second; - assert(inserted); - hashes.insert(hashPart); - } + PathRefScanSink refsSink = PathRefScanSink::fromPaths(refs); + TeeSink sink { refsSink, toTee }; /* Look for the hashes in the NAR dump of the path. */ - RefScanSink refsSink(std::move(hashes)); - TeeSink sink { refsSink, toTee }; dumpPath(path, sink); - /* Map the hashes found back to their store paths. */ - StorePathSet found; - for (auto & i : refsSink.getResult()) { - auto j = backMap.find(i); - assert(j != backMap.end()); - found.insert(j->second); - } - - return found; + return refsSink.getResultPaths(); } diff --git a/src/libstore/references.hh b/src/libstore/references.hh index a6119c861..6f381f96c 100644 --- a/src/libstore/references.hh +++ b/src/libstore/references.hh @@ -27,6 +27,19 @@ public: void operator () (std::string_view data) override; }; +class PathRefScanSink : public RefScanSink +{ + std::map backMap; + + PathRefScanSink(StringSet && hashes, std::map && backMap); + +public: + + static PathRefScanSink fromPaths(const StorePathSet & refs); + + StorePathSet getResultPaths(); +}; + struct RewritingSink : Sink { std::string from, to, prev; From 6bf873651740b1552dea6f30c7778dff11bc52ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?= Date: Mon, 14 Nov 2022 15:03:53 +0100 Subject: [PATCH 2/3] Add release-notes for the context-restriction in readFile --- doc/manual/src/release-notes/rl-next.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/manual/src/release-notes/rl-next.md b/doc/manual/src/release-notes/rl-next.md index 68f7d1a9d..2069e4578 100644 --- a/doc/manual/src/release-notes/rl-next.md +++ b/doc/manual/src/release-notes/rl-next.md @@ -5,3 +5,8 @@ arguments will be ignored and the resulting derivation will have `__impure` set to `true`, making it an impure derivation. +* If `builtins.readFile` is called on a file with context, then only the parts + of that context that appear in the content of the file are retained. + This avoids a lot of spurious errors where some benign strings end-up having + a context just because they are read from a store path + ([#7260](https://github.com/NixOS/nix/pull/7260)). From cb39e9a99e21812b424d7d2318157163ab97fc82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?= Date: Mon, 14 Nov 2022 15:13:46 +0100 Subject: [PATCH 3/3] Test that the result of `readFile` gets ref-scanned --- tests/readfile-context.builder.sh | 1 - tests/readfile-context.nix | 19 ++++++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) delete mode 100644 tests/readfile-context.builder.sh diff --git a/tests/readfile-context.builder.sh b/tests/readfile-context.builder.sh deleted file mode 100644 index 7084a08cb..000000000 --- a/tests/readfile-context.builder.sh +++ /dev/null @@ -1 +0,0 @@ -echo "$input" > $out diff --git a/tests/readfile-context.nix b/tests/readfile-context.nix index 600036a94..54cd1afd9 100644 --- a/tests/readfile-context.nix +++ b/tests/readfile-context.nix @@ -6,14 +6,23 @@ let dependent = mkDerivation { name = "dependent"; - builder = ./readfile-context.builder.sh; - input = "${input}/hello"; + buildCommand = '' + mkdir -p $out + echo -n "$input1" > "$out/file1" + echo -n "$input2" > "$out/file2" + ''; + input1 = "${input}/hello"; + input2 = "hello"; }; readDependent = mkDerivation { - name = "read-dependent"; - builder = ./readfile-context.builder.sh; - input = builtins.readFile dependent; + # Will evaluate correctly because file2 doesn't have any references, + # even though the `dependent` derivation does. + name = builtins.readFile (dependent + "/file2"); + buildCommand = '' + echo "$input" > "$out" + ''; + input = builtins.readFile (dependent + "/file1"); }; in readDependent