Merge pull request #7260 from ncfavier/readFile-scan-references

Restrict `readFile` context to references that appear in the string
This commit is contained in:
Théophane Hufschmitt 2022-11-15 16:22:28 +01:00 committed by GitHub
commit daf1423a4a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 74 additions and 26 deletions

View file

@ -5,3 +5,8 @@
arguments will be ignored and the resulting derivation will have
`__impure` set to `true`, making it an impure derivation.
* If `builtins.readFile` is called on a file with context, then only the parts
of that context that appear in the content of the file are retained.
This avoids a lot of spurious errors where some benign strings end-up having
a context just because they are read from a store path
([#7260](https://github.com/NixOS/nix/pull/7260)).

View file

@ -5,6 +5,7 @@
#include "globals.hh"
#include "json-to-value.hh"
#include "names.hh"
#include "references.hh"
#include "store-api.hh"
#include "util.hh"
#include "json.hh"
@ -1542,6 +1543,10 @@ static void prim_readFile(EvalState & state, const PosIdx pos, Value * * args, V
refs = state.store->queryPathInfo(state.store->toStorePath(path).first)->references;
} catch (Error &) { // FIXME: should be InvalidPathError
}
// Re-scan references to filter down to just the ones that actually occur in the file.
auto refsSink = PathRefScanSink::fromPaths(refs);
refsSink << s;
refs = refsSink.getResultPaths();
}
auto context = state.store->printStorePathSet(refs);
v.mkString(s, context);

View file

@ -67,6 +67,40 @@ void RefScanSink::operator () (std::string_view data)
}
PathRefScanSink::PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap)
: RefScanSink(std::move(hashes))
, backMap(std::move(backMap))
{ }
PathRefScanSink PathRefScanSink::fromPaths(const StorePathSet & refs)
{
StringSet hashes;
std::map<std::string, StorePath> backMap;
for (auto & i : refs) {
std::string hashPart(i.hashPart());
auto inserted = backMap.emplace(hashPart, i).second;
assert(inserted);
hashes.insert(hashPart);
}
return PathRefScanSink(std::move(hashes), std::move(backMap));
}
StorePathSet PathRefScanSink::getResultPaths()
{
/* Map the hashes found back to their store paths. */
StorePathSet found;
for (auto & i : getResult()) {
auto j = backMap.find(i);
assert(j != backMap.end());
found.insert(j->second);
}
return found;
}
std::pair<StorePathSet, HashResult> scanForReferences(
const std::string & path,
const StorePathSet & refs)
@ -82,30 +116,13 @@ StorePathSet scanForReferences(
const Path & path,
const StorePathSet & refs)
{
StringSet hashes;
std::map<std::string, StorePath> backMap;
for (auto & i : refs) {
std::string hashPart(i.hashPart());
auto inserted = backMap.emplace(hashPart, i).second;
assert(inserted);
hashes.insert(hashPart);
}
PathRefScanSink refsSink = PathRefScanSink::fromPaths(refs);
TeeSink sink { refsSink, toTee };
/* Look for the hashes in the NAR dump of the path. */
RefScanSink refsSink(std::move(hashes));
TeeSink sink { refsSink, toTee };
dumpPath(path, sink);
/* Map the hashes found back to their store paths. */
StorePathSet found;
for (auto & i : refsSink.getResult()) {
auto j = backMap.find(i);
assert(j != backMap.end());
found.insert(j->second);
}
return found;
return refsSink.getResultPaths();
}

View file

@ -27,6 +27,19 @@ public:
void operator () (std::string_view data) override;
};
class PathRefScanSink : public RefScanSink
{
std::map<std::string, StorePath> backMap;
PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap);
public:
static PathRefScanSink fromPaths(const StorePathSet & refs);
StorePathSet getResultPaths();
};
struct RewritingSink : Sink
{
std::string from, to, prev;

View file

@ -1 +0,0 @@
echo "$input" > $out

View file

@ -6,14 +6,23 @@ let
dependent = mkDerivation {
name = "dependent";
builder = ./readfile-context.builder.sh;
input = "${input}/hello";
buildCommand = ''
mkdir -p $out
echo -n "$input1" > "$out/file1"
echo -n "$input2" > "$out/file2"
'';
input1 = "${input}/hello";
input2 = "hello";
};
readDependent = mkDerivation {
name = "read-dependent";
builder = ./readfile-context.builder.sh;
input = builtins.readFile dependent;
# Will evaluate correctly because file2 doesn't have any references,
# even though the `dependent` derivation does.
name = builtins.readFile (dependent + "/file2");
buildCommand = ''
echo "$input" > "$out"
'';
input = builtins.readFile (dependent + "/file1");
};
in readDependent