Merge pull request #5331 from edolstra/references

Add a test for RefScanSink and clean up the code
This commit is contained in:
Eelco Dolstra 2021-10-04 15:06:01 +02:00 committed by GitHub
commit d8a2f7f81d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 124 additions and 53 deletions

View file

@ -4,6 +4,7 @@ makefiles = \
src/libutil/local.mk \ src/libutil/local.mk \
src/libutil/tests/local.mk \ src/libutil/tests/local.mk \
src/libstore/local.mk \ src/libstore/local.mk \
src/libstore/tests/local.mk \
src/libfetchers/local.mk \ src/libfetchers/local.mk \
src/libmain/local.mk \ src/libmain/local.mk \
src/libexpr/local.mk \ src/libexpr/local.mk \

View file

@ -2140,8 +2140,7 @@ void LocalDerivationGoal::registerOutputs()
/* Pass blank Sink as we are not ready to hash data at this stage. */ /* Pass blank Sink as we are not ready to hash data at this stage. */
NullSink blank; NullSink blank;
auto references = worker.store.parseStorePathSet( auto references = scanForReferences(blank, actualPath, referenceablePaths);
scanForReferences(blank, actualPath, worker.store.printStorePathSet(referenceablePaths)));
outputReferencesIfUnregistered.insert_or_assign( outputReferencesIfUnregistered.insert_or_assign(
outputName, outputName,

View file

@ -11,11 +11,13 @@
namespace nix { namespace nix {
static unsigned int refLength = 32; /* characters */ static size_t refLength = 32; /* characters */
static void search(const unsigned char * s, size_t len, static void search(
StringSet & hashes, StringSet & seen) std::string_view s,
StringSet & hashes,
StringSet & seen)
{ {
static std::once_flag initialised; static std::once_flag initialised;
static bool isBase32[256]; static bool isBase32[256];
@ -25,7 +27,7 @@ static void search(const unsigned char * s, size_t len,
isBase32[(unsigned char) base32Chars[i]] = true; isBase32[(unsigned char) base32Chars[i]] = true;
}); });
for (size_t i = 0; i + refLength <= len; ) { for (size_t i = 0; i + refLength <= s.size(); ) {
int j; int j;
bool match = true; bool match = true;
for (j = refLength - 1; j >= 0; --j) for (j = refLength - 1; j >= 0; --j)
@ -35,7 +37,7 @@ static void search(const unsigned char * s, size_t len,
break; break;
} }
if (!match) continue; if (!match) continue;
string ref((const char *) s + i, refLength); std::string ref(s.substr(i, refLength));
if (hashes.erase(ref)) { if (hashes.erase(ref)) {
debug(format("found reference to '%1%' at offset '%2%'") debug(format("found reference to '%1%' at offset '%2%'")
% ref % i); % ref % i);
@ -46,69 +48,60 @@ static void search(const unsigned char * s, size_t len,
} }
struct RefScanSink : Sink void RefScanSink::operator () (std::string_view data)
{
StringSet hashes;
StringSet seen;
string tail;
RefScanSink() { }
void operator () (std::string_view data) override
{ {
/* It's possible that a reference spans the previous and current /* It's possible that a reference spans the previous and current
fragment, so search in the concatenation of the tail of the fragment, so search in the concatenation of the tail of the
previous fragment and the start of the current fragment. */ previous fragment and the start of the current fragment. */
string s = tail + std::string(data, 0, refLength); auto s = tail;
search((const unsigned char *) s.data(), s.size(), hashes, seen); s.append(data.data(), refLength);
search(s, hashes, seen);
search((const unsigned char *) data.data(), data.size(), hashes, seen); search(data, hashes, seen);
size_t tailLen = data.size() <= refLength ? data.size() : refLength; auto tailLen = std::min(data.size(), refLength);
tail = std::string(tail, tail.size() < refLength - tailLen ? 0 : tail.size() - (refLength - tailLen)); auto rest = refLength - tailLen;
tail.append({data.data() + data.size() - tailLen, tailLen}); if (rest < tail.size())
tail = tail.substr(tail.size() - rest);
tail.append(data.data() + data.size() - tailLen, tailLen);
} }
};
std::pair<PathSet, HashResult> scanForReferences(const string & path, std::pair<StorePathSet, HashResult> scanForReferences(
const PathSet & refs) const string & path,
const StorePathSet & refs)
{ {
HashSink hashSink { htSHA256 }; HashSink hashSink { htSHA256 };
auto found = scanForReferences(hashSink, path, refs); auto found = scanForReferences(hashSink, path, refs);
auto hash = hashSink.finish(); auto hash = hashSink.finish();
return std::pair<PathSet, HashResult>(found, hash); return std::pair<StorePathSet, HashResult>(found, hash);
} }
PathSet scanForReferences(Sink & toTee, StorePathSet scanForReferences(
const string & path, const PathSet & refs) Sink & toTee,
const Path & path,
const StorePathSet & refs)
{ {
RefScanSink refsSink; StringSet hashes;
TeeSink sink { refsSink, toTee }; std::map<std::string, StorePath> backMap;
std::map<string, Path> backMap;
for (auto & i : refs) { for (auto & i : refs) {
auto baseName = std::string(baseNameOf(i)); std::string hashPart(i.hashPart());
string::size_type pos = baseName.find('-'); auto inserted = backMap.emplace(hashPart, i).second;
if (pos == string::npos) assert(inserted);
throw Error("bad reference '%1%'", i); hashes.insert(hashPart);
string s = string(baseName, 0, pos);
assert(s.size() == refLength);
assert(backMap.find(s) == backMap.end());
// parseHash(htSHA256, s);
refsSink.hashes.insert(s);
backMap[s] = i;
} }
/* Look for the hashes in the NAR dump of the path. */ /* Look for the hashes in the NAR dump of the path. */
RefScanSink refsSink(std::move(hashes));
TeeSink sink { refsSink, toTee };
dumpPath(path, sink); dumpPath(path, sink);
/* Map the hashes found back to their store paths. */ /* Map the hashes found back to their store paths. */
PathSet found; StorePathSet found;
for (auto & i : refsSink.seen) { for (auto & i : refsSink.getResult()) {
std::map<string, Path>::iterator j; auto j = backMap.find(i);
if ((j = backMap.find(i)) == backMap.end()) abort(); assert(j != backMap.end());
found.insert(j->second); found.insert(j->second);
} }

View file

@ -1,13 +1,31 @@
#pragma once #pragma once
#include "types.hh"
#include "hash.hh"
#include "path.hh"

#include <utility>
namespace nix { namespace nix {
std::pair<PathSet, HashResult> scanForReferences(const Path & path, const PathSet & refs); std::pair<StorePathSet, HashResult> scanForReferences(const Path & path, const StorePathSet & refs);
PathSet scanForReferences(Sink & toTee, const Path & path, const PathSet & refs); StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathSet & refs);
/* A Sink that scans the data written to it for occurrences of a given
   set of hash strings. Data may be supplied in arbitrary fragments;
   `tail` carries state between successive calls of operator (). */
class RefScanSink : public Sink
{
    /* Hash strings to look for, as supplied to the constructor. */
    StringSet hashes;
    /* The result set exposed through getResult(). */
    StringSet seen;

    /* Data retained between calls of operator (). */
    std::string tail;

public:

    /* Takes ownership of the set of hashes to look for. The parameter
       is a named rvalue reference and therefore an lvalue inside the
       constructor; it has to be moved explicitly, otherwise the whole
       set is copied. */
    RefScanSink(StringSet && hashes) : hashes(std::move(hashes))
    { }

    /* Returns a reference to the scanner's result set. */
    StringSet & getResult()
    { return seen; }

    void operator () (std::string_view data) override;
};
struct RewritingSink : Sink struct RewritingSink : Sink
{ {

View file

@ -0,0 +1,15 @@
# Build definition for the libstore unit-test program.

# Make `check` depend on the libstore-tests_RUN target.
check: libstore-tests_RUN
# Add the test binary to the list of programs.
programs += libstore-tests
# $(d) is presumably the directory of this makefile fragment, set by the
# including build system -- TODO confirm.
libstore-tests_DIR := $(d)
# Deliberately empty; presumably this keeps the test binary from being
# installed -- confirm against the build system's handling of _INSTALL_DIR.
libstore-tests_INSTALL_DIR :=
# Every .cc file in $(d) is a source file of the test program.
libstore-tests_SOURCES := $(wildcard $(d)/*.cc)
# Header search paths for the libstore and libutil sources.
libstore-tests_CXXFLAGS += -I src/libstore -I src/libutil
# Libraries the test program is linked against.
libstore-tests_LIBS = libstore
# GTEST_LIBS is defined outside this fragment.
libstore-tests_LDFLAGS := $(GTEST_LIBS)

View file

@ -0,0 +1,45 @@
#include "references.hh"
#include <gtest/gtest.h>
namespace nix {
/* Checks RefScanSink against four inputs: one containing no hash, one
   containing a single hash delivered in one piece, and one containing
   two hashes delivered first in a few uneven fragments and then one
   character at a time. */
TEST(references, scan)
{
    std::string firstHash = "dc04vv14dak1c1r48qa0m23vr9jy8sm0";
    std::string secondHash = "zc842j0rz61mjsp3h3wp5ly71ak6qgdn";

    /* Input that contains no hash at all. */
    {
        RefScanSink sink(StringSet{firstHash});
        const char * input = "foobar";
        sink(input);
        ASSERT_EQ(sink.getResult(), StringSet{});
    }

    /* A single hash embedded in a single fragment. */
    {
        RefScanSink sink(StringSet{firstHash});
        std::string input = "foobar" + firstHash + "xyzzy";
        sink(input);
        ASSERT_EQ(sink.getResult(), StringSet{firstHash});
    }

    /* Two hashes, with the input cut into fragments so that the first
       hash straddles several fragment boundaries. */
    {
        RefScanSink sink(StringSet{firstHash, secondHash});
        std::string input = "foobar" + firstHash + "xyzzy" + secondHash;
        std::string_view view = input;
        sink(view.substr(0, 10));
        sink(view.substr(10, 5));
        sink(view.substr(15, 5));
        sink(view.substr(20));
        ASSERT_EQ(sink.getResult(), StringSet({firstHash, secondHash}));
    }

    /* The same input, delivered one character per call. */
    {
        RefScanSink sink(StringSet{firstHash, secondHash});
        std::string input = "foobar" + firstHash + "xyzzy" + secondHash;
        for (auto & ch : input)
            sink(std::string(1, ch));
        ASSERT_EQ(sink.getResult(), StringSet({firstHash, secondHash}));
    }
}
}