Add a test for RefScanSink and clean up the code

Issue #5322.
This commit is contained in:
Eelco Dolstra 2021-10-04 14:29:42 +02:00
parent ef34fd0656
commit 77ebbc9f54
5 changed files with 105 additions and 30 deletions

View file

@ -4,6 +4,7 @@ makefiles = \
src/libutil/local.mk \ src/libutil/local.mk \
src/libutil/tests/local.mk \ src/libutil/tests/local.mk \
src/libstore/local.mk \ src/libstore/local.mk \
src/libstore/tests/local.mk \
src/libfetchers/local.mk \ src/libfetchers/local.mk \
src/libmain/local.mk \ src/libmain/local.mk \
src/libexpr/local.mk \ src/libexpr/local.mk \

View file

@ -11,11 +11,13 @@
namespace nix { namespace nix {
static unsigned int refLength = 32; /* characters */ static size_t refLength = 32; /* characters */
static void search(const unsigned char * s, size_t len, static void search(
StringSet & hashes, StringSet & seen) std::string_view s,
StringSet & hashes,
StringSet & seen)
{ {
static std::once_flag initialised; static std::once_flag initialised;
static bool isBase32[256]; static bool isBase32[256];
@ -25,7 +27,7 @@ static void search(const unsigned char * s, size_t len,
isBase32[(unsigned char) base32Chars[i]] = true; isBase32[(unsigned char) base32Chars[i]] = true;
}); });
for (size_t i = 0; i + refLength <= len; ) { for (size_t i = 0; i + refLength <= s.size(); ) {
int j; int j;
bool match = true; bool match = true;
for (j = refLength - 1; j >= 0; --j) for (j = refLength - 1; j >= 0; --j)
@ -35,7 +37,7 @@ static void search(const unsigned char * s, size_t len,
break; break;
} }
if (!match) continue; if (!match) continue;
string ref((const char *) s + i, refLength); std::string ref(s.substr(i, refLength));
if (hashes.erase(ref)) { if (hashes.erase(ref)) {
debug(format("found reference to '%1%' at offset '%2%'") debug(format("found reference to '%1%' at offset '%2%'")
% ref % i); % ref % i);
@ -46,30 +48,23 @@ static void search(const unsigned char * s, size_t len,
} }
struct RefScanSink : Sink void RefScanSink::operator () (std::string_view data)
{ {
StringSet hashes;
StringSet seen;
string tail;
RefScanSink() { }
void operator () (std::string_view data) override
{
/* It's possible that a reference spans the previous and current /* It's possible that a reference spans the previous and current
fragment, so search in the concatenation of the tail of the fragment, so search in the concatenation of the tail of the
previous fragment and the start of the current fragment. */ previous fragment and the start of the current fragment. */
string s = tail + std::string(data, 0, refLength); auto s = tail;
search((const unsigned char *) s.data(), s.size(), hashes, seen); s.append(data.data(), refLength);
search(s, hashes, seen);
search((const unsigned char *) data.data(), data.size(), hashes, seen); search(data, hashes, seen);
size_t tailLen = data.size() <= refLength ? data.size() : refLength; auto tailLen = std::min(data.size(), refLength);
tail = std::string(tail, tail.size() < refLength - tailLen ? 0 : tail.size() - (refLength - tailLen)); auto rest = refLength - tailLen;
tail.append({data.data() + data.size() - tailLen, tailLen}); if (rest < tail.size())
} tail = tail.substr(tail.size() - rest);
}; tail.append(data.data() + data.size() - tailLen, tailLen);
}
std::pair<StorePathSet, HashResult> scanForReferences( std::pair<StorePathSet, HashResult> scanForReferences(
@ -87,23 +82,24 @@ StorePathSet scanForReferences(
const Path & path, const Path & path,
const StorePathSet & refs) const StorePathSet & refs)
{ {
RefScanSink refsSink; StringSet hashes;
TeeSink sink { refsSink, toTee };
std::map<std::string, StorePath> backMap; std::map<std::string, StorePath> backMap;
for (auto & i : refs) { for (auto & i : refs) {
std::string hashPart(i.hashPart()); std::string hashPart(i.hashPart());
auto inserted = backMap.emplace(hashPart, i).second; auto inserted = backMap.emplace(hashPart, i).second;
assert(inserted); assert(inserted);
refsSink.hashes.insert(hashPart); hashes.insert(hashPart);
} }
/* Look for the hashes in the NAR dump of the path. */ /* Look for the hashes in the NAR dump of the path. */
RefScanSink refsSink(std::move(hashes));
TeeSink sink { refsSink, toTee };
dumpPath(path, sink); dumpPath(path, sink);
/* Map the hashes found back to their store paths. */ /* Map the hashes found back to their store paths. */
StorePathSet found; StorePathSet found;
for (auto & i : refsSink.seen) { for (auto & i : refsSink.getResult()) {
auto j = backMap.find(i); auto j = backMap.find(i);
assert(j != backMap.end()); assert(j != backMap.end());
found.insert(j->second); found.insert(j->second);

View file

@ -9,6 +9,24 @@ std::pair<StorePathSet, HashResult> scanForReferences(const Path & path, const S
StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathSet & refs); StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathSet & refs);
/* A sink that scans everything written to it for a given set of hash
   strings. Data may be written in arbitrary fragments. */
class RefScanSink : public Sink
{
    /* The hashes to look for, as passed to the constructor. */
    StringSet hashes;

    /* The set handed out by getResult(). */
    StringSet seen;

    /* Bytes retained from earlier fragments, so that a hash split
       across two calls to operator () can still be found. */
    std::string tail;

public:

    /* Takes ownership of `hashes`. Note that a named rvalue reference
       is an lvalue, so it has to be moved explicitly; without
       std::move the set would be copied. */
    RefScanSink(StringSet && hashes) : hashes(std::move(hashes))
    { }

    /* Returns the hashes found in the data written so far. */
    StringSet & getResult()
    { return seen; }

    /* Scans the next fragment of the stream. */
    void operator () (std::string_view data) override;
};
struct RewritingSink : Sink struct RewritingSink : Sink
{ {
std::string from, to, prev; std::string from, to, prev;

View file

@ -0,0 +1,15 @@
# Build description for the libstore unit-test program.
# `check` depends on the libstore-tests_RUN target.
check: libstore-tests_RUN
# Add the test binary to the list of programs.
programs += libstore-tests
libstore-tests_DIR := $(d)
# Deliberately empty -- presumably so the test binary is not installed;
# confirm against the make framework that consumes this variable.
libstore-tests_INSTALL_DIR :=
# Every .cc file in this directory is a source of the test program.
libstore-tests_SOURCES := $(wildcard $(d)/*.cc)
# Header search paths for the libraries referenced by the tests.
libstore-tests_CXXFLAGS += -I src/libstore -I src/libutil
libstore-tests_LIBS = libstore
# Linker flags for GoogleTest, taken from GTEST_LIBS.
libstore-tests_LDFLAGS := $(GTEST_LIBS)

View file

@ -0,0 +1,45 @@
#include "references.hh"
#include <gtest/gtest.h>
namespace nix {

/* Checks that RefScanSink reports exactly the hashes present in its
   input, whether the input arrives in one piece, in uneven chunks,
   or one byte at a time. */
TEST(references, scan)
{
    const std::string hash1 = "dc04vv14dak1c1r48qa0m23vr9jy8sm0";
    const std::string hash2 = "zc842j0rz61mjsp3h3wp5ly71ak6qgdn";

    /* Input without any hash: nothing is found. */
    {
        RefScanSink sink(StringSet{hash1});
        sink("foobar");
        ASSERT_EQ(sink.getResult(), StringSet{});
    }

    /* A single hash embedded in a single fragment. */
    {
        RefScanSink sink(StringSet{hash1});
        const std::string input = "foobar" + hash1 + "xyzzy";
        sink(input);
        ASSERT_EQ(sink.getResult(), StringSet{hash1});
    }

    /* Two hashes, with the input cut into fragments so that the
       first hash straddles several fragment boundaries. */
    {
        RefScanSink sink(StringSet{hash1, hash2});
        const std::string input = "foobar" + hash1 + "xyzzy" + hash2;
        const std::string_view view{input};
        sink(view.substr(0, 10));
        sink(view.substr(10, 5));
        sink(view.substr(15, 5));
        sink(view.substr(20));
        ASSERT_EQ(sink.getResult(), StringSet({hash1, hash2}));
    }

    /* Same input, delivered one byte per call. */
    {
        RefScanSink sink(StringSet{hash1, hash2});
        const std::string input = "foobar" + hash1 + "xyzzy" + hash2;
        for (const char c : input)
            sink(std::string(1, c));
        ASSERT_EQ(sink.getResult(), StringSet({hash1, hash2}));
    }
}

}