forked from lix-project/lix
libutil: remove RewritingSink match/size tracking
Size tracking can be done with a LengthSink and a tee. Match tracking
was already defunct because no matches were ever recorded: all users
would see the same (empty) set of matches at all times. Match tracking
with byte offsets alone would not be sufficient in the general case; it
could only have worked because computeHashModulo uses a single rewrite.
Change-Id: Idb214b5222e0ea24f450f5505712a342b63d7570
This commit is contained in:
parent
5af76dee37
commit
55a32f24d3
2 changed files with 7 additions and 10 deletions
|
@ -96,15 +96,12 @@ void RewritingSink::operator () (std::string_view data)
|
||||||
|
|
||||||
auto consumed = s.size() - prev.size();
|
auto consumed = s.size() - prev.size();
|
||||||
|
|
||||||
pos += consumed;
|
|
||||||
|
|
||||||
if (consumed) nextSink(s.substr(0, consumed));
|
if (consumed) nextSink(s.substr(0, consumed));
|
||||||
}
|
}
|
||||||
|
|
||||||
void RewritingSink::flush()
|
void RewritingSink::flush()
|
||||||
{
|
{
|
||||||
if (prev.empty()) return;
|
if (prev.empty()) return;
|
||||||
pos += prev.size();
|
|
||||||
nextSink(prev);
|
nextSink(prev);
|
||||||
prev.clear();
|
prev.clear();
|
||||||
}
|
}
|
||||||
|
@ -112,20 +109,23 @@ void RewritingSink::flush()
|
||||||
HashResult computeHashModulo(HashType ht, const std::string & modulus, Source & source)
|
HashResult computeHashModulo(HashType ht, const std::string & modulus, Source & source)
|
||||||
{
|
{
|
||||||
HashSink hashSink(ht);
|
HashSink hashSink(ht);
|
||||||
|
LengthSink lengthSink;
|
||||||
RewritingSink rewritingSink(modulus, std::string(modulus.size(), 0), hashSink);
|
RewritingSink rewritingSink(modulus, std::string(modulus.size(), 0), hashSink);
|
||||||
|
|
||||||
source.drainInto(rewritingSink);
|
TeeSink tee{rewritingSink, lengthSink};
|
||||||
|
source.drainInto(tee);
|
||||||
rewritingSink.flush();
|
rewritingSink.flush();
|
||||||
|
|
||||||
/* Hash the positions of the self-references. This ensures that a
|
/* Hash the positions of the self-references. This ensures that a
|
||||||
NAR with self-references and a NAR with some of the
|
NAR with self-references and a NAR with some of the
|
||||||
self-references already zeroed out do not produce a hash
|
self-references already zeroed out do not produce a hash
|
||||||
collision. FIXME: proof. */
|
collision. FIXME: proof. */
|
||||||
for (auto & pos : rewritingSink.matches)
|
// NOTE(horrors) RewritingSink didn't track any matches!
|
||||||
hashSink(fmt("|%d", pos));
|
//for (auto & pos : rewritingSink.matches)
|
||||||
|
// hashSink(fmt("|%d", pos));
|
||||||
|
|
||||||
auto h = hashSink.finish();
|
auto h = hashSink.finish();
|
||||||
return {h.first, rewritingSink.pos};
|
return {h.first, lengthSink.length};
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,9 +29,6 @@ struct RewritingSink : Sink
|
||||||
std::string::size_type maxRewriteSize;
|
std::string::size_type maxRewriteSize;
|
||||||
std::string prev;
|
std::string prev;
|
||||||
Sink & nextSink;
|
Sink & nextSink;
|
||||||
uint64_t pos = 0;
|
|
||||||
|
|
||||||
std::vector<uint64_t> matches;
|
|
||||||
|
|
||||||
RewritingSink(const std::string & from, const std::string & to, Sink & nextSink);
|
RewritingSink(const std::string & from, const std::string & to, Sink & nextSink);
|
||||||
RewritingSink(const StringMap & rewrites, Sink & nextSink);
|
RewritingSink(const StringMap & rewrites, Sink & nextSink);
|
||||||
|
|
Loading…
Reference in a new issue