forked from lix-project/lix
libutil: make rewriteStrings sound
this is used in CA rewriting, replacement of placeholders in
derivations, generating scripts for devShells, and some more
places. in all of these transitive replacements are unsound,
and overlapping replacements would be as well. there even is
a test that transitive replacements do not happen (in the CA
RewriteSink suite), but none for overlapping replacements. a
minimally surprising binary rewriter surely would not do any
of these replacements, the only reason we have not seen this
break yet is probably that rewriteStrings is only called for
store paths and things that look like store paths (and those
should never overlap nor admit such transitive replacements)
Change-Id: I6fc29f939d5061d9f56c752624a823ece8437c07
This commit is contained in:
parent
076dfd30c6
commit
d55b158e24
3 changed files with 90 additions and 7 deletions
|
@ -1385,13 +1385,31 @@ std::string replaceStrings(
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::string rewriteStrings(std::string s, const StringMap & rewrites)
|
Rewriter::Rewriter(std::map<std::string, std::string> rewrites)
|
||||||
|
: rewrites(std::move(rewrites))
|
||||||
|
{
|
||||||
|
for (const auto & [k, v] : this->rewrites) {
|
||||||
|
assert(!k.empty());
|
||||||
|
initials.push_back(k[0]);
|
||||||
|
}
|
||||||
|
std::ranges::sort(initials);
|
||||||
|
auto [firstDupe, end] = std::ranges::unique(initials);
|
||||||
|
initials.erase(firstDupe, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string Rewriter::operator()(std::string s)
|
||||||
{
|
{
|
||||||
for (auto & i : rewrites) {
|
|
||||||
if (i.first == i.second) continue;
|
|
||||||
size_t j = 0;
|
size_t j = 0;
|
||||||
while ((j = s.find(i.first, j)) != std::string::npos)
|
while ((j = s.find_first_of(initials, j)) != std::string::npos) {
|
||||||
s.replace(j, i.first.size(), i.second);
|
size_t skip = 1;
|
||||||
|
for (auto & [from, to] : rewrites) {
|
||||||
|
if (s.compare(j, from.size(), from) == 0) {
|
||||||
|
s.replace(j, from.size(), to);
|
||||||
|
skip = to.size();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
j += skip;
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
|
@ -604,7 +604,33 @@ std::string replaceStrings(
|
||||||
std::string_view to);
|
std::string_view to);
|
||||||
|
|
||||||
|
|
||||||
std::string rewriteStrings(std::string s, const StringMap & rewrites);
|
/**
|
||||||
|
* Rewrites a string given a map of replacements, applying the replacements in
|
||||||
|
* sorted order, only once, considering only the strings appearing in the input
|
||||||
|
* string in performing replacement.
|
||||||
|
*
|
||||||
|
* - Replacements are not performed on intermediate strings. That is, for an input
|
||||||
|
* `"abb"` with replacements `{"ab" -> "ba"}`, the result is `"bab"`.
|
||||||
|
* - Transitive replacements are not performed. For example, for the input `"abcde"`
|
||||||
|
* with replacements `{"a" -> "b", "b" -> "c", "e" -> "b"}`, the result is
|
||||||
|
* `"bccdb"`.
|
||||||
|
*/
|
||||||
|
class Rewriter
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
std::string initials;
|
||||||
|
std::map<std::string, std::string> rewrites;
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit Rewriter(std::map<std::string, std::string> rewrites);
|
||||||
|
|
||||||
|
std::string operator()(std::string s);
|
||||||
|
};
|
||||||
|
|
||||||
|
inline std::string rewriteStrings(std::string s, const StringMap & rewrites)
|
||||||
|
{
|
||||||
|
return Rewriter(rewrites)(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -404,6 +404,45 @@ namespace nix {
|
||||||
ASSERT_EQ(rewriteStrings("this and that", rewrites), "that and that");
|
ASSERT_EQ(rewriteStrings("this and that", rewrites), "that and that");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(rewriteStrings, intransitive) {
|
||||||
|
StringMap rewrites;
|
||||||
|
// transitivity can happen both in forward and reverse iteration order of the rewrite map.
|
||||||
|
rewrites["a"] = "b";
|
||||||
|
rewrites["b"] = "c";
|
||||||
|
rewrites["e"] = "b";
|
||||||
|
|
||||||
|
ASSERT_EQ(rewriteStrings("abcde", rewrites), "bccdb");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(rewriteStrings, nonoverlapping) {
|
||||||
|
StringMap rewrites;
|
||||||
|
rewrites["ab"] = "ca";
|
||||||
|
|
||||||
|
ASSERT_EQ(rewriteStrings("abb", rewrites), "cab");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(rewriteStrings, differentLength) {
|
||||||
|
StringMap rewrites;
|
||||||
|
rewrites["a"] = "an has a trea";
|
||||||
|
|
||||||
|
ASSERT_EQ(rewriteStrings("cat", rewrites), "can has a treat");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(rewriteStrings, sorted) {
|
||||||
|
StringMap rewrites;
|
||||||
|
rewrites["a"] = "meow";
|
||||||
|
rewrites["abc"] = "puppy";
|
||||||
|
|
||||||
|
ASSERT_EQ(rewriteStrings("abcde", rewrites), "meowbcde");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(rewriteStrings, multiple) {
|
||||||
|
StringMap rewrites;
|
||||||
|
rewrites["a"] = "b";
|
||||||
|
|
||||||
|
ASSERT_EQ(rewriteStrings("a1a2a3a", rewrites), "b1b2b3b");
|
||||||
|
}
|
||||||
|
|
||||||
TEST(rewriteStrings, doesntOccur) {
|
TEST(rewriteStrings, doesntOccur) {
|
||||||
StringMap rewrites;
|
StringMap rewrites;
|
||||||
rewrites["foo"] = "bar";
|
rewrites["foo"] = "bar";
|
||||||
|
|
Loading…
Reference in a new issue