From d55b158e24fd4b56c6d8017626a4db7d6f56dd1f Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Wed, 1 May 2024 23:12:56 +0200 Subject: [PATCH] libutil: make rewriteStrings sound this is used in CA rewriting, replacement of placeholders in derivations, generating scripts for devShells, and some more places. in all of these transitive replacements are unsound, and overlapping replacements would be as well. there even is a test that transitive replacements do not happen (in the CA RewriteSink suite), but none for overlapping replacements. a minimally surprising binary rewriter surely would not do any of these replacements, the only reason we have not seen this break yet is probably that rewriteStrings is only called for store paths and things that look like store paths (and those should never overlap nor admit such transitive replacements) Change-Id: I6fc29f939d5061d9f56c752624a823ece8437c07 --- src/libutil/util.cc | 30 ++++++++++++++++++++++------ src/libutil/util.hh | 28 +++++++++++++++++++++++++- tests/unit/libutil/tests.cc | 39 +++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 7 deletions(-) diff --git a/src/libutil/util.cc b/src/libutil/util.cc index dc724db3e..bc2dd1802 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -1385,13 +1385,31 @@ std::string replaceStrings( } -std::string rewriteStrings(std::string s, const StringMap & rewrites) +Rewriter::Rewriter(std::map rewrites) + : rewrites(std::move(rewrites)) { - for (auto & i : rewrites) { - if (i.first == i.second) continue; - size_t j = 0; - while ((j = s.find(i.first, j)) != std::string::npos) - s.replace(j, i.first.size(), i.second); + for (const auto & [k, v] : this->rewrites) { + assert(!k.empty()); + initials.push_back(k[0]); + } + std::ranges::sort(initials); + auto [firstDupe, end] = std::ranges::unique(initials); + initials.erase(firstDupe, end); +} + +std::string Rewriter::operator()(std::string s) +{ + size_t j = 0; + while ((j = s.find_first_of(initials, j)) != std::string::npos) { + size_t skip = 1; + for (auto & [from, to] : rewrites) { + if (s.compare(j, from.size(), from) == 0) { + s.replace(j, from.size(), to); + skip = to.size(); + break; + } + } + j += skip; } return s; } diff --git a/src/libutil/util.hh b/src/libutil/util.hh index 9c2385e84..ac4aa1d3a 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -604,7 +604,33 @@ std::string replaceStrings( std::string_view to); -std::string rewriteStrings(std::string s, const StringMap & rewrites); +/** + * Rewrites a string given a map of replacements, applying the replacements in + * sorted order, only once, considering only the strings appearing in the input + * string in performing replacement. + * + * - Replacements are not performed on intermediate strings. That is, for an input + * `"abb"` with replacements `{"ab" -> "ba"}`, the result is `"bab"`. + * - Transitive replacements are not performed. For example, for the input `"abcde"` + * with replacements `{"a" -> "b", "b" -> "c", "e" -> "b"}`, the result is + * `"bccdb"`. + */ +class Rewriter +{ +private: + std::string initials; + std::map rewrites; + +public: + explicit Rewriter(std::map rewrites); + + std::string operator()(std::string s); +}; + +inline std::string rewriteStrings(std::string s, const StringMap & rewrites) +{ + return Rewriter(rewrites)(s); +} /** diff --git a/tests/unit/libutil/tests.cc b/tests/unit/libutil/tests.cc index f55c56548..720370066 100644 --- a/tests/unit/libutil/tests.cc +++ b/tests/unit/libutil/tests.cc @@ -404,6 +404,45 @@ namespace nix { ASSERT_EQ(rewriteStrings("this and that", rewrites), "that and that"); } + TEST(rewriteStrings, intransitive) { + StringMap rewrites; + // transitivity can happen both in forward and reverse iteration order of the rewrite map. + rewrites["a"] = "b"; + rewrites["b"] = "c"; + rewrites["e"] = "b"; + + ASSERT_EQ(rewriteStrings("abcde", rewrites), "bccdb"); + } + + TEST(rewriteStrings, nonoverlapping) { + StringMap rewrites; + rewrites["ab"] = "ca"; + + ASSERT_EQ(rewriteStrings("abb", rewrites), "cab"); + } + + TEST(rewriteStrings, differentLength) { + StringMap rewrites; + rewrites["a"] = "an has a trea"; + + ASSERT_EQ(rewriteStrings("cat", rewrites), "can has a treat"); + } + + TEST(rewriteStrings, sorted) { + StringMap rewrites; + rewrites["a"] = "meow"; + rewrites["abc"] = "puppy"; + + ASSERT_EQ(rewriteStrings("abcde", rewrites), "meowbcde"); + } + + TEST(rewriteStrings, multiple) { + StringMap rewrites; + rewrites["a"] = "b"; + + ASSERT_EQ(rewriteStrings("a1a2a3a", rewrites), "b1b2b3b"); + } + TEST(rewriteStrings, doesntOccur) { StringMap rewrites; rewrites["foo"] = "bar";