From 900b85408435f6f7118e5d46f606d377452cdb12 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 12 Dec 2022 19:57:32 +0100 Subject: [PATCH] Add CanonPath wrapper to represent canonicalized paths --- src/libutil/canon-path.cc | 103 +++++++++++++++++++ src/libutil/canon-path.hh | 173 ++++++++++++++++++++++++++++++++ src/libutil/tests/canon-path.cc | 155 ++++++++++++++++++++++++++++ 3 files changed, 431 insertions(+) create mode 100644 src/libutil/canon-path.cc create mode 100644 src/libutil/canon-path.hh create mode 100644 src/libutil/tests/canon-path.cc diff --git a/src/libutil/canon-path.cc b/src/libutil/canon-path.cc new file mode 100644 index 000000000..b132b4262 --- /dev/null +++ b/src/libutil/canon-path.cc @@ -0,0 +1,103 @@ +#include "canon-path.hh" +#include "util.hh" + +namespace nix { + +CanonPath CanonPath::root = CanonPath("/"); + +CanonPath::CanonPath(std::string_view raw) + : path(absPath((Path) raw, "/")) +{ } + +CanonPath::CanonPath(std::string_view raw, const CanonPath & root) + : path(absPath((Path) raw, root.abs())) +{ } + +std::optional CanonPath::parent() const +{ + if (isRoot()) return std::nullopt; + return CanonPath(unchecked_t(), path.substr(0, std::max((size_t) 1, path.rfind('/')))); +} + +void CanonPath::pop() +{ + assert(!isRoot()); + path.resize(std::max((size_t) 1, path.rfind('/'))); +} + +bool CanonPath::isWithin(const CanonPath & parent) const +{ + return !( + path.size() < parent.path.size() + || path.substr(0, parent.path.size()) != parent.path + || (parent.path.size() > 1 && path.size() > parent.path.size() + && path[parent.path.size()] != '/')); +} + +CanonPath CanonPath::removePrefix(const CanonPath & prefix) const +{ + assert(isWithin(prefix)); + if (prefix.isRoot()) return *this; + if (path.size() == prefix.path.size()) return root; + return CanonPath(unchecked_t(), path.substr(prefix.path.size())); +} + +void CanonPath::extend(const CanonPath & x) +{ + if (x.isRoot()) return; + if (isRoot()) + path += x.rel(); + else + path += x.abs(); +} + +CanonPath CanonPath::operator + (const CanonPath & x) const +{ + auto res = *this; + res.extend(x); + return res; +} + +void CanonPath::push(std::string_view c) +{ + assert(c.find('/') == c.npos); + assert(c != "." && c != ".."); + if (!isRoot()) path += '/'; + path += c; +} + +CanonPath CanonPath::operator + (std::string_view c) const +{ + auto res = *this; + res.push(c); + return res; +} + +bool CanonPath::isAllowed(const std::set & allowed) const +{ + /* Check if `this` is an exact match or the parent of an + allowed path. */ + auto lb = allowed.lower_bound(*this); + if (lb != allowed.end()) { + if (lb->isWithin(*this)) + return true; + } + + /* Check if a parent of `this` is allowed. */ + auto path = *this; + while (!path.isRoot()) { + path.pop(); + if (allowed.count(path)) + return true; + } + + return false; +} + +std::ostream & operator << (std::ostream & stream, const CanonPath & path) +{ + stream << path.abs(); + return stream; +} + +} diff --git a/src/libutil/canon-path.hh b/src/libutil/canon-path.hh new file mode 100644 index 000000000..c5e7f0596 --- /dev/null +++ b/src/libutil/canon-path.hh @@ -0,0 +1,173 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace nix { + +/* A canonical representation of a path. It ensures the following: + + - It always starts with a slash. + + - It never ends with a slash, except if the path is "/". + + - A slash is never followed by a slash (i.e. no empty components). + + - There are no components equal to '.' or '..'. + + Note that the path does not need to correspond to an actually + existing path, and there is no guarantee that symlinks are + resolved. +*/ +class CanonPath +{ + std::string path; + +public: + + /* Construct a canon path from a non-canonical path. Any '.', '..' + or empty components are removed. */ + CanonPath(std::string_view raw); + + explicit CanonPath(const char * raw) + : CanonPath(std::string_view(raw)) + { } + + struct unchecked_t { }; + + CanonPath(unchecked_t _, std::string path) + : path(std::move(path)) + { } + + static CanonPath root; + + /* If `raw` starts with a slash, return + `CanonPath(raw)`. Otherwise return a `CanonPath` representing + `root + "/" + raw`. */ + CanonPath(std::string_view raw, const CanonPath & root); + + bool isRoot() const + { return path.size() <= 1; } + + explicit operator std::string_view() const + { return path; } + + const std::string & abs() const + { return path; } + + /* Like abs(), but return an empty string if this path is + '/'. Thus the returned string never ends in a slash. */ + const std::string & absOrEmpty() const + { + const static std::string epsilon; + return isRoot() ? epsilon : path; + } + + const char * c_str() const + { return path.c_str(); } + + std::string_view rel() const + { return ((std::string_view) path).substr(1); } + + struct Iterator + { + std::string_view remaining; + size_t slash; + + Iterator(std::string_view remaining) + : remaining(remaining) + , slash(remaining.find('/')) + { } + + bool operator != (const Iterator & x) const + { return remaining.data() != x.remaining.data(); } + + const std::string_view operator * () const + { return remaining.substr(0, slash); } + + void operator ++ () + { + if (slash == remaining.npos) + remaining = remaining.substr(remaining.size()); + else { + remaining = remaining.substr(slash + 1); + slash = remaining.find('/'); + } + } + }; + + Iterator begin() const { return Iterator(rel()); } + Iterator end() const { return Iterator(rel().substr(path.size() - 1)); } + + std::optional parent() const; + + /* Remove the last component. Panics if this path is the root. */ + void pop(); + + std::optional dirOf() const + { + if (isRoot()) return std::nullopt; + return path.substr(0, path.rfind('/')); + } + + std::optional baseName() const + { + if (isRoot()) return std::nullopt; + return ((std::string_view) path).substr(path.rfind('/') + 1); + } + + bool operator == (const CanonPath & x) const + { return path == x.path; } + + bool operator != (const CanonPath & x) const + { return path != x.path; } + + /* Compare paths lexicographically except that path separators + are sorted before any other character. That is, in the sorted order + a directory is always followed directly by its children. For + instance, 'foo' < 'foo/bar' < 'foo!'. */ + bool operator < (const CanonPath & x) const + { + auto i = path.begin(); + auto j = x.path.begin(); + for ( ; i != path.end() && j != x.path.end(); ++i, ++j) { + auto c_i = *i; + if (c_i == '/') c_i = 0; + auto c_j = *j; + if (c_j == '/') c_j = 0; + if (c_i < c_j) return true; + if (c_i > c_j) return false; + } + return i == path.end() && j != x.path.end(); + } + + /* Return true if `this` is equal to `parent` or a child of + `parent`. */ + bool isWithin(const CanonPath & parent) const; + + CanonPath removePrefix(const CanonPath & prefix) const; + + /* Append another path to this one. */ + void extend(const CanonPath & x); + + /* Concatenate two paths. */ + CanonPath operator + (const CanonPath & x) const; + + /* Add a path component to this one. It must not contain any slashes. */ + void push(std::string_view c); + + CanonPath operator + (std::string_view c) const; + + /* Check whether access to this path is allowed, which is the case + if 1) `this` is within any of the `allowed` paths; or 2) any of + the `allowed` paths are within `this`. (The latter condition + ensures access to the parents of allowed paths.) */ + bool isAllowed(const std::set & allowed) const; +}; + +std::ostream & operator << (std::ostream & stream, const CanonPath & path); + +} diff --git a/src/libutil/tests/canon-path.cc b/src/libutil/tests/canon-path.cc new file mode 100644 index 000000000..c1c5adadf --- /dev/null +++ b/src/libutil/tests/canon-path.cc @@ -0,0 +1,155 @@ +#include "canon-path.hh" + +#include + +namespace nix { + + TEST(CanonPath, basic) { + { + CanonPath p("/"); + ASSERT_EQ(p.abs(), "/"); + ASSERT_EQ(p.rel(), ""); + ASSERT_EQ(p.baseName(), std::nullopt); + ASSERT_EQ(p.dirOf(), std::nullopt); + ASSERT_FALSE(p.parent()); + } + + { + CanonPath p("/foo//"); + ASSERT_EQ(p.abs(), "/foo"); + ASSERT_EQ(p.rel(), "foo"); + ASSERT_EQ(*p.baseName(), "foo"); + ASSERT_EQ(*p.dirOf(), ""); // FIXME: do we want this? + ASSERT_EQ(p.parent()->abs(), "/"); + } + + { + CanonPath p("foo/bar"); + ASSERT_EQ(p.abs(), "/foo/bar"); + ASSERT_EQ(p.rel(), "foo/bar"); + ASSERT_EQ(*p.baseName(), "bar"); + ASSERT_EQ(*p.dirOf(), "/foo"); + ASSERT_EQ(p.parent()->abs(), "/foo"); + } + + { + CanonPath p("foo//bar/"); + ASSERT_EQ(p.abs(), "/foo/bar"); + ASSERT_EQ(p.rel(), "foo/bar"); + ASSERT_EQ(*p.baseName(), "bar"); + ASSERT_EQ(*p.dirOf(), "/foo"); + } + } + + TEST(CanonPath, pop) { + CanonPath p("foo/bar/x"); + ASSERT_EQ(p.abs(), "/foo/bar/x"); + p.pop(); + ASSERT_EQ(p.abs(), "/foo/bar"); + p.pop(); + ASSERT_EQ(p.abs(), "/foo"); + p.pop(); + ASSERT_EQ(p.abs(), "/"); + } + + TEST(CanonPath, removePrefix) { + CanonPath p1("foo/bar"); + CanonPath p2("foo/bar/a/b/c"); + ASSERT_EQ(p2.removePrefix(p1).abs(), "/a/b/c"); + ASSERT_EQ(p1.removePrefix(p1).abs(), "/"); + ASSERT_EQ(p1.removePrefix(CanonPath("/")).abs(), "/foo/bar"); + } + + TEST(CanonPath, iter) { + { + CanonPath p("a//foo/bar//"); + std::vector ss; + for (auto & c : p) ss.push_back(c); + ASSERT_EQ(ss, std::vector({"a", "foo", "bar"})); + } + + { + CanonPath p("/"); + std::vector ss; + for (auto & c : p) ss.push_back(c); + ASSERT_EQ(ss, std::vector()); + } + } + + TEST(CanonPath, concat) { + { + CanonPath p1("a//foo/bar//"); + CanonPath p2("xyzzy/bla"); + ASSERT_EQ((p1 + p2).abs(), "/a/foo/bar/xyzzy/bla"); + } + + { + CanonPath p1("/"); + CanonPath p2("/a/b"); + ASSERT_EQ((p1 + p2).abs(), "/a/b"); + } + + { + CanonPath p1("/a/b"); + CanonPath p2("/"); + ASSERT_EQ((p1 + p2).abs(), "/a/b"); + } + + { + CanonPath p("/foo/bar"); + ASSERT_EQ((p + "x").abs(), "/foo/bar/x"); + } + + { + CanonPath p("/"); + ASSERT_EQ((p + "foo" + "bar").abs(), "/foo/bar"); + } + } + + TEST(CanonPath, within) { + { + ASSERT_TRUE(CanonPath("foo").isWithin(CanonPath("foo"))); + ASSERT_FALSE(CanonPath("foo").isWithin(CanonPath("bar"))); + ASSERT_FALSE(CanonPath("foo").isWithin(CanonPath("fo"))); + ASSERT_TRUE(CanonPath("foo/bar").isWithin(CanonPath("foo"))); + ASSERT_FALSE(CanonPath("foo").isWithin(CanonPath("foo/bar"))); + ASSERT_TRUE(CanonPath("/foo/bar/default.nix").isWithin(CanonPath("/"))); + ASSERT_TRUE(CanonPath("/").isWithin(CanonPath("/"))); + } + } + + TEST(CanonPath, sort) { + ASSERT_FALSE(CanonPath("foo") < CanonPath("foo")); + ASSERT_TRUE (CanonPath("foo") < CanonPath("foo/bar")); + ASSERT_TRUE (CanonPath("foo/bar") < CanonPath("foo!")); + ASSERT_FALSE(CanonPath("foo!") < CanonPath("foo")); + ASSERT_TRUE (CanonPath("foo") < CanonPath("foo!")); + } + + TEST(CanonPath, allowed) { + { + std::set allowed { + CanonPath("foo/bar"), + CanonPath("foo!"), + CanonPath("xyzzy"), + CanonPath("a/b/c"), + }; + + ASSERT_TRUE (CanonPath("foo/bar").isAllowed(allowed)); + ASSERT_TRUE (CanonPath("foo/bar/bla").isAllowed(allowed)); + ASSERT_TRUE (CanonPath("foo").isAllowed(allowed)); + ASSERT_FALSE(CanonPath("bar").isAllowed(allowed)); + ASSERT_FALSE(CanonPath("bar/a").isAllowed(allowed)); + ASSERT_TRUE (CanonPath("a").isAllowed(allowed)); + ASSERT_TRUE (CanonPath("a/b").isAllowed(allowed)); + ASSERT_TRUE (CanonPath("a/b/c").isAllowed(allowed)); + ASSERT_TRUE (CanonPath("a/b/c/d").isAllowed(allowed)); + ASSERT_TRUE (CanonPath("a/b/c/d/e").isAllowed(allowed)); + ASSERT_FALSE(CanonPath("a/b/a").isAllowed(allowed)); + ASSERT_FALSE(CanonPath("a/b/d").isAllowed(allowed)); + ASSERT_FALSE(CanonPath("aaa").isAllowed(allowed)); + ASSERT_FALSE(CanonPath("zzz").isAllowed(allowed)); + ASSERT_TRUE (CanonPath("/").isAllowed(allowed)); + } + } +}