Merge pull request #7450 from edolstra/canon-path

Add CanonPath wrapper to represent canonicalized paths
This commit is contained in:
Eelco Dolstra 2022-12-19 16:21:11 +01:00 committed by GitHub
commit d00bfe4833
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 431 additions and 0 deletions

103
src/libutil/canon-path.cc Normal file
View file

@ -0,0 +1,103 @@
#include "canon-path.hh"
#include "util.hh"
namespace nix {
/* Definition of the shared root path, built from the literal "/". */
CanonPath CanonPath::root("/");
/* Build a path from arbitrary text. The text is handed to absPath()
   with "/" as the base directory and the result is stored as-is. */
CanonPath::CanonPath(std::string_view raw)
    : path(absPath(static_cast<Path>(raw), "/"))
{
}
/* Build a path from arbitrary text, handing absPath() the absolute
   form of `root` as the base directory instead of "/". */
CanonPath::CanonPath(std::string_view raw, const CanonPath & root)
    : path(absPath(static_cast<Path>(raw), root.abs()))
{
}
/* Return this path without its last component, or std::nullopt when
   this path is the root. */
std::optional<CanonPath> CanonPath::parent() const
{
    if (isRoot())
        return std::nullopt;

    /* Keep at least one character, so that the parent of a top-level
       entry such as "/foo" is "/" and not the empty string. */
    const auto lastSlash = path.rfind('/');
    const auto keep = std::max(static_cast<size_t>(1), lastSlash);
    return CanonPath(unchecked_t(), path.substr(0, keep));
}
/* Drop the last component in place. Calling this on the root path
   trips the assertion. */
void CanonPath::pop()
{
    assert(!isRoot());

    /* Never shrink below one character: popping "/foo" leaves "/". */
    const auto lastSlash = path.rfind('/');
    path.resize(std::max(static_cast<size_t>(1), lastSlash));
}
/* Return true if this path equals `parent` or lies below it. */
bool CanonPath::isWithin(const CanonPath & parent) const
{
    const auto prefixLen = parent.path.size();

    /* A path shorter than `parent` cannot be inside it. */
    if (path.size() < prefixLen)
        return false;

    /* The leading characters must match `parent` exactly. */
    if (path.compare(0, prefixLen, parent.path) != 0)
        return false;

    /* For a non-root `parent`, a longer path must continue with a
       separator; otherwise "/foobar" would count as within "/foo". */
    if (prefixLen > 1 && path.size() > prefixLen && path[prefixLen] != '/')
        return false;

    return true;
}
/* Return this path with the leading `prefix` stripped. `prefix` must
   contain this path (asserted). Stripping the root returns the path
   unchanged; stripping the path itself returns the root. */
CanonPath CanonPath::removePrefix(const CanonPath & prefix) const
{
    assert(isWithin(prefix));

    if (prefix.isRoot())
        return *this;

    const auto prefixLen = prefix.path.size();
    if (path.size() == prefixLen)
        return root;

    /* What remains begins at the separator that followed the prefix,
       so it already starts with a slash. */
    return CanonPath(unchecked_t(), path.substr(prefixLen));
}
/* Append all components of `x` to this path in place. Appending the
   root is a no-op. */
void CanonPath::extend(const CanonPath & x)
{
    if (x.isRoot())
        return;

    if (!isRoot()) {
        /* "/a" + "/b" -> "/a/b": the leading slash of `x` acts as the
           separator. */
        path += x.abs();
        return;
    }

    /* We are "/", which already ends in a slash, so append `x`
       without its leading slash. */
    path += x.rel();
}
/* Return a copy of this path extended by all components of `x`. */
CanonPath CanonPath::operator + (const CanonPath & x) const
{
    CanonPath joined(*this);
    joined.extend(x);
    return joined;
}
/* Append the single component `c` in place. `c` must not contain a
   slash and must not be "." or ".." (both asserted). */
void CanonPath::push(std::string_view c)
{
    assert(c.find('/') == c.npos);
    assert(c != "." && c != "..");

    /* The root already ends in a slash; every other path needs a
       separator before the new component. */
    if (!isRoot())
        path += '/';
    path += c;
}
/* Return a copy of this path with the single component `c` appended. */
CanonPath CanonPath::operator + (std::string_view c) const
{
    CanonPath extended(*this);
    extended.push(c);
    return extended;
}
bool CanonPath::isAllowed(const std::set<CanonPath> & allowed) const
{
/* Check if `this` is an exact match or the parent of an
allowed path. */
auto lb = allowed.lower_bound(*this);
if (lb != allowed.end()) {
if (lb->isWithin(*this))
return true;
}
/* Check if a parent of `this` is allowed. */
auto path = *this;
while (!path.isRoot()) {
path.pop();
if (allowed.count(path))
return true;
}
return false;
}
/* Write the absolute form of `path` to `stream` and return the
   stream so that insertions can be chained. */
std::ostream & operator << (std::ostream & stream, const CanonPath & path)
{
    return stream << path.abs();
}
}

173
src/libutil/canon-path.hh Normal file
View file

@ -0,0 +1,173 @@
#pragma once
#include <string>
#include <optional>
#include <cassert>
#include <iostream>
#include <set>
namespace nix {
/* A canonical representation of a path. It ensures the following:

   - It always starts with a slash.

   - It never ends with a slash, except if the path is "/".

   - A slash is never followed by a slash (i.e. no empty components).

   - There are no components equal to '.' or '..'.

   Note that the path does not need to correspond to an actually
   existing path, and there is no guarantee that symlinks are
   resolved.
*/
class CanonPath
{
    /* The canonical path string, e.g. "/" or "/foo/bar". */
    std::string path;

public:

    /* Construct a canon path from a non-canonical path. Any '.', '..'
       or empty components are removed. */
    CanonPath(std::string_view raw);

    explicit CanonPath(const char * raw)
        : CanonPath(std::string_view(raw))
    { }

    /* Tag type selecting the constructor below, which stores `path`
       verbatim without canonicalizing it. The caller is responsible
       for passing a string that already satisfies the invariants
       listed above. */
    struct unchecked_t { };

    CanonPath(unchecked_t _, std::string path)
        : path(std::move(path))
    { }

    /* The root path "/". */
    static CanonPath root;

    /* If `raw` starts with a slash, return
       `CanonPath(raw)`. Otherwise return a `CanonPath` representing
       `root + "/" + raw`. */
    CanonPath(std::string_view raw, const CanonPath & root);

    /* Whether this path is "/". */
    bool isRoot() const
    { return path.size() <= 1; }

    explicit operator std::string_view() const
    { return path; }

    /* The full path, including the leading slash. */
    const std::string & abs() const
    { return path; }

    /* Like abs(), but return an empty string if this path is
       '/'. Thus the returned string never ends in a slash. */
    const std::string & absOrEmpty() const
    {
        const static std::string epsilon;
        return isRoot() ? epsilon : path;
    }

    const char * c_str() const
    { return path.c_str(); }

    /* The path without its leading slash; empty for the root. The
       returned view points into this object. */
    std::string_view rel() const
    { return ((std::string_view) path).substr(1); }

    /* Minimal forward iterator over the components of a path,
       providing just what a range-based `for` needs. */
    struct Iterator
    {
        /* Unvisited tail of the relative path; the current component
           is its prefix up to `slash`. */
        std::string_view remaining;

        /* Offset of the next '/' in `remaining`, or npos if the
           current component is the last one. */
        size_t slash;

        Iterator(std::string_view remaining)
            : remaining(remaining)
            , slash(remaining.find('/'))
        { }

        /* Iterators are compared by position only, i.e. by where
           `remaining` starts. */
        bool operator != (const Iterator & x) const
        { return remaining.data() != x.remaining.data(); }

        const std::string_view operator * () const
        { return remaining.substr(0, slash); }

        void operator ++ ()
        {
            if (slash == remaining.npos)
                /* Last component: move to the end of the string,
                   which is where end() points. */
                remaining = remaining.substr(remaining.size());
            else {
                remaining = remaining.substr(slash + 1);
                slash = remaining.find('/');
            }
        }
    };

    Iterator begin() const { return Iterator(rel()); }
    Iterator end() const { return Iterator(rel().substr(path.size() - 1)); }

    /* Return the path without its last component, or std::nullopt if
       this path is the root. */
    std::optional<CanonPath> parent() const;

    /* Remove the last component. Panics if this path is the root. */
    void pop();

    /* Return everything before the last slash, or std::nullopt for
       the root. For a top-level entry such as "/foo" this is the
       empty string, not "/". The returned view points into this
       object and is valid for as long as the path is not modified. */
    std::optional<std::string_view> dirOf() const
    {
        if (isRoot()) return std::nullopt;
        /* Slice a view of `path` itself. `path.substr()` would
           create a temporary std::string, and a view of that
           temporary would dangle as soon as we return. */
        return ((std::string_view) path).substr(0, path.rfind('/'));
    }

    /* Return the last component, or std::nullopt for the root. The
       returned view points into this object. */
    std::optional<std::string_view> baseName() const
    {
        if (isRoot()) return std::nullopt;
        return ((std::string_view) path).substr(path.rfind('/') + 1);
    }

    bool operator == (const CanonPath & x) const
    { return path == x.path; }

    bool operator != (const CanonPath & x) const
    { return path != x.path; }

    /* Compare paths lexicographically except that path separators
       are sorted before any other character. That is, in the sorted order
       a directory is always followed directly by its children. For
       instance, 'foo' < 'foo/bar' < 'foo!'. */
    bool operator < (const CanonPath & x) const
    {
        auto i = path.begin();
        auto j = x.path.begin();
        for ( ; i != path.end() && j != x.path.end(); ++i, ++j) {
            /* Compare as unsigned bytes. Where `char` is signed,
               bytes >= 0x80 would be negative and would sort before
               the separator (remapped to 0 below), breaking the
               ordering promised above. */
            auto c_i = static_cast<unsigned char>(*i);
            if (c_i == '/') c_i = 0;
            auto c_j = static_cast<unsigned char>(*j);
            if (c_j == '/') c_j = 0;
            if (c_i < c_j) return true;
            if (c_i > c_j) return false;
        }
        /* Equal up to the shorter length: the shorter path sorts
           first. */
        return i == path.end() && j != x.path.end();
    }

    /* Return true if `this` is equal to `parent` or a child of
       `parent`. */
    bool isWithin(const CanonPath & parent) const;

    /* Return this path with the leading `prefix` removed. `this`
       must be within `prefix`. */
    CanonPath removePrefix(const CanonPath & prefix) const;

    /* Append another path to this one. */
    void extend(const CanonPath & x);

    /* Concatenate two paths. */
    CanonPath operator + (const CanonPath & x) const;

    /* Add a path component to this one. It must not contain any slashes. */
    void push(std::string_view c);

    CanonPath operator + (std::string_view c) const;

    /* Check whether access to this path is allowed, which is the case
       if 1) `this` is within any of the `allowed` paths; or 2) any of
       the `allowed` paths are within `this`. (The latter condition
       ensures access to the parents of allowed paths.) */
    bool isAllowed(const std::set<CanonPath> & allowed) const;
};
/* Write the absolute form of `path` (as returned by `abs()`) to
   `stream`. */
std::ostream & operator << (std::ostream & stream, const CanonPath & path);
}

View file

@ -0,0 +1,155 @@
#include "canon-path.hh"
#include <gtest/gtest.h>
namespace nix {
// Checks construction from raw strings and the basic accessors
// (abs, rel, baseName, dirOf, parent).
TEST(CanonPath, basic) {
{
// The root: no base name, no directory part and no parent.
CanonPath p("/");
ASSERT_EQ(p.abs(), "/");
ASSERT_EQ(p.rel(), "");
ASSERT_EQ(p.baseName(), std::nullopt);
ASSERT_EQ(p.dirOf(), std::nullopt);
ASSERT_FALSE(p.parent());
}
{
// Trailing slashes are dropped. Note that dirOf() of a top-level
// entry is the empty string, while parent() is "/".
CanonPath p("/foo//");
ASSERT_EQ(p.abs(), "/foo");
ASSERT_EQ(p.rel(), "foo");
ASSERT_EQ(*p.baseName(), "foo");
ASSERT_EQ(*p.dirOf(), ""); // FIXME: do we want this?
ASSERT_EQ(p.parent()->abs(), "/");
}
{
// Input without a leading slash is placed under "/".
CanonPath p("foo/bar");
ASSERT_EQ(p.abs(), "/foo/bar");
ASSERT_EQ(p.rel(), "foo/bar");
ASSERT_EQ(*p.baseName(), "bar");
ASSERT_EQ(*p.dirOf(), "/foo");
ASSERT_EQ(p.parent()->abs(), "/foo");
}
{
// Doubled and trailing slashes are both removed.
CanonPath p("foo//bar/");
ASSERT_EQ(p.abs(), "/foo/bar");
ASSERT_EQ(p.rel(), "foo/bar");
ASSERT_EQ(*p.baseName(), "bar");
ASSERT_EQ(*p.dirOf(), "/foo");
}
}
// Checks that pop() removes one trailing component per call and that
// popping the last remaining component leaves the root.
TEST(CanonPath, pop) {
CanonPath p("foo/bar/x");
ASSERT_EQ(p.abs(), "/foo/bar/x");
p.pop();
ASSERT_EQ(p.abs(), "/foo/bar");
p.pop();
ASSERT_EQ(p.abs(), "/foo");
p.pop();
ASSERT_EQ(p.abs(), "/");
}
// Checks removePrefix() for a proper prefix, for the path itself
// (yields the root) and for the root prefix (yields the path unchanged).
TEST(CanonPath, removePrefix) {
CanonPath p1("foo/bar");
CanonPath p2("foo/bar/a/b/c");
ASSERT_EQ(p2.removePrefix(p1).abs(), "/a/b/c");
ASSERT_EQ(p1.removePrefix(p1).abs(), "/");
ASSERT_EQ(p1.removePrefix(CanonPath("/")).abs(), "/foo/bar");
}
// Checks range-based iteration over the components of a path.
TEST(CanonPath, iter) {
{
// Empty components in the raw input do not show up as components.
CanonPath p("a//foo/bar//");
std::vector<std::string_view> ss;
for (auto & c : p) ss.push_back(c);
ASSERT_EQ(ss, std::vector<std::string_view>({"a", "foo", "bar"}));
}
{
// The root has no components at all.
CanonPath p("/");
std::vector<std::string_view> ss;
for (auto & c : p) ss.push_back(c);
ASSERT_EQ(ss, std::vector<std::string_view>());
}
}
// Checks both operator + overloads: path + path and path + component.
TEST(CanonPath, concat) {
{
// Two non-root paths are joined with a single separator.
CanonPath p1("a//foo/bar//");
CanonPath p2("xyzzy/bla");
ASSERT_EQ((p1 + p2).abs(), "/a/foo/bar/xyzzy/bla");
}
{
// The root on the left does not produce a doubled slash.
CanonPath p1("/");
CanonPath p2("/a/b");
ASSERT_EQ((p1 + p2).abs(), "/a/b");
}
{
// The root on the right leaves the left-hand path unchanged.
CanonPath p1("/a/b");
CanonPath p2("/");
ASSERT_EQ((p1 + p2).abs(), "/a/b");
}
{
// Appending a single component to a non-root path.
CanonPath p("/foo/bar");
ASSERT_EQ((p + "x").abs(), "/foo/bar/x");
}
{
// Appending components to the root, chained.
CanonPath p("/");
ASSERT_EQ((p + "foo" + "bar").abs(), "/foo/bar");
}
}
// Checks isWithin(): a path is within itself and within its
// ancestors, but not within a sibling, a descendant, or a path that
// merely shares a textual prefix ("foo" vs. "fo").
TEST(CanonPath, within) {
{
ASSERT_TRUE(CanonPath("foo").isWithin(CanonPath("foo")));
ASSERT_FALSE(CanonPath("foo").isWithin(CanonPath("bar")));
ASSERT_FALSE(CanonPath("foo").isWithin(CanonPath("fo")));
ASSERT_TRUE(CanonPath("foo/bar").isWithin(CanonPath("foo")));
ASSERT_FALSE(CanonPath("foo").isWithin(CanonPath("foo/bar")));
ASSERT_TRUE(CanonPath("/foo/bar/default.nix").isWithin(CanonPath("/")));
ASSERT_TRUE(CanonPath("/").isWithin(CanonPath("/")));
}
}
// Checks operator <: the ordering is strict, and a directory's
// children sort before a sibling whose name extends the directory's
// name ("foo" < "foo/bar" < "foo!").
TEST(CanonPath, sort) {
ASSERT_FALSE(CanonPath("foo") < CanonPath("foo"));
ASSERT_TRUE (CanonPath("foo") < CanonPath("foo/bar"));
ASSERT_TRUE (CanonPath("foo/bar") < CanonPath("foo!"));
ASSERT_FALSE(CanonPath("foo!") < CanonPath("foo"));
ASSERT_TRUE (CanonPath("foo") < CanonPath("foo!"));
}
// Checks isAllowed() against a fixed set: allowed paths, their
// descendants and their ancestors (including the root) are accepted;
// unrelated paths and siblings of allowed paths are rejected.
TEST(CanonPath, allowed) {
{
std::set<CanonPath> allowed {
CanonPath("foo/bar"),
CanonPath("foo!"),
CanonPath("xyzzy"),
CanonPath("a/b/c"),
};
// Exact match, descendant and ancestor of "foo/bar".
ASSERT_TRUE (CanonPath("foo/bar").isAllowed(allowed));
ASSERT_TRUE (CanonPath("foo/bar/bla").isAllowed(allowed));
ASSERT_TRUE (CanonPath("foo").isAllowed(allowed));
// Unrelated paths.
ASSERT_FALSE(CanonPath("bar").isAllowed(allowed));
ASSERT_FALSE(CanonPath("bar/a").isAllowed(allowed));
// Ancestors, exact match and descendants of "a/b/c".
ASSERT_TRUE (CanonPath("a").isAllowed(allowed));
ASSERT_TRUE (CanonPath("a/b").isAllowed(allowed));
ASSERT_TRUE (CanonPath("a/b/c").isAllowed(allowed));
ASSERT_TRUE (CanonPath("a/b/c/d").isAllowed(allowed));
ASSERT_TRUE (CanonPath("a/b/c/d/e").isAllowed(allowed));
// Siblings of "a/b/c", and names that only share a textual prefix.
ASSERT_FALSE(CanonPath("a/b/a").isAllowed(allowed));
ASSERT_FALSE(CanonPath("a/b/d").isAllowed(allowed));
ASSERT_FALSE(CanonPath("aaa").isAllowed(allowed));
ASSERT_FALSE(CanonPath("zzz").isAllowed(allowed));
// The root is an ancestor of every allowed path.
ASSERT_TRUE (CanonPath("/").isAllowed(allowed));
}
}
}