Merge pull request #5906 from pennae/primops-optimization

optimize primops and utils by caching more and copying less
This commit is contained in:
Eelco Dolstra 2022-01-18 19:43:28 +01:00 committed by GitHub
commit 4af88a4c91
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 90 additions and 50 deletions

View file

@ -121,6 +121,8 @@ class BindingsBuilder
Bindings * bindings; Bindings * bindings;
public: public:
// needed by std::back_inserter
using value_type = Attr;
EvalState & state; EvalState & state;
@ -134,6 +136,11 @@ public:
} }
void insert(const Attr & attr) void insert(const Attr & attr)
{
push_back(attr);
}
void push_back(const Attr & attr)
{ {
bindings->push_back(attr); bindings->push_back(attr);
} }

View file

@ -425,6 +425,11 @@ EvalState::EvalState(
, sDescription(symbols.create("description")) , sDescription(symbols.create("description"))
, sSelf(symbols.create("self")) , sSelf(symbols.create("self"))
, sEpsilon(symbols.create("")) , sEpsilon(symbols.create(""))
, sStartSet(symbols.create("startSet"))
, sOperator(symbols.create("operator"))
, sKey(symbols.create("key"))
, sPath(symbols.create("path"))
, sPrefix(symbols.create("prefix"))
, repair(NoRepair) , repair(NoRepair)
, emptyBindings(0) , emptyBindings(0)
, store(store) , store(store)

View file

@ -80,7 +80,8 @@ public:
sContentAddressed, sContentAddressed,
sOutputHash, sOutputHashAlgo, sOutputHashMode, sOutputHash, sOutputHashAlgo, sOutputHashMode,
sRecurseForDerivations, sRecurseForDerivations,
sDescription, sSelf, sEpsilon; sDescription, sSelf, sEpsilon, sStartSet, sOperator, sKey, sPath,
sPrefix;
Symbol sDerivationNix; Symbol sDerivationNix;
/* If set, force copying files to the Nix store even if they /* If set, force copying files to the Nix store even if they
@ -399,6 +400,7 @@ private:
friend struct ExprSelect; friend struct ExprSelect;
friend void prim_getAttr(EvalState & state, const Pos & pos, Value * * args, Value & v); friend void prim_getAttr(EvalState & state, const Pos & pos, Value * * args, Value & v);
friend void prim_match(EvalState & state, const Pos & pos, Value * * args, Value & v); friend void prim_match(EvalState & state, const Pos & pos, Value * * args, Value & v);
friend void prim_split(EvalState & state, const Pos & pos, Value * * args, Value & v);
friend struct Value; friend struct Value;
}; };

View file

@ -12,6 +12,8 @@
#include "value-to-xml.hh" #include "value-to-xml.hh"
#include "primops.hh" #include "primops.hh"
#include <boost/container/small_vector.hpp>
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <unistd.h> #include <unistd.h>
@ -592,16 +594,16 @@ typedef list<Value *> ValueList;
static Bindings::iterator getAttr( static Bindings::iterator getAttr(
EvalState & state, EvalState & state,
string funcName, std::string_view funcName,
string attrName, Symbol attrSym,
Bindings * attrSet, Bindings * attrSet,
const Pos & pos) const Pos & pos)
{ {
Bindings::iterator value = attrSet->find(state.symbols.create(attrName)); Bindings::iterator value = attrSet->find(attrSym);
if (value == attrSet->end()) { if (value == attrSet->end()) {
hintformat errorMsg = hintfmt( hintformat errorMsg = hintfmt(
"attribute '%s' missing for call to '%s'", "attribute '%s' missing for call to '%s'",
attrName, attrSym,
funcName funcName
); );
@ -635,7 +637,7 @@ static void prim_genericClosure(EvalState & state, const Pos & pos, Value * * ar
Bindings::iterator startSet = getAttr( Bindings::iterator startSet = getAttr(
state, state,
"genericClosure", "genericClosure",
"startSet", state.sStartSet,
args[0]->attrs, args[0]->attrs,
pos pos
); );
@ -650,7 +652,7 @@ static void prim_genericClosure(EvalState & state, const Pos & pos, Value * * ar
Bindings::iterator op = getAttr( Bindings::iterator op = getAttr(
state, state,
"genericClosure", "genericClosure",
"operator", state.sOperator,
args[0]->attrs, args[0]->attrs,
pos pos
); );
@ -672,7 +674,7 @@ static void prim_genericClosure(EvalState & state, const Pos & pos, Value * * ar
state.forceAttrs(*e, pos); state.forceAttrs(*e, pos);
Bindings::iterator key = Bindings::iterator key =
e->attrs->find(state.symbols.create("key")); e->attrs->find(state.sKey);
if (key == e->attrs->end()) if (key == e->attrs->end())
throw EvalError({ throw EvalError({
.msg = hintfmt("attribute 'key' required"), .msg = hintfmt("attribute 'key' required"),
@ -1079,10 +1081,10 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * *
} else { } else {
auto s = state.coerceToString(*i->pos, *i->value, context, true); auto s = state.coerceToString(*i->pos, *i->value, context, true);
drv.env.emplace(key, s); drv.env.emplace(key, s);
if (i->name == state.sBuilder) drv.builder = s; if (i->name == state.sBuilder) drv.builder = std::move(s);
else if (i->name == state.sSystem) drv.platform = s; else if (i->name == state.sSystem) drv.platform = std::move(s);
else if (i->name == state.sOutputHash) outputHash = s; else if (i->name == state.sOutputHash) outputHash = std::move(s);
else if (i->name == state.sOutputHashAlgo) outputHashAlgo = s; else if (i->name == state.sOutputHashAlgo) outputHashAlgo = std::move(s);
else if (i->name == state.sOutputHashMode) handleHashMode(s); else if (i->name == state.sOutputHashMode) handleHashMode(s);
else if (i->name == state.sOutputs) else if (i->name == state.sOutputs)
handleOutputs(tokenizeString<Strings>(s)); handleOutputs(tokenizeString<Strings>(s));
@ -1498,14 +1500,14 @@ static void prim_findFile(EvalState & state, const Pos & pos, Value * * args, Va
state.forceAttrs(*v2, pos); state.forceAttrs(*v2, pos);
string prefix; string prefix;
Bindings::iterator i = v2->attrs->find(state.symbols.create("prefix")); Bindings::iterator i = v2->attrs->find(state.sPrefix);
if (i != v2->attrs->end()) if (i != v2->attrs->end())
prefix = state.forceStringNoCtx(*i->value, pos); prefix = state.forceStringNoCtx(*i->value, pos);
i = getAttr( i = getAttr(
state, state,
"findFile", "findFile",
"path", state.sPath,
v2->attrs, v2->attrs,
pos pos
); );
@ -2163,7 +2165,10 @@ static void prim_attrValues(EvalState & state, const Pos & pos, Value * * args,
v.listElems()[n++] = (Value *) &i; v.listElems()[n++] = (Value *) &i;
std::sort(v.listElems(), v.listElems() + n, std::sort(v.listElems(), v.listElems() + n,
[](Value * v1, Value * v2) { return (string) ((Attr *) v1)->name < (string) ((Attr *) v2)->name; }); [](Value * v1, Value * v2) {
std::string_view s1 = ((Attr *) v1)->name, s2 = ((Attr *) v2)->name;
return s1 < s2;
});
for (unsigned int i = 0; i < n; ++i) for (unsigned int i = 0; i < n; ++i)
v.listElems()[i] = ((Attr *) v.listElems()[i])->value; v.listElems()[i] = ((Attr *) v.listElems()[i])->value;
@ -2184,11 +2189,10 @@ void prim_getAttr(EvalState & state, const Pos & pos, Value * * args, Value & v)
{ {
string attr = state.forceStringNoCtx(*args[0], pos); string attr = state.forceStringNoCtx(*args[0], pos);
state.forceAttrs(*args[1], pos); state.forceAttrs(*args[1], pos);
// !!! Should we create a symbol here or just do a lookup?
Bindings::iterator i = getAttr( Bindings::iterator i = getAttr(
state, state,
"getAttr", "getAttr",
attr, state.symbols.create(attr),
args[1]->attrs, args[1]->attrs,
pos pos
); );
@ -2268,21 +2272,25 @@ static void prim_removeAttrs(EvalState & state, const Pos & pos, Value * * args,
state.forceAttrs(*args[0], pos); state.forceAttrs(*args[0], pos);
state.forceList(*args[1], pos); state.forceList(*args[1], pos);
/* Get the attribute names to be removed. */ /* Get the attribute names to be removed.
std::set<Symbol> names; We keep them as Attrs instead of Symbols so std::set_difference
can be used to remove them from attrs[0]. */
boost::container::small_vector<Attr, 64> names;
names.reserve(args[1]->listSize());
for (auto elem : args[1]->listItems()) { for (auto elem : args[1]->listItems()) {
state.forceStringNoCtx(*elem, pos); state.forceStringNoCtx(*elem, pos);
names.insert(state.symbols.create(elem->string.s)); names.emplace_back(state.symbols.create(elem->string.s), nullptr);
} }
std::sort(names.begin(), names.end());
/* Copy all attributes not in that set. Note that we don't need /* Copy all attributes not in that set. Note that we don't need
to sort v.attrs because it's a subset of an already sorted to sort v.attrs because it's a subset of an already sorted
vector. */ vector. */
auto attrs = state.buildBindings(args[0]->attrs->size()); auto attrs = state.buildBindings(args[0]->attrs->size());
for (auto & i : *args[0]->attrs) { std::set_difference(
if (!names.count(i.name)) args[0]->attrs->begin(), args[0]->attrs->end(),
attrs.insert(i); names.begin(), names.end(),
} std::back_inserter(attrs));
v.mkAttrs(attrs.alreadySorted()); v.mkAttrs(attrs.alreadySorted());
} }
@ -3520,18 +3528,20 @@ static RegisterPrimOp primop_match({
/* Split a string with a regular expression, and return a list of the /* Split a string with a regular expression, and return a list of the
non-matching parts interleaved by the lists of the matching groups. */ non-matching parts interleaved by the lists of the matching groups. */
static void prim_split(EvalState & state, const Pos & pos, Value * * args, Value & v) void prim_split(EvalState & state, const Pos & pos, Value * * args, Value & v)
{ {
auto re = state.forceStringNoCtx(*args[0], pos); auto re = state.forceStringNoCtx(*args[0], pos);
try { try {
std::regex regex(re, std::regex::extended); auto regex = state.regexCache->cache.find(re);
if (regex == state.regexCache->cache.end())
regex = state.regexCache->cache.emplace(re, std::regex(re, std::regex::extended)).first;
PathSet context; PathSet context;
const std::string str = state.forceString(*args[1], context, pos); const std::string str = state.forceString(*args[1], context, pos);
auto begin = std::sregex_iterator(str.begin(), str.end(), regex); auto begin = std::sregex_iterator(str.begin(), str.end(), regex->second);
auto end = std::sregex_iterator(); auto end = std::sregex_iterator();
// Any matches results are surrounded by non-matching results. // Any matches results are surrounded by non-matching results.

View file

@ -22,6 +22,7 @@ typedef std::map<string, string> StringMap;
/* Paths are just strings. */ /* Paths are just strings. */
typedef string Path; typedef string Path;
typedef std::string_view PathView;
typedef list<Path> Paths; typedef list<Path> Paths;
typedef set<Path> PathSet; typedef set<Path> PathSet;

View file

@ -106,16 +106,16 @@ Path absPath(Path path, std::optional<Path> dir, bool resolveSymlinks)
} }
Path canonPath(const Path & path, bool resolveSymlinks) Path canonPath(PathView path, bool resolveSymlinks)
{ {
assert(path != ""); assert(path != "");
string s; string s;
s.reserve(256);
if (path[0] != '/') if (path[0] != '/')
throw Error("not an absolute path: '%1%'", path); throw Error("not an absolute path: '%1%'", path);
string::const_iterator i = path.begin(), end = path.end();
string temp; string temp;
/* Count the number of times we follow a symlink and stop at some /* Count the number of times we follow a symlink and stop at some
@ -125,33 +125,37 @@ Path canonPath(const Path & path, bool resolveSymlinks)
while (1) { while (1) {
/* Skip slashes. */ /* Skip slashes. */
while (i != end && *i == '/') i++; while (!path.empty() && path[0] == '/') path.remove_prefix(1);
if (i == end) break; if (path.empty()) break;
/* Ignore `.'. */ /* Ignore `.'. */
if (*i == '.' && (i + 1 == end || i[1] == '/')) if (path == "." || path.substr(0, 2) == "./")
i++; path.remove_prefix(1);
/* If `..', delete the last component. */ /* If `..', delete the last component. */
else if (*i == '.' && i + 1 < end && i[1] == '.' && else if (path == ".." || path.substr(0, 3) == "../")
(i + 2 == end || i[2] == '/'))
{ {
if (!s.empty()) s.erase(s.rfind('/')); if (!s.empty()) s.erase(s.rfind('/'));
i += 2; path.remove_prefix(2);
} }
/* Normal component; copy it. */ /* Normal component; copy it. */
else { else {
s += '/'; s += '/';
while (i != end && *i != '/') s += *i++; if (const auto slash = path.find('/'); slash == string::npos) {
s += path;
path = {};
} else {
s += path.substr(0, slash);
path = path.substr(slash + 1);
}
/* If s points to a symlink, resolve it and continue from there */ /* If s points to a symlink, resolve it and continue from there */
if (resolveSymlinks && isLink(s)) { if (resolveSymlinks && isLink(s)) {
if (++followCount >= maxFollow) if (++followCount >= maxFollow)
throw Error("infinite symlink recursion in path '%1%'", path); throw Error("infinite symlink recursion in path '%1%'", path);
temp = readLink(s) + string(i, end); temp = concatStrings(readLink(s), path);
i = temp.begin(); path = temp;
end = temp.end();
if (!temp.empty() && temp[0] == '/') { if (!temp.empty() && temp[0] == '/') {
s.clear(); /* restart for symlinks pointing to absolute path */ s.clear(); /* restart for symlinks pointing to absolute path */
} else { } else {
@ -164,7 +168,7 @@ Path canonPath(const Path & path, bool resolveSymlinks)
} }
} }
return s.empty() ? "/" : s; return s.empty() ? "/" : std::move(s);
} }
@ -1231,23 +1235,22 @@ void _interrupted()
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
template<class C> C tokenizeString(std::string_view s, const string & separators) template<class C> C tokenizeString(std::string_view s, std::string_view separators)
{ {
C result; C result;
string::size_type pos = s.find_first_not_of(separators, 0); string::size_type pos = s.find_first_not_of(separators, 0);
while (pos != string::npos) { while (pos != string::npos) {
string::size_type end = s.find_first_of(separators, pos + 1); string::size_type end = s.find_first_of(separators, pos + 1);
if (end == string::npos) end = s.size(); if (end == string::npos) end = s.size();
string token(s, pos, end - pos); result.insert(result.end(), string(s, pos, end - pos));
result.insert(result.end(), token);
pos = s.find_first_not_of(separators, end); pos = s.find_first_not_of(separators, end);
} }
return result; return result;
} }
template Strings tokenizeString(std::string_view s, const string & separators); template Strings tokenizeString(std::string_view s, std::string_view separators);
template StringSet tokenizeString(std::string_view s, const string & separators); template StringSet tokenizeString(std::string_view s, std::string_view separators);
template vector<string> tokenizeString(std::string_view s, const string & separators); template vector<string> tokenizeString(std::string_view s, std::string_view separators);
string chomp(std::string_view s) string chomp(std::string_view s)

View file

@ -56,7 +56,7 @@ Path absPath(Path path,
double or trailing slashes. Optionally resolves all symlink double or trailing slashes. Optionally resolves all symlink
components such that each component of the resulting path is *not* components such that each component of the resulting path is *not*
a symbolic link. */ a symbolic link. */
Path canonPath(const Path & path, bool resolveSymlinks = false); Path canonPath(PathView path, bool resolveSymlinks = false);
/* Return the directory part of the given canonical path, i.e., /* Return the directory part of the given canonical path, i.e.,
everything before the final `/'. If the path is the root or an everything before the final `/'. If the path is the root or an
@ -368,15 +368,19 @@ MakeError(FormatError, Error);
/* String tokenizer. */ /* String tokenizer. */
template<class C> C tokenizeString(std::string_view s, const string & separators = " \t\n\r"); template<class C> C tokenizeString(std::string_view s, std::string_view separators = " \t\n\r");
/* Concatenate the given strings with a separator between the /* Concatenate the given strings with a separator between the
elements. */ elements. */
template<class C> template<class C>
string concatStringsSep(const string & sep, const C & ss) string concatStringsSep(const std::string_view sep, const C & ss)
{ {
size_t size = 0;
// need a cast to string_view since this is also called with Symbols
for (const auto & s : ss) size += sep.size() + std::string_view(s).size();
string s; string s;
s.reserve(size);
for (auto & i : ss) { for (auto & i : ss) {
if (s.size() != 0) s += sep; if (s.size() != 0) s += sep;
s += i; s += i;
@ -384,6 +388,14 @@ string concatStringsSep(const string & sep, const C & ss)
return s; return s;
} }
template<class ... Parts>
auto concatStrings(Parts && ... parts)
-> std::enable_if_t<(... && std::is_convertible_v<Parts, std::string_view>), string>
{
std::string_view views[sizeof...(parts)] = { parts... };
return concatStringsSep({}, views);
}
/* Add quotes around a collection of strings. */ /* Add quotes around a collection of strings. */
template<class C> Strings quoteStrings(const C & c) template<class C> Strings quoteStrings(const C & c)