From 44c92a1667ce829518178a77a9de0a53609d284f Mon Sep 17 00:00:00 2001 From: pennae Date: Wed, 12 Jan 2022 16:02:29 +0100 Subject: [PATCH 1/6] use more string_view in utils MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit there's a couple places that can be easily converted from using strings to using string_views instead. gives a slight (~1%) boost to system eval. # before nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 2.946 s ± 0.026 s [User: 2.655 s, System: 0.209 s] Range (min … max): 2.905 s … 2.995 s 20 runs # after Time (mean ± σ): 2.928 s ± 0.024 s [User: 2.638 s, System: 0.211 s] Range (min … max): 2.893 s … 2.970 s 20 runs --- src/libutil/types.hh | 1 + src/libutil/util.cc | 43 +++++++++++++++++++++++-------------------- src/libutil/util.hh | 18 +++++++++++++++--- 3 files changed, 39 insertions(+), 23 deletions(-) diff --git a/src/libutil/types.hh b/src/libutil/types.hh index 9c85fef62..8f72c926f 100644 --- a/src/libutil/types.hh +++ b/src/libutil/types.hh @@ -22,6 +22,7 @@ typedef std::map StringMap; /* Paths are just strings. */ typedef string Path; +typedef std::string_view PathView; typedef list Paths; typedef set PathSet; diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 9edd69c64..47876709e 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -106,16 +106,16 @@ Path absPath(Path path, std::optional dir, bool resolveSymlinks) } -Path canonPath(const Path & path, bool resolveSymlinks) +Path canonPath(PathView path, bool resolveSymlinks) { assert(path != ""); string s; + s.reserve(256); if (path[0] != '/') throw Error("not an absolute path: '%1%'", path); - string::const_iterator i = path.begin(), end = path.end(); string temp; /* Count the number of times we follow a symlink and stop at some @@ -125,33 +125,37 @@ Path canonPath(const Path & path, bool resolveSymlinks) while (1) { /* Skip slashes. */ - while (i != end && *i == '/') i++; - if (i == end) break; + while (!path.empty() && path[0] == '/') path.remove_prefix(1); + if (path.empty()) break; /* Ignore `.'. */ - if (*i == '.' && (i + 1 == end || i[1] == '/')) - i++; + if (path == "." || path.substr(0, 2) == "./") + path.remove_prefix(1); /* If `..', delete the last component. */ - else if (*i == '.' && i + 1 < end && i[1] == '.' && - (i + 2 == end || i[2] == '/')) + else if (path == ".." || path.substr(0, 3) == "../") { if (!s.empty()) s.erase(s.rfind('/')); - i += 2; + path.remove_prefix(2); } /* Normal component; copy it. */ else { s += '/'; - while (i != end && *i != '/') s += *i++; + if (const auto slash = path.find('/'); slash == string::npos) { + s += path; + path = {}; + } else { + s += path.substr(0, slash); + path = path.substr(slash + 1); + } /* If s points to a symlink, resolve it and continue from there */ if (resolveSymlinks && isLink(s)) { if (++followCount >= maxFollow) throw Error("infinite symlink recursion in path '%1%'", path); - temp = readLink(s) + string(i, end); - i = temp.begin(); - end = temp.end(); + temp = concatStrings(readLink(s), path); + path = temp; if (!temp.empty() && temp[0] == '/') { s.clear(); /* restart for symlinks pointing to absolute path */ } else { @@ -164,7 +168,7 @@ Path canonPath(const Path & path, bool resolveSymlinks) } } - return s.empty() ? "/" : s; + return s.empty() ? "/" : std::move(s); } @@ -1229,23 +1233,22 @@ void _interrupted() ////////////////////////////////////////////////////////////////////// -template C tokenizeString(std::string_view s, const string & separators) +template C tokenizeString(std::string_view s, std::string_view separators) { C result; string::size_type pos = s.find_first_not_of(separators, 0); while (pos != string::npos) { string::size_type end = s.find_first_of(separators, pos + 1); if (end == string::npos) end = s.size(); - string token(s, pos, end - pos); - result.insert(result.end(), token); + result.insert(result.end(), string(s, pos, end - pos)); pos = s.find_first_not_of(separators, end); } return result; } -template Strings tokenizeString(std::string_view s, const string & separators); -template StringSet tokenizeString(std::string_view s, const string & separators); -template vector tokenizeString(std::string_view s, const string & separators); +template Strings tokenizeString(std::string_view s, std::string_view separators); +template StringSet tokenizeString(std::string_view s, std::string_view separators); +template vector tokenizeString(std::string_view s, std::string_view separators); string chomp(std::string_view s) diff --git a/src/libutil/util.hh b/src/libutil/util.hh index c900033f8..369c44f78 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -56,7 +56,7 @@ Path absPath(Path path, double or trailing slashes. Optionally resolves all symlink components such that each component of the resulting path is *not* a symbolic link. */ -Path canonPath(const Path & path, bool resolveSymlinks = false); +Path canonPath(PathView path, bool resolveSymlinks = false); /* Return the directory part of the given canonical path, i.e., everything before the final `/'. If the path is the root or an @@ -368,15 +368,19 @@ MakeError(FormatError, Error); /* String tokenizer. */ -template C tokenizeString(std::string_view s, const string & separators = " \t\n\r"); +template C tokenizeString(std::string_view s, std::string_view separators = " \t\n\r"); /* Concatenate the given strings with a separator between the elements. */ template -string concatStringsSep(const string & sep, const C & ss) +string concatStringsSep(const std::string_view sep, const C & ss) { + size_t size = 0; + // need a cast to string_view since this is also called with Symbols + for (const auto & s : ss) size += sep.size() + std::string_view(s).size(); string s; + s.reserve(size); for (auto & i : ss) { if (s.size() != 0) s += sep; s += i; @@ -384,6 +388,14 @@ string concatStringsSep(const string & sep, const C & ss) return s; } +template +auto concatStrings(Parts && ... parts) + -> std::enable_if_t<(... && std::is_convertible_v), string> +{ + std::string_view views[sizeof...(parts)] = { parts... }; + return concatStringsSep({}, views); +} + /* Add quotes around a collection of strings. */ template Strings quoteStrings(const C & c) From 1bebb1095a122d4741c4ed9cd9cb19f08753872b Mon Sep 17 00:00:00 2001 From: pennae Date: Wed, 12 Jan 2022 18:08:48 +0100 Subject: [PATCH 2/6] cache more often-used symbols for primops there's a few symbols in primops we can create once and pick them out of EvalState afterwards instead of creating them every time we need them. this gives almost 1% speedup to an uncached nix search. --- src/libexpr/eval.cc | 5 +++++ src/libexpr/eval.hh | 3 ++- src/libexpr/primops.cc | 21 ++++++++++----------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 61bccd6e2..94ffab175 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -425,6 +425,11 @@ EvalState::EvalState( , sDescription(symbols.create("description")) , sSelf(symbols.create("self")) , sEpsilon(symbols.create("")) + , sStartSet(symbols.create("startSet")) + , sOperator(symbols.create("operator")) + , sKey(symbols.create("key")) + , sPath(symbols.create("path")) + , sPrefix(symbols.create("prefix")) , repair(NoRepair) , emptyBindings(0) , store(store) diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index 89814785e..348a1b291 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -80,7 +80,8 @@ public: sContentAddressed, sOutputHash, sOutputHashAlgo, sOutputHashMode, sRecurseForDerivations, - sDescription, sSelf, sEpsilon; + sDescription, sSelf, sEpsilon, sStartSet, sOperator, sKey, sPath, + sPrefix; Symbol sDerivationNix; /* If set, force copying files to the Nix store even if they diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 1d2a7d5d2..7b0e6e7df 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -592,16 +592,16 @@ typedef list ValueList; static Bindings::iterator getAttr( EvalState & state, - string funcName, - string attrName, + std::string_view funcName, + Symbol attrSym, Bindings * attrSet, const Pos & pos) { - Bindings::iterator value = attrSet->find(state.symbols.create(attrName)); + Bindings::iterator value = attrSet->find(attrSym); if (value == attrSet->end()) { hintformat errorMsg = hintfmt( "attribute '%s' missing for call to '%s'", - attrName, + attrSym, funcName ); @@ -635,7 +635,7 @@ static void prim_genericClosure(EvalState & state, const Pos & pos, Value * * ar Bindings::iterator startSet = getAttr( state, "genericClosure", - "startSet", + state.sStartSet, args[0]->attrs, pos ); @@ -650,7 +650,7 @@ static void prim_genericClosure(EvalState & state, const Pos & pos, Value * * ar Bindings::iterator op = getAttr( state, "genericClosure", - "operator", + state.sOperator, args[0]->attrs, pos ); @@ -672,7 +672,7 @@ static void prim_genericClosure(EvalState & state, const Pos & pos, Value * * ar state.forceAttrs(*e, pos); Bindings::iterator key = - e->attrs->find(state.symbols.create("key")); + e->attrs->find(state.sKey); if (key == e->attrs->end()) throw EvalError({ .msg = hintfmt("attribute 'key' required"), @@ -1498,14 +1498,14 @@ static void prim_findFile(EvalState & state, const Pos & pos, Value * * args, Va state.forceAttrs(*v2, pos); string prefix; - Bindings::iterator i = v2->attrs->find(state.symbols.create("prefix")); + Bindings::iterator i = v2->attrs->find(state.sPrefix); if (i != v2->attrs->end()) prefix = state.forceStringNoCtx(*i->value, pos); i = getAttr( state, "findFile", - "path", + state.sPath, v2->attrs, pos ); @@ -2184,11 +2184,10 @@ void prim_getAttr(EvalState & state, const Pos & pos, Value * * args, Value & v) { string attr = state.forceStringNoCtx(*args[0], pos); state.forceAttrs(*args[1], pos); - // !!! Should we create a symbol here or just do a lookup? Bindings::iterator i = getAttr( state, "getAttr", - attr, + state.symbols.create(attr), args[1]->attrs, pos ); From ef45787aae1c91dffce1e95db3d46740b0165319 Mon Sep 17 00:00:00 2001 From: pennae Date: Wed, 12 Jan 2022 18:17:59 +0100 Subject: [PATCH 3/6] avoid string copies in attrNames sort comparison symbols can also be cast to string_view, which compares the same but doesn't require a copy of both symbol names on every comparison. --- src/libexpr/primops.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 7b0e6e7df..365595c92 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -2163,7 +2163,10 @@ static void prim_attrValues(EvalState & state, const Pos & pos, Value * * args, v.listElems()[n++] = (Value *) &i; std::sort(v.listElems(), v.listElems() + n, - [](Value * v1, Value * v2) { return (string) ((Attr *) v1)->name < (string) ((Attr *) v2)->name; }); + [](Value * v1, Value * v2) { + std::string_view s1 = ((Attr *) v1)->name, s2 = ((Attr *) v2)->name; + return s1 < s2; + }); for (unsigned int i = 0; i < n; ++i) v.listElems()[i] = ((Attr *) v.listElems()[i])->value; From 6401e443a441f58f48e2cbab5286b89ec162835a Mon Sep 17 00:00:00 2001 From: pennae Date: Sun, 2 Jan 2022 00:30:57 +0100 Subject: [PATCH 4/6] move strings in derivationStrict the temporary will be discarded anyway, so we can move out of it and save many small allocations and copies. --- src/libexpr/primops.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 365595c92..003e588a4 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -1079,10 +1079,10 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * * } else { auto s = state.coerceToString(*i->pos, *i->value, context, true); drv.env.emplace(key, s); - if (i->name == state.sBuilder) drv.builder = s; - else if (i->name == state.sSystem) drv.platform = s; - else if (i->name == state.sOutputHash) outputHash = s; - else if (i->name == state.sOutputHashAlgo) outputHashAlgo = s; + if (i->name == state.sBuilder) drv.builder = std::move(s); + else if (i->name == state.sSystem) drv.platform = std::move(s); + else if (i->name == state.sOutputHash) outputHash = std::move(s); + else if (i->name == state.sOutputHashAlgo) outputHashAlgo = std::move(s); else if (i->name == state.sOutputHashMode) handleHashMode(s); else if (i->name == state.sOutputs) handleOutputs(tokenizeString(s)); From c9fc975259e27220caeb4291f3dff453e65f1965 Mon Sep 17 00:00:00 2001 From: pennae Date: Sun, 2 Jan 2022 00:41:21 +0100 Subject: [PATCH 5/6] optimize removeAttrs builtin use a sorted array of symbols to be removed instead of a set. this saves a lot of memory allocations and slightly speeds up removal. --- src/libexpr/attr-set.hh | 7 +++++++ src/libexpr/primops.cc | 20 +++++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/libexpr/attr-set.hh b/src/libexpr/attr-set.hh index 82c348287..3e4899efc 100644 --- a/src/libexpr/attr-set.hh +++ b/src/libexpr/attr-set.hh @@ -121,6 +121,8 @@ class BindingsBuilder Bindings * bindings; public: + // needed by std::back_inserter + using value_type = Attr; EvalState & state; @@ -134,6 +136,11 @@ public: } void insert(const Attr & attr) + { + push_back(attr); + } + + void push_back(const Attr & attr) { bindings->push_back(attr); } diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 003e588a4..839fbb95c 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -12,6 +12,8 @@ #include "value-to-xml.hh" #include "primops.hh" +#include + #include #include #include @@ -2270,21 +2272,25 @@ static void prim_removeAttrs(EvalState & state, const Pos & pos, Value * * args, state.forceAttrs(*args[0], pos); state.forceList(*args[1], pos); - /* Get the attribute names to be removed. */ - std::set names; + /* Get the attribute names to be removed. + We keep them as Attrs instead of Symbols so std::set_difference + can be used to remove them from attrs[0]. */ + boost::container::small_vector names; + names.reserve(args[1]->listSize()); for (auto elem : args[1]->listItems()) { state.forceStringNoCtx(*elem, pos); - names.insert(state.symbols.create(elem->string.s)); + names.emplace_back(state.symbols.create(elem->string.s), nullptr); } + std::sort(names.begin(), names.end()); /* Copy all attributes not in that set. Note that we don't need to sort v.attrs because it's a subset of an already sorted vector. */ auto attrs = state.buildBindings(args[0]->attrs->size()); - for (auto & i : *args[0]->attrs) { - if (!names.count(i.name)) - attrs.insert(i); - } + std::set_difference( + args[0]->attrs->begin(), args[0]->attrs->end(), + names.begin(), names.end(), + std::back_inserter(attrs)); v.mkAttrs(attrs.alreadySorted()); } From ad60dfde2af6bcdb77e50562a2f2b28107e28588 Mon Sep 17 00:00:00 2001 From: pennae Date: Sun, 2 Jan 2022 00:46:43 +0100 Subject: [PATCH 6/6] also cache split regexes, not just match regexes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gives about 1% improvement on system eval, a bit less on nix search. # before nix search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 7.419 s ± 0.045 s [User: 6.362 s, System: 0.794 s] Range (min … max): 7.335 s … 7.517 s 20 runs nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 2.921 s ± 0.023 s [User: 2.626 s, System: 0.210 s] Range (min … max): 2.883 s … 2.957 s 20 runs # after nix search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 7.370 s ± 0.059 s [User: 6.333 s, System: 0.791 s] Range (min … max): 7.286 s … 7.541 s 20 runs nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 2.891 s ± 0.033 s [User: 2.606 s, System: 0.210 s] Range (min … max): 2.823 s … 2.958 s 20 runs --- src/libexpr/eval.hh | 1 + src/libexpr/primops.cc | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index 348a1b291..63d16bd2b 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -400,6 +400,7 @@ private: friend struct ExprSelect; friend void prim_getAttr(EvalState & state, const Pos & pos, Value * * args, Value & v); friend void prim_match(EvalState & state, const Pos & pos, Value * * args, Value & v); + friend void prim_split(EvalState & state, const Pos & pos, Value * * args, Value & v); friend struct Value; }; diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 839fbb95c..3004e9a0a 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -3528,18 +3528,20 @@ static RegisterPrimOp primop_match({ /* Split a string with a regular expression, and return a list of the non-matching parts interleaved by the lists of the matching groups. */ -static void prim_split(EvalState & state, const Pos & pos, Value * * args, Value & v) +void prim_split(EvalState & state, const Pos & pos, Value * * args, Value & v) { auto re = state.forceStringNoCtx(*args[0], pos); try { - std::regex regex(re, std::regex::extended); + auto regex = state.regexCache->cache.find(re); + if (regex == state.regexCache->cache.end()) + regex = state.regexCache->cache.emplace(re, std::regex(re, std::regex::extended)).first; PathSet context; const std::string str = state.forceString(*args[1], context, pos); - auto begin = std::sregex_iterator(str.begin(), str.end(), regex); + auto begin = std::sregex_iterator(str.begin(), str.end(), regex->second); auto end = std::sregex_iterator(); // Any matches results are surrounded by non-matching results.