From 0ba37444dae3b0076482e13879f4059c358ef2eb Mon Sep 17 00:00:00 2001 From: Tom Hubrecht Date: Thu, 30 May 2024 01:05:44 +0200 Subject: [PATCH] primops: Move functions to primops/attrset.cc Moved builtins: attrNames, attrValues, catAttrs, getAttr, groupBy, hasAttr, intersectAttrs, mapAttrs, removeAttrs, zipAttrsWith Change-Id: I55e2c9ef40ece5ba5bf4a96cae655f481d0b140b --- src/libexpr/meson.build | 1 + src/libexpr/primops.cc | 454 --------------------------- src/libexpr/primops.hh | 16 +- src/libexpr/primops/attrset.cc | 543 +++++++++++++++++++++++++++++++++ 4 files changed, 558 insertions(+), 456 deletions(-) create mode 100644 src/libexpr/primops/attrset.cc diff --git a/src/libexpr/meson.build b/src/libexpr/meson.build index fda6fde2c..fb7079d80 100644 --- a/src/libexpr/meson.build +++ b/src/libexpr/meson.build @@ -86,6 +86,7 @@ libexpr_sources = files( 'flake/flake.cc', 'flake/flakeref.cc', 'flake/lockfile.cc', + 'primops/attrset.cc', 'primops/context.cc', 'primops/fetchClosure.cc', 'primops/fetchMercurial.cc', diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 64a52dfd6..7e3e307a1 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -620,27 +620,6 @@ struct CompareValues } }; - -#if HAVE_BOEHMGC -typedef std::list> ValueList; -#else -typedef std::list ValueList; -#endif - - -static Bindings::iterator getAttr( - EvalState & state, - Symbol attrSym, - Bindings * attrSet, - std::string_view errorCtx) -{ - Bindings::iterator value = attrSet->find(attrSym); - if (value == attrSet->end()) { - state.error("attribute '%s' missing", state.symbols[attrSym]).withTrace(noPos, errorCtx).debugThrow(); - } - return value; -} - static void prim_genericClosure(EvalState & state, const PosIdx pos, Value * * args, Value & v) { state.forceAttrs(*args[0], noPos, "while evaluating the first argument passed to builtins.genericClosure"); @@ -2386,96 +2365,6 @@ static RegisterPrimOp primop_path({ * Sets *************************************************************/ - -/* Return the names of the attributes in a set as a sorted list of - strings. */ -static void prim_attrNames(EvalState & state, const PosIdx pos, Value * * args, Value & v) -{ - state.forceAttrs(*args[0], pos, "while evaluating the argument passed to builtins.attrNames"); - - state.mkList(v, args[0]->attrs->size()); - - size_t n = 0; - for (auto & i : *args[0]->attrs) - (v.listElems()[n++] = state.allocValue())->mkString(state.symbols[i.name]); - - std::sort(v.listElems(), v.listElems() + n, - [](Value * v1, Value * v2) { return strcmp(v1->string.s, v2->string.s) < 0; }); -} - -static RegisterPrimOp primop_attrNames({ - .name = "__attrNames", - .args = {"set"}, - .doc = R"( - Return the names of the attributes in the set *set* in an - alphabetically sorted list. For instance, `builtins.attrNames { y - = 1; x = "foo"; }` evaluates to `[ "x" "y" ]`. - )", - .fun = prim_attrNames, -}); - -/* Return the values of the attributes in a set as a list, in the same - order as attrNames. */ -static void prim_attrValues(EvalState & state, const PosIdx pos, Value * * args, Value & v) -{ - state.forceAttrs(*args[0], pos, "while evaluating the argument passed to builtins.attrValues"); - - state.mkList(v, args[0]->attrs->size()); - - unsigned int n = 0; - for (auto & i : *args[0]->attrs) - v.listElems()[n++] = (Value *) &i; - - std::sort(v.listElems(), v.listElems() + n, - [&](Value * v1, Value * v2) { - std::string_view s1 = state.symbols[((Attr *) v1)->name], - s2 = state.symbols[((Attr *) v2)->name]; - return s1 < s2; - }); - - for (unsigned int i = 0; i < n; ++i) - v.listElems()[i] = ((Attr *) v.listElems()[i])->value; -} - -static RegisterPrimOp primop_attrValues({ - .name = "__attrValues", - .args = {"set"}, - .doc = R"( - Return the values of the attributes in the set *set* in the order - corresponding to the sorted attribute names. - )", - .fun = prim_attrValues, -}); - -/* Dynamic version of the `.' operator. */ -void prim_getAttr(EvalState & state, const PosIdx pos, Value * * args, Value & v) -{ - auto attr = state.forceStringNoCtx(*args[0], pos, "while evaluating the first argument passed to builtins.getAttr"); - state.forceAttrs(*args[1], pos, "while evaluating the second argument passed to builtins.getAttr"); - Bindings::iterator i = getAttr( - state, - state.symbols.create(attr), - args[1]->attrs, - "in the attribute set under consideration" - ); - // !!! add to stack trace? - if (state.countCalls && i->pos) state.attrSelects[i->pos]++; - state.forceValue(*i->value, pos); - v = *i->value; -} - -static RegisterPrimOp primop_getAttr({ - .name = "__getAttr", - .args = {"s", "set"}, - .doc = R"( - `getAttr` returns the attribute named *s* from *set*. Evaluation - aborts if the attribute doesn’t exist. This is a dynamic version of - the `.` operator, since *s* is an expression rather than an - identifier. - )", - .fun = prim_getAttr, -}); - /* Return position information of the specified attribute. */ static void prim_unsafeGetAttrPos(EvalState & state, const PosIdx pos, Value * * args, Value & v) { @@ -2542,25 +2431,6 @@ void makePositionThunks(EvalState & state, const PosIdx pos, Value & line, Value makeLazyPosAccessors(state, pos, line, column); } -/* Dynamic version of the `?' operator. */ -static void prim_hasAttr(EvalState & state, const PosIdx pos, Value * * args, Value & v) -{ - auto attr = state.forceStringNoCtx(*args[0], pos, "while evaluating the first argument passed to builtins.hasAttr"); - state.forceAttrs(*args[1], pos, "while evaluating the second argument passed to builtins.hasAttr"); - v.mkBool(args[1]->attrs->find(state.symbols.create(attr)) != args[1]->attrs->end()); -} - -static RegisterPrimOp primop_hasAttr({ - .name = "__hasAttr", - .args = {"s", "set"}, - .doc = R"( - `hasAttr` returns `true` if *set* has an attribute named *s*, and - `false` otherwise. This is a dynamic version of the `?` operator, - since *s* is an expression rather than an identifier. - )", - .fun = prim_hasAttr, -}); - /* Determine whether the argument is a set. */ static void prim_isAttrs(EvalState & state, const PosIdx pos, Value * * args, Value & v) { @@ -2577,49 +2447,6 @@ static RegisterPrimOp primop_isAttrs({ .fun = prim_isAttrs, }); -static void prim_removeAttrs(EvalState & state, const PosIdx pos, Value * * args, Value & v) -{ - state.forceAttrs(*args[0], pos, "while evaluating the first argument passed to builtins.removeAttrs"); - state.forceList(*args[1], pos, "while evaluating the second argument passed to builtins.removeAttrs"); - - /* Get the attribute names to be removed. - We keep them as Attrs instead of Symbols so std::set_difference - can be used to remove them from attrs[0]. */ - // 64: large enough to fit the attributes of a derivation - boost::container::small_vector names; - names.reserve(args[1]->listSize()); - for (auto elem : args[1]->listItems()) { - state.forceStringNoCtx(*elem, pos, "while evaluating the values of the second argument passed to builtins.removeAttrs"); - names.emplace_back(state.symbols.create(elem->string.s), nullptr); - } - std::sort(names.begin(), names.end()); - - /* Copy all attributes not in that set. Note that we don't need - to sort v.attrs because it's a subset of an already sorted - vector. */ - auto attrs = state.buildBindings(args[0]->attrs->size()); - std::set_difference( - args[0]->attrs->begin(), args[0]->attrs->end(), - names.begin(), names.end(), - std::back_inserter(attrs)); - v.mkAttrs(attrs.alreadySorted()); -} - -static RegisterPrimOp primop_removeAttrs({ - .name = "removeAttrs", - .args = {"set", "list"}, - .doc = R"( - Remove the attributes listed in *list* from *set*. The attributes - don’t have to exist in *set*. For instance, - - ```nix - removeAttrs { x = 1; y = 2; z = 3; } [ "a" "x" "z" ] - ``` - - evaluates to `{ y = 2; }`. - )", - .fun = prim_removeAttrs, -}); /* Builds a set from a list specifying (name, value) pairs. To be precise, a list [{name = "name1"; value = value1;} ... {name = @@ -2682,121 +2509,6 @@ static RegisterPrimOp primop_listToAttrs({ .fun = prim_listToAttrs, }); -static void prim_intersectAttrs(EvalState & state, const PosIdx pos, Value * * args, Value & v) -{ - state.forceAttrs(*args[0], pos, "while evaluating the first argument passed to builtins.intersectAttrs"); - state.forceAttrs(*args[1], pos, "while evaluating the second argument passed to builtins.intersectAttrs"); - - Bindings &left = *args[0]->attrs; - Bindings &right = *args[1]->attrs; - - auto attrs = state.buildBindings(std::min(left.size(), right.size())); - - // The current implementation has good asymptotic complexity and is reasonably - // simple. Further optimization may be possible, but does not seem productive, - // considering the state of eval performance in 2022. - // - // I have looked for reusable and/or standard solutions and these are my - // findings: - // - // STL - // === - // std::set_intersection is not suitable, as it only performs a simultaneous - // linear scan; not taking advantage of random access. This is O(n + m), so - // linear in the largest set, which is not acceptable for callPackage in Nixpkgs. - // - // Simultaneous scan, with alternating simple binary search - // === - // One alternative algorithm scans the attrsets simultaneously, jumping - // forward using `lower_bound` in case of inequality. This should perform - // well on very similar sets, having a local and predictable access pattern. - // On dissimilar sets, it seems to need more comparisons than the current - // algorithm, as few consecutive attrs match. `lower_bound` could take - // advantage of the decreasing remaining search space, but this causes - // the medians to move, which can mean that they don't stay in the cache - // like they would with the current naive `find`. - // - // Double binary search - // === - // The optimal algorithm may be "Double binary search", which doesn't - // scan at all, but rather divides both sets simultaneously. - // See "Fast Intersection Algorithms for Sorted Sequences" by Baeza-Yates et al. - // https://cs.uwaterloo.ca/~ajsaling/papers/intersection_alg_app10.pdf - // The only downsides I can think of are not having a linear access pattern - // for similar sets, and having to maintain a more intricate algorithm. - // - // Adaptive - // === - // Finally one could run try a simultaneous scan, count misses and fall back - // to double binary search when the counter hit some threshold and/or ratio. - - if (left.size() < right.size()) { - for (auto & l : left) { - Bindings::iterator r = right.find(l.name); - if (r != right.end()) - attrs.insert(*r); - } - } - else { - for (auto & r : right) { - Bindings::iterator l = left.find(r.name); - if (l != left.end()) - attrs.insert(r); - } - } - - v.mkAttrs(attrs.alreadySorted()); -} - -static RegisterPrimOp primop_intersectAttrs({ - .name = "__intersectAttrs", - .args = {"e1", "e2"}, - .doc = R"( - Return a set consisting of the attributes in the set *e2* which have the - same name as some attribute in *e1*. - - Performs in O(*n* log *m*) where *n* is the size of the smaller set and *m* the larger set's size. - )", - .fun = prim_intersectAttrs, -}); - -static void prim_catAttrs(EvalState & state, const PosIdx pos, Value * * args, Value & v) -{ - auto attrName = state.symbols.create(state.forceStringNoCtx(*args[0], pos, "while evaluating the first argument passed to builtins.catAttrs")); - state.forceList(*args[1], pos, "while evaluating the second argument passed to builtins.catAttrs"); - - SmallValueVector res(args[1]->listSize()); - size_t found = 0; - - for (auto v2 : args[1]->listItems()) { - state.forceAttrs(*v2, pos, "while evaluating an element in the list passed as second argument to builtins.catAttrs"); - Bindings::iterator i = v2->attrs->find(attrName); - if (i != v2->attrs->end()) - res[found++] = i->value; - } - - state.mkList(v, found); - for (unsigned int n = 0; n < found; ++n) - v.listElems()[n] = res[n]; -} - -static RegisterPrimOp primop_catAttrs({ - .name = "__catAttrs", - .args = {"attr", "list"}, - .doc = R"( - Collect each attribute named *attr* from a list of attribute - sets. Attrsets that don't contain the named attribute are - ignored. For example, - - ```nix - builtins.catAttrs "a" [{a = 1;} {b = 0;} {a = 2;}] - ``` - - evaluates to `[1 2]`. - )", - .fun = prim_catAttrs, -}); - static void prim_functionArgs(EvalState & state, const PosIdx pos, Value * * args, Value & v) { state.forceValue(*args[0], pos); @@ -2806,19 +2518,16 @@ static void prim_functionArgs(EvalState & state, const PosIdx pos, Value * * arg } if (!args[0]->isLambda()) state.error("'functionArgs' requires a function").atPos(pos).debugThrow(); - if (!args[0]->lambda.fun->hasFormals()) { v.mkAttrs(&state.emptyBindings); return; } - auto attrs = state.buildBindings(args[0]->lambda.fun->formals->formals.size()); for (auto & i : args[0]->lambda.fun->formals->formals) // !!! should optimise booleans (allocate only once) attrs.alloc(i.name, i.pos).mkBool(i.def); v.mkAttrs(attrs); } - static RegisterPrimOp primop_functionArgs({ .name = "__functionArgs", .args = {"f"}, @@ -2828,7 +2537,6 @@ static RegisterPrimOp primop_functionArgs({ denoting whether the corresponding argument has a default value. For instance, `functionArgs ({ x, y ? 123}: ...) = { x = false; y = true; }`. - "Formal argument" here refers to the attributes pattern-matched by the function. Plain lambdas are not included, e.g. `functionArgs (x: ...) = { }`. @@ -2837,117 +2545,7 @@ static RegisterPrimOp primop_functionArgs({ }); /* */ -static void prim_mapAttrs(EvalState & state, const PosIdx pos, Value * * args, Value & v) -{ - state.forceAttrs(*args[1], pos, "while evaluating the second argument passed to builtins.mapAttrs"); - auto attrs = state.buildBindings(args[1]->attrs->size()); - - for (auto & i : *args[1]->attrs) { - Value * vName = state.allocValue(); - Value * vFun2 = state.allocValue(); - vName->mkString(state.symbols[i.name]); - vFun2->mkApp(args[0], vName); - attrs.alloc(i.name).mkApp(vFun2, i.value); - } - - v.mkAttrs(attrs.alreadySorted()); -} - -static RegisterPrimOp primop_mapAttrs({ - .name = "__mapAttrs", - .args = {"f", "attrset"}, - .doc = R"( - Apply function *f* to every element of *attrset*. For example, - - ```nix - builtins.mapAttrs (name: value: value * 10) { a = 1; b = 2; } - ``` - - evaluates to `{ a = 10; b = 20; }`. - )", - .fun = prim_mapAttrs, -}); - -static void prim_zipAttrsWith(EvalState & state, const PosIdx pos, Value * * args, Value & v) -{ - // we will first count how many values are present for each given key. - // we then allocate a single attrset and pre-populate it with lists of - // appropriate sizes, stash the pointers to the list elements of each, - // and populate the lists. after that we replace the list in the every - // attribute with the merge function application. this way we need not - // use (slightly slower) temporary storage the GC does not know about. - - std::map> attrsSeen; - - state.forceFunction(*args[0], pos, "while evaluating the first argument passed to builtins.zipAttrsWith"); - state.forceList(*args[1], pos, "while evaluating the second argument passed to builtins.zipAttrsWith"); - const auto listSize = args[1]->listSize(); - const auto listElems = args[1]->listElems(); - - for (unsigned int n = 0; n < listSize; ++n) { - Value * vElem = listElems[n]; - state.forceAttrs(*vElem, noPos, "while evaluating a value of the list passed as second argument to builtins.zipAttrsWith"); - for (auto & attr : *vElem->attrs) - attrsSeen[attr.name].first++; - } - - auto attrs = state.buildBindings(attrsSeen.size()); - for (auto & [sym, elem] : attrsSeen) { - auto & list = attrs.alloc(sym); - state.mkList(list, elem.first); - elem.second = list.listElems(); - } - v.mkAttrs(attrs.alreadySorted()); - - for (unsigned int n = 0; n < listSize; ++n) { - Value * vElem = listElems[n]; - for (auto & attr : *vElem->attrs) - *attrsSeen[attr.name].second++ = attr.value; - } - - for (auto & attr : *v.attrs) { - auto name = state.allocValue(); - name->mkString(state.symbols[attr.name]); - auto call1 = state.allocValue(); - call1->mkApp(args[0], name); - auto call2 = state.allocValue(); - call2->mkApp(call1, attr.value); - attr.value = call2; - } -} - -static RegisterPrimOp primop_zipAttrsWith({ - .name = "__zipAttrsWith", - .args = {"f", "list"}, - .doc = R"( - Transpose a list of attribute sets into an attribute set of lists, - then apply `mapAttrs`. - - `f` receives two arguments: the attribute name and a non-empty - list of all values encountered for that attribute name. - - The result is an attribute set where the attribute names are the - union of the attribute names in each element of `list`. The attribute - values are the return values of `f`. - - ```nix - builtins.zipAttrsWith - (name: values: { inherit name values; }) - [ { a = "x"; } { a = "y"; b = "z"; } ] - ``` - - evaluates to - - ``` - { - a = { name = "a"; values = [ "x" "y" ]; }; - b = { name = "b"; values = [ "z" ]; }; - } - ``` - )", - .fun = prim_zipAttrsWith, -}); /************************************************************* @@ -3428,58 +3026,6 @@ static RegisterPrimOp primop_partition({ .fun = prim_partition, }); -static void prim_groupBy(EvalState & state, const PosIdx pos, Value * * args, Value & v) -{ - state.forceFunction(*args[0], pos, "while evaluating the first argument passed to builtins.groupBy"); - state.forceList(*args[1], pos, "while evaluating the second argument passed to builtins.groupBy"); - - ValueVectorMap attrs; - - for (auto vElem : args[1]->listItems()) { - Value res; - state.callFunction(*args[0], *vElem, res, pos); - auto name = state.forceStringNoCtx(res, pos, "while evaluating the return value of the grouping function passed to builtins.groupBy"); - auto sym = state.symbols.create(name); - auto vector = attrs.try_emplace(sym, ValueVector()).first; - vector->second.push_back(vElem); - } - - auto attrs2 = state.buildBindings(attrs.size()); - - for (auto & i : attrs) { - auto & list = attrs2.alloc(i.first); - auto size = i.second.size(); - state.mkList(list, size); - memcpy(list.listElems(), i.second.data(), sizeof(Value *) * size); - } - - v.mkAttrs(attrs2.alreadySorted()); -} - -static RegisterPrimOp primop_groupBy({ - .name = "__groupBy", - .args = {"f", "list"}, - .doc = R"( - Groups elements of *list* together by the string returned from the - function *f* called on each element. It returns an attribute set - where each attribute value contains the elements of *list* that are - mapped to the same corresponding attribute name returned by *f*. - - For example, - - ```nix - builtins.groupBy (builtins.substring 0 1) ["foo" "bar" "baz"] - ``` - - evaluates to - - ```nix - { b = [ "bar" "baz" ]; f = [ "foo" ]; } - ``` - )", - .fun = prim_groupBy, -}); - static void prim_concatMap(EvalState & state, const PosIdx pos, Value * * args, Value & v) { state.forceFunction(*args[0], pos, "while evaluating the first argument passed to builtins.concatMap"); diff --git a/src/libexpr/primops.hh b/src/libexpr/primops.hh index 9f76975db..8f3070fee 100644 --- a/src/libexpr/primops.hh +++ b/src/libexpr/primops.hh @@ -44,13 +44,25 @@ struct RegisterPrimOp /** * Load a ValueInitializer from a DSO and return whatever it initializes */ -void prim_importNative(EvalState & state, const PosIdx pos, Value * * args, Value & v); +void prim_importNative(EvalState & state, const PosIdx pos, Value ** args, Value & v); /** * Execute a program and parse its output */ -void prim_exec(EvalState & state, const PosIdx pos, Value * * args, Value & v); +void prim_exec(EvalState & state, const PosIdx pos, Value ** args, Value & v); void makePositionThunks(EvalState & state, const PosIdx pos, Value & line, Value & column); +#if HAVE_BOEHMGC +typedef std::list> ValueList; +#else +typedef std::list ValueList; +#endif + +/** + * getAttr wrapper + */ + +Bindings::iterator getAttr(EvalState & state, Symbol attrSym, Bindings * attrSet, std::string_view errorCtx); + } diff --git a/src/libexpr/primops/attrset.cc b/src/libexpr/primops/attrset.cc new file mode 100644 index 000000000..158d971a8 --- /dev/null +++ b/src/libexpr/primops/attrset.cc @@ -0,0 +1,543 @@ +#include "gc-small-vector.hh" +#include "primops.hh" + +namespace nix { + +Bindings::iterator +getAttr(EvalState & state, Symbol attrSym, Bindings * attrSet, std::string_view errorCtx) +{ + Bindings::iterator value = attrSet->find(attrSym); + if (value == attrSet->end()) { + state.error("attribute '%s' missing", state.symbols[attrSym]) + .withTrace(noPos, errorCtx) + .debugThrow(); + } + return value; +} + +/** + * builtins.attrNames + */ + +static void prim_attrNames(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + state.forceAttrs(*args[0], pos, "while evaluating the argument passed to builtins.attrNames"); + + state.mkList(v, args[0]->attrs->size()); + + size_t n = 0; + for (auto & i : *args[0]->attrs) { + (v.listElems()[n++] = state.allocValue())->mkString(state.symbols[i.name]); + } + + std::sort(v.listElems(), v.listElems() + n, [](Value * v1, Value * v2) { + return strcmp(v1->string.s, v2->string.s) < 0; + }); +} + +static RegisterPrimOp primop_attrNames({ + .name = "__attrNames", + .args = {"set"}, + .doc = R"( + Return the names of the attributes in the set *set* in an + alphabetically sorted list. For instance, `builtins.attrNames { y + = 1; x = "foo"; }` evaluates to `[ "x" "y" ]`. + )", + .fun = prim_attrNames, +}); + +/** + * builtins.attrNames + */ + +static void prim_attrValues(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + state.forceAttrs(*args[0], pos, "while evaluating the argument passed to builtins.attrValues"); + + state.mkList(v, args[0]->attrs->size()); + + unsigned int n = 0; + for (auto & i : *args[0]->attrs) { + v.listElems()[n++] = (Value *) &i; + } + + std::sort(v.listElems(), v.listElems() + n, [&](Value * v1, Value * v2) { + std::string_view s1 = state.symbols[((Attr *) v1)->name], + s2 = state.symbols[((Attr *) v2)->name]; + return s1 < s2; + }); + + for (unsigned int i = 0; i < n; ++i) { + v.listElems()[i] = ((Attr *) v.listElems()[i])->value; + } +} + +static RegisterPrimOp primop_attrValues({ + .name = "__attrValues", + .args = {"set"}, + .doc = R"( + Return the values of the attributes in the set *set* in the order + corresponding to the sorted attribute names. + )", + .fun = prim_attrValues, +}); + +/** + * builtins.catAttrs + */ + +static void prim_catAttrs(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto attrName = state.symbols.create(state.forceStringNoCtx( + *args[0], pos, "while evaluating the first argument passed to builtins.catAttrs" + )); + state.forceList( + *args[1], pos, "while evaluating the second argument passed to builtins.catAttrs" + ); + + SmallValueVector res(args[1]->listSize()); + size_t found = 0; + + for (auto v2 : args[1]->listItems()) { + state.forceAttrs( + *v2, + pos, + "while evaluating an element in the list passed as second argument to builtins.catAttrs" + ); + Bindings::iterator i = v2->attrs->find(attrName); + if (i != v2->attrs->end()) { + res[found++] = i->value; + } + } + + state.mkList(v, found); + for (unsigned int n = 0; n < found; ++n) { + v.listElems()[n] = res[n]; + } +} + +static RegisterPrimOp primop_catAttrs({ + .name = "__catAttrs", + .args = {"attr", "list"}, + .doc = R"( + Collect each attribute named *attr* from a list of attribute + sets. Attrsets that don't contain the named attribute are + ignored. For example, + + ```nix + builtins.catAttrs "a" [{a = 1;} {b = 0;} {a = 2;}] + ``` + + evaluates to `[1 2]`. + )", + .fun = prim_catAttrs, +}); + +/** + * builtins.getAttr + */ + +void prim_getAttr(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto attr = state.forceStringNoCtx( + *args[0], pos, "while evaluating the first argument passed to builtins.getAttr" + ); + state.forceAttrs( + *args[1], pos, "while evaluating the second argument passed to builtins.getAttr" + ); + Bindings::iterator i = getAttr( + state, + state.symbols.create(attr), + args[1]->attrs, + "in the attribute set under consideration" + ); + // !!! add to stack trace? + if (state.countCalls && i->pos) { + state.attrSelects[i->pos]++; + } + state.forceValue(*i->value, pos); + v = *i->value; +} + +static RegisterPrimOp primop_getAttr({ + .name = "__getAttr", + .args = {"s", "set"}, + .doc = R"( + `getAttr` returns the attribute named *s* from *set*. Evaluation + aborts if the attribute doesn’t exist. This is a dynamic version of + the `.` operator, since *s* is an expression rather than an + identifier. + )", + .fun = prim_getAttr, +}); + +/** + * builtins.groupBy + */ + +static void prim_groupBy(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + state.forceFunction( + *args[0], pos, "while evaluating the first argument passed to builtins.groupBy" + ); + state.forceList( + *args[1], pos, "while evaluating the second argument passed to builtins.groupBy" + ); + + ValueVectorMap attrs; + + for (auto vElem : args[1]->listItems()) { + Value res; + state.callFunction(*args[0], *vElem, res, pos); + auto name = state.forceStringNoCtx( + res, + pos, + "while evaluating the return value of the grouping function passed to builtins.groupBy" + ); + auto sym = state.symbols.create(name); + auto vector = attrs.try_emplace(sym, ValueVector()).first; + vector->second.push_back(vElem); + } + + auto attrs2 = state.buildBindings(attrs.size()); + + for (auto & i : attrs) { + auto & list = attrs2.alloc(i.first); + auto size = i.second.size(); + state.mkList(list, size); + memcpy(list.listElems(), i.second.data(), sizeof(Value *) * size); + } + + v.mkAttrs(attrs2.alreadySorted()); +} + +static RegisterPrimOp primop_groupBy({ + .name = "__groupBy", + .args = {"f", "list"}, + .doc = R"( + Groups elements of *list* together by the string returned from the + function *f* called on each element. It returns an attribute set + where each attribute value contains the elements of *list* that are + mapped to the same corresponding attribute name returned by *f*. + + For example, + + ```nix + builtins.groupBy (builtins.substring 0 1) ["foo" "bar" "baz"] + ``` + + evaluates to + + ```nix + { b = [ "bar" "baz" ]; f = [ "foo" ]; } + ``` + )", + .fun = prim_groupBy, +}); + +/** + * builtins.hasAttr + */ + +static void prim_hasAttr(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto attr = state.forceStringNoCtx( + *args[0], pos, "while evaluating the first argument passed to builtins.hasAttr" + ); + state.forceAttrs( + *args[1], pos, "while evaluating the second argument passed to builtins.hasAttr" + ); + v.mkBool(args[1]->attrs->find(state.symbols.create(attr)) != args[1]->attrs->end()); +} + +static RegisterPrimOp primop_hasAttr({ + .name = "__hasAttr", + .args = {"s", "set"}, + .doc = R"( + `hasAttr` returns `true` if *set* has an attribute named *s*, and + `false` otherwise. This is a dynamic version of the `?` operator, + since *s* is an expression rather than an identifier. + )", + .fun = prim_hasAttr, +}); + +/** + * builtins.intersectAttrs + */ + +static void prim_intersectAttrs(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + state.forceAttrs( + *args[0], pos, "while evaluating the first argument passed to builtins.intersectAttrs" + ); + state.forceAttrs( + *args[1], pos, "while evaluating the second argument passed to builtins.intersectAttrs" + ); + + Bindings & left = *args[0]->attrs; + Bindings & right = *args[1]->attrs; + + auto attrs = state.buildBindings(std::min(left.size(), right.size())); + + // The current implementation has good asymptotic complexity and is reasonably + // simple. Further optimization may be possible, but does not seem productive, + // considering the state of eval performance in 2022. + // + // I have looked for reusable and/or standard solutions and these are my + // findings: + // + // STL + // === + // std::set_intersection is not suitable, as it only performs a simultaneous + // linear scan; not taking advantage of random access. This is O(n + m), so + // linear in the largest set, which is not acceptable for callPackage in Nixpkgs. + // + // Simultaneous scan, with alternating simple binary search + // === + // One alternative algorithm scans the attrsets simultaneously, jumping + // forward using `lower_bound` in case of inequality. This should perform + // well on very similar sets, having a local and predictable access pattern. + // On dissimilar sets, it seems to need more comparisons than the current + // algorithm, as few consecutive attrs match. `lower_bound` could take + // advantage of the decreasing remaining search space, but this causes + // the medians to move, which can mean that they don't stay in the cache + // like they would with the current naive `find`. + // + // Double binary search + // === + // The optimal algorithm may be "Double binary search", which doesn't + // scan at all, but rather divides both sets simultaneously. + // See "Fast Intersection Algorithms for Sorted Sequences" by Baeza-Yates et al. + // https://cs.uwaterloo.ca/~ajsaling/papers/intersection_alg_app10.pdf + // The only downsides I can think of are not having a linear access pattern + // for similar sets, and having to maintain a more intricate algorithm. + // + // Adaptive + // === + // Finally one could run try a simultaneous scan, count misses and fall back + // to double binary search when the counter hit some threshold and/or ratio. + + if (left.size() < right.size()) { + for (auto & l : left) { + Bindings::iterator r = right.find(l.name); + if (r != right.end()) { + attrs.insert(*r); + } + } + } else { + for (auto & r : right) { + Bindings::iterator l = left.find(r.name); + if (l != left.end()) { + attrs.insert(r); + } + } + } + + v.mkAttrs(attrs.alreadySorted()); +} + +static RegisterPrimOp primop_intersectAttrs({ + .name = "__intersectAttrs", + .args = {"e1", "e2"}, + .doc = R"( + Return a set consisting of the attributes in the set *e2* which have the + same name as some attribute in *e1*. + + Performs in O(*n* log *m*) where *n* is the size of the smaller set and *m* the larger set's size. + )", + .fun = prim_intersectAttrs, +}); + +/** + * builtins.mapAttrs + */ + +static void prim_mapAttrs(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + state.forceAttrs( + *args[1], pos, "while evaluating the second argument passed to builtins.mapAttrs" + ); + + auto attrs = state.buildBindings(args[1]->attrs->size()); + + for (auto & i : *args[1]->attrs) { + Value * vName = state.allocValue(); + Value * vFun2 = state.allocValue(); + vName->mkString(state.symbols[i.name]); + vFun2->mkApp(args[0], vName); + attrs.alloc(i.name).mkApp(vFun2, i.value); + } + + v.mkAttrs(attrs.alreadySorted()); +} + +static RegisterPrimOp primop_mapAttrs({ + .name = "__mapAttrs", + .args = {"f", "attrset"}, + .doc = R"( + Apply function *f* to every element of *attrset*. For example, + + ```nix + builtins.mapAttrs (name: value: value * 10) { a = 1; b = 2; } + ``` + + evaluates to `{ a = 10; b = 20; }`. + )", + .fun = prim_mapAttrs, +}); + +/** + * builtins.mapAttrs + */ + +static void prim_removeAttrs(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + state.forceAttrs( + *args[0], pos, "while evaluating the first argument passed to builtins.removeAttrs" + ); + state.forceList( + *args[1], pos, "while evaluating the second argument passed to builtins.removeAttrs" + ); + + /* Get the attribute names to be removed. + We keep them as Attrs instead of Symbols so std::set_difference + can be used to remove them from attrs[0]. */ + // 64: large enough to fit the attributes of a derivation + boost::container::small_vector names; + names.reserve(args[1]->listSize()); + for (auto elem : args[1]->listItems()) { + state.forceStringNoCtx( + *elem, + pos, + "while evaluating the values of the second argument passed to builtins.removeAttrs" + ); + names.emplace_back(state.symbols.create(elem->string.s), nullptr); + } + std::sort(names.begin(), names.end()); + + /* Copy all attributes not in that set. Note that we don't need + to sort v.attrs because it's a subset of an already sorted + vector. */ + auto attrs = state.buildBindings(args[0]->attrs->size()); + std::set_difference( + args[0]->attrs->begin(), + args[0]->attrs->end(), + names.begin(), + names.end(), + std::back_inserter(attrs) + ); + v.mkAttrs(attrs.alreadySorted()); +} + +static RegisterPrimOp primop_removeAttrs({ + .name = "removeAttrs", + .args = {"set", "list"}, + .doc = R"( + Remove the attributes listed in *list* from *set*. The attributes + don’t have to exist in *set*. For instance, + + ```nix + removeAttrs { x = 1; y = 2; z = 3; } [ "a" "x" "z" ] + ``` + + evaluates to `{ y = 2; }`. + )", + .fun = prim_removeAttrs, +}); + +/** + * builtins.zipAttrsWith + */ + +static void prim_zipAttrsWith(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + // we will first count how many values are present for each given key. + // we then allocate a single attrset and pre-populate it with lists of + // appropriate sizes, stash the pointers to the list elements of each, + // and populate the lists. after that we replace the list in the every + // attribute with the merge function application. this way we need not + // use (slightly slower) temporary storage the GC does not know about. + + std::map> attrsSeen; + + state.forceFunction( + *args[0], pos, "while evaluating the first argument passed to builtins.zipAttrsWith" + ); + state.forceList( + *args[1], pos, "while evaluating the second argument passed to builtins.zipAttrsWith" + ); + const auto listSize = args[1]->listSize(); + const auto listElems = args[1]->listElems(); + + for (unsigned int n = 0; n < listSize; ++n) { + Value * vElem = listElems[n]; + state.forceAttrs( + *vElem, + noPos, + "while evaluating a value of the list passed as second argument to " + "builtins.zipAttrsWith" + ); + for (auto & attr : *vElem->attrs) { + attrsSeen[attr.name].first++; + } + } + + auto attrs = state.buildBindings(attrsSeen.size()); + for (auto & [sym, elem] : attrsSeen) { + auto & list = attrs.alloc(sym); + state.mkList(list, elem.first); + elem.second = list.listElems(); + } + v.mkAttrs(attrs.alreadySorted()); + + for (unsigned int n = 0; n < listSize; ++n) { + Value * vElem = listElems[n]; + for (auto & attr : *vElem->attrs) { + *attrsSeen[attr.name].second++ = attr.value; + } + } + + for (auto & attr : *v.attrs) { + auto name = state.allocValue(); + name->mkString(state.symbols[attr.name]); + auto call1 = state.allocValue(); + call1->mkApp(args[0], name); + auto call2 = state.allocValue(); + call2->mkApp(call1, attr.value); + attr.value = call2; + } +} + +static RegisterPrimOp primop_zipAttrsWith({ + .name = "__zipAttrsWith", + .args = {"f", "list"}, + .doc = R"( + Transpose a list of attribute sets into an attribute set of lists, + then apply `mapAttrs`. + + `f` receives two arguments: the attribute name and a non-empty + list of all values encountered for that attribute name. + + The result is an attribute set where the attribute names are the + union of the attribute names in each element of `list`. The attribute + values are the return values of `f`. + + ```nix + builtins.zipAttrsWith + (name: values: { inherit name values; }) + [ { a = "x"; } { a = "y"; b = "z"; } ] + ``` + + evaluates to + + ``` + { + a = { name = "a"; values = [ "x" "y" ]; }; + b = { name = "b"; values = [ "z" ]; }; + } + ``` + )", + .fun = prim_zipAttrsWith, +}); + +}