From 90700736c7744b97ab69a4fadcd56fa242ec617f Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Thu, 2 Dec 2021 17:46:44 +0100 Subject: [PATCH] Introduce builtins.groupBy primop This function is very useful in nixpkgs, but its implementation in Nix itself is rather slow due to it requiring a lot of attribute set and list appends. --- doc/manual/src/release-notes/rl-next.md | 3 ++ src/libexpr/primops.cc | 50 +++++++++++++++++++++++++ src/libexpr/value.hh | 2 + 3 files changed, 55 insertions(+) diff --git a/doc/manual/src/release-notes/rl-next.md b/doc/manual/src/release-notes/rl-next.md index 26c7d2cce..a6b22dfa7 100644 --- a/doc/manual/src/release-notes/rl-next.md +++ b/doc/manual/src/release-notes/rl-next.md @@ -5,3 +5,6 @@ * `nix develop` now has a flag `--unpack` to run `unpackPhase`. * Lists can now be compared lexicographically using the `<` operator. + +* New built-in function: `builtins.groupBy`, with the same functionality as + Nixpkgs' `lib.groupBy`, but faster. diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index d1f4d9009..66af373d7 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -2928,6 +2928,56 @@ static RegisterPrimOp primop_partition({ .fun = prim_partition, }); +static void prim_groupBy(EvalState & state, const Pos & pos, Value * * args, Value & v) +{ + state.forceFunction(*args[0], pos); + state.forceList(*args[1], pos); + + ValueVectorMap attrs; + + for (auto vElem : args[1]->listItems()) { + Value res; + state.callFunction(*args[0], *vElem, res, pos); + string name = state.forceStringNoCtx(res, pos); + Symbol sym = state.symbols.create(name); + auto vector = attrs.try_emplace(sym, ValueVector()).first; + vector->second.push_back(vElem); + } + + state.mkAttrs(v, attrs.size()); + + for (auto & i : attrs) { + Value * list = state.allocAttr(v, i.first); + auto size = i.second.size(); + state.mkList(*list, size); + memcpy(list->listElems(), i.second.data(), sizeof(Value *) * size); + } +} + +static RegisterPrimOp primop_groupBy({ + .name = "__groupBy", + .args = {"f", "list"}, + .doc = R"( + Groups elements of *list* together by the string returned from the + function *f* called on each element. It returns an attribute set + where each attribute value contains the elements of *list* that are + mapped to the same corresponding attribute name returned by *f*. + + For example, + + ```nix + builtins.groupBy (builtins.substring 0 1) ["foo" "bar" "baz"] + ``` + + evaluates to + + ```nix + { b = [ "bar" "baz" ]; f = [ "foo" ]; } + ``` + )", + .fun = prim_groupBy, +}); + static void prim_concatMap(EvalState & state, const Pos & pos, Value * * args, Value & v) { state.forceFunction(*args[0], pos); diff --git a/src/libexpr/value.hh b/src/libexpr/value.hh index 4b43e47ae..6b4f3c0ae 100644 --- a/src/libexpr/value.hh +++ b/src/libexpr/value.hh @@ -425,9 +425,11 @@ void mkPath(Value & v, const char * s); #if HAVE_BOEHMGC typedef std::vector > ValueVector; typedef std::map, traceable_allocator > > ValueMap; +typedef std::map, traceable_allocator > > ValueVectorMap; #else typedef std::vector ValueVector; typedef std::map ValueMap; +typedef std::map ValueVectorMap; #endif