From 4b2b0d3a5528133898a15f3210c79162ec993823 Mon Sep 17 00:00:00 2001 From: pennae Date: Wed, 29 Dec 2021 01:28:58 +0100 Subject: [PATCH 1/6] remove GC_PTR_STORE_AND_DIRTY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit turns out it's only necessary for MANUAL_VDB, which nix doesn't use. omitting them gives a slight performance improvement on eval. before: Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 6.988 s ± 0.061 s [User: 5.935 s, System: 0.845 s] Range (min … max): 6.865 s … 7.075 s 20 runs Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 332.6 ms ± 3.9 ms [User: 299.6 ms, System: 32.9 ms] Range (min … max): 328.1 ms … 339.1 ms 20 runs Benchmark 3: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 2.681 s ± 0.049 s [User: 2.382 s, System: 0.228 s] Range (min … max): 2.607 s … 2.776 s 20 runs after: Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 6.946 s ± 0.041 s [User: 5.875 s, System: 0.835 s] Range (min … max): 6.834 s … 7.005 s 20 runs Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 330.3 ms ± 2.5 ms [User: 299.2 ms, System: 30.9 ms] Range (min … max): 327.5 ms … 337.7 ms 20 runs Benchmark 3: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 2.671 s ± 0.035 s [User: 2.370 s, System: 0.232 s] Range (min … max): 2.597 s … 2.749 s 20 runs --- src/libexpr/eval.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 193358161..777f6a4ec 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -879,7 +879,7 @@ Value * EvalState::allocValue() /* GC_NEXT is a convenience macro for accessing the first word of an object. 
Take the first list item, advance the list to the next item, and clear the next pointer. */ void * p = *valueAllocCache; - GC_PTR_STORE_AND_DIRTY(&*valueAllocCache, GC_NEXT(p)); + *valueAllocCache = GC_NEXT(p); GC_NEXT(p) = nullptr; nrValues++; From 60ed4e908a59f258f82356a9dd47e43361d39f2f Mon Sep 17 00:00:00 2001 From: pennae Date: Sun, 26 Dec 2021 19:32:08 +0100 Subject: [PATCH 2/6] cache singleton Envs just like Values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vast majority of envs is this size. before: Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 6.946 s ± 0.041 s [User: 5.875 s, System: 0.835 s] Range (min … max): 6.834 s … 7.005 s 20 runs Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 330.3 ms ± 2.5 ms [User: 299.2 ms, System: 30.9 ms] Range (min … max): 327.5 ms … 337.7 ms 20 runs Benchmark 3: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 2.671 s ± 0.035 s [User: 2.370 s, System: 0.232 s] Range (min … max): 2.597 s … 2.749 s 20 runs after: Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 6.935 s ± 0.052 s [User: 5.852 s, System: 0.853 s] Range (min … max): 6.808 s … 7.026 s 20 runs Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 329.8 ms ± 2.7 ms [User: 299.0 ms, System: 30.8 ms] Range (min … max): 326.6 ms … 336.5 ms 20 runs Benchmark 3: nix flakes eval --raw --impure --file expr.nix Time (mean ± σ): 2.655 s ± 0.038 s [User: 2.364 s, System: 0.220 s] Range (min … max): 2.574 s … 2.737 s 20 runs --- src/libexpr/eval.cc | 21 ++++++++++++++++++++- src/libexpr/eval.hh | 3 +++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 777f6a4ec..ddbbdeaa6 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ 
-438,8 +438,10 @@ EvalState::EvalState( , regexCache(makeRegexCache()) #if HAVE_BOEHMGC , valueAllocCache(std::allocate_shared(traceable_allocator(), nullptr)) + , env1AllocCache(std::allocate_shared(traceable_allocator(), nullptr)) #else , valueAllocCache(std::make_shared(nullptr)) + , env1AllocCache(std::make_shared(nullptr)) #endif , baseEnv(allocEnv(128)) , staticBaseEnv(false, 0) @@ -892,7 +894,24 @@ Env & EvalState::allocEnv(size_t size) { nrEnvs++; nrValuesInEnvs += size; - Env * env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *)); + + Env * env; + + if (size != 1) + env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *)); + else { + /* see allocValue for explanations. */ + if (!*env1AllocCache) { + *env1AllocCache = GC_malloc_many(sizeof(Env) + sizeof(Value *)); + if (!*env1AllocCache) throw std::bad_alloc(); + } + + void * p = *env1AllocCache; + *env1AllocCache = GC_NEXT(p); + GC_NEXT(p) = nullptr; + env = (Env *) p; + } + env->type = Env::Plain; /* We assume that env->values has been cleared by the allocator; maybeThunk() and lookupVar fromWith expect this. */ diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index 800b00eef..41384f044 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -136,6 +136,9 @@ private: /* Allocation cache for GC'd Value objects. */ std::shared_ptr valueAllocCache; + /* Allocation cache for size-1 Env objects. */ + std::shared_ptr env1AllocCache; + public: EvalState( From c96460f3520862d52b7bf3108f609e20384878e7 Mon Sep 17 00:00:00 2001 From: pennae Date: Tue, 28 Dec 2021 19:18:17 +0100 Subject: [PATCH 3/6] force-inline a few much-used functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit these functions are called a whole lot, and they're all comparatively small. always inlining them gives ~0.7% performance boost on eval. 
before: Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 6.935 s ± 0.052 s [User: 5.852 s, System: 0.853 s] Range (min … max): 6.808 s … 7.026 s 20 runs Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 329.8 ms ± 2.7 ms [User: 299.0 ms, System: 30.8 ms] Range (min … max): 326.6 ms … 336.5 ms 20 runs Benchmark 3: nix flakes eval --raw --impure --file expr.nix Time (mean ± σ): 2.655 s ± 0.038 s [User: 2.364 s, System: 0.220 s] Range (min … max): 2.574 s … 2.737 s 20 runs after: Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 6.912 s ± 0.036 s [User: 5.823 s, System: 0.856 s] Range (min … max): 6.849 s … 6.980 s 20 runs Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 325.1 ms ± 2.5 ms [User: 293.2 ms, System: 31.8 ms] Range (min … max): 322.2 ms … 332.8 ms 20 runs Benchmark 3: nix flakes eval --raw --impure --file expr.nix Time (mean ± σ): 2.636 s ± 0.024 s [User: 2.352 s, System: 0.226 s] Range (min … max): 2.574 s … 2.681 s 20 runs --- src/libexpr/eval-inline.hh | 86 ++++++++++++++++++++++++++++++++------ src/libexpr/eval.cc | 53 ----------------------- src/libexpr/eval.hh | 6 ++- 3 files changed, 77 insertions(+), 68 deletions(-) diff --git a/src/libexpr/eval-inline.hh b/src/libexpr/eval-inline.hh index aef1f6351..3331a7643 100644 --- a/src/libexpr/eval-inline.hh +++ b/src/libexpr/eval-inline.hh @@ -24,6 +24,76 @@ LocalNoInlineNoReturn(void throwTypeError(const Pos & pos, const char * s, const } +/* Note: Various places expect the allocated memory to be zeroed. 
*/ +[[gnu::always_inline]] +inline void * allocBytes(size_t n) +{ + void * p; +#if HAVE_BOEHMGC + p = GC_MALLOC(n); +#else + p = calloc(n, 1); +#endif + if (!p) throw std::bad_alloc(); + return p; +} + + +[[gnu::always_inline]] +Value * EvalState::allocValue() +{ + /* We use the boehm batch allocator to speed up allocations of Values (of which there are many). + GC_malloc_many returns a linked list of objects of the given size, where the first word + of each object is also the pointer to the next object in the list. This also means that we + have to explicitly clear the first word of every object we take. */ + if (!*valueAllocCache) { + *valueAllocCache = GC_malloc_many(sizeof(Value)); + if (!*valueAllocCache) throw std::bad_alloc(); + } + + /* GC_NEXT is a convenience macro for accessing the first word of an object. + Take the first list item, advance the list to the next item, and clear the next pointer. */ + void * p = *valueAllocCache; + *valueAllocCache = GC_NEXT(p); + GC_NEXT(p) = nullptr; + + nrValues++; + return (Value *) p; +} + + +[[gnu::always_inline]] +Env & EvalState::allocEnv(size_t size) +{ + nrEnvs++; + nrValuesInEnvs += size; + + Env * env; + + if (size != 1) + env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *)); + else { + /* see allocValue for explanations. */ + if (!*env1AllocCache) { + *env1AllocCache = GC_malloc_many(sizeof(Env) + sizeof(Value *)); + if (!*env1AllocCache) throw std::bad_alloc(); + } + + void * p = *env1AllocCache; + *env1AllocCache = GC_NEXT(p); + GC_NEXT(p) = nullptr; + env = (Env *) p; + } + + env->type = Env::Plain; + + /* We assume that env->values has been cleared by the allocator; maybeThunk() and lookupVar fromWith expect this. 
*/ + + return *env; +} + + +[[gnu::always_inline]] void EvalState::forceValue(Value & v, const Pos & pos) { forceValue(v, [&]() { return pos; }); @@ -52,6 +122,7 @@ void EvalState::forceValue(Value & v, Callable getPos) } +[[gnu::always_inline]] inline void EvalState::forceAttrs(Value & v, const Pos & pos) { forceAttrs(v, [&]() { return pos; }); @@ -59,6 +130,7 @@ inline void EvalState::forceAttrs(Value & v, const Pos & pos) template +[[gnu::always_inline]] inline void EvalState::forceAttrs(Value & v, Callable getPos) { forceValue(v, getPos); @@ -67,6 +139,7 @@ inline void EvalState::forceAttrs(Value & v, Callable getPos) } +[[gnu::always_inline]] inline void EvalState::forceList(Value & v, const Pos & pos) { forceValue(v, pos); @@ -74,18 +147,5 @@ inline void EvalState::forceList(Value & v, const Pos & pos) throwTypeError(pos, "value is %1% while a list was expected", v); } -/* Note: Various places expect the allocated memory to be zeroed. */ -inline void * allocBytes(size_t n) -{ - void * p; -#if HAVE_BOEHMGC - p = GC_MALLOC(n); -#else - p = calloc(n, 1); -#endif - if (!p) throw std::bad_alloc(); - return p; -} - } diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index ddbbdeaa6..038b6bb7c 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -867,59 +867,6 @@ inline Value * EvalState::lookupVar(Env * env, const ExprVar & var, bool noEval) } -Value * EvalState::allocValue() -{ - /* We use the boehm batch allocator to speed up allocations of Values (of which there are many). - GC_malloc_many returns a linked list of objects of the given size, where the first word - of each object is also the pointer to the next object in the list. This also means that we - have to explicitly clear the first word of every object we take. */ - if (!*valueAllocCache) { - *valueAllocCache = GC_malloc_many(sizeof(Value)); - if (!*valueAllocCache) throw std::bad_alloc(); - } - - /* GC_NEXT is a convenience macro for accessing the first word of an object. 
- Take the first list item, advance the list to the next item, and clear the next pointer. */ - void * p = *valueAllocCache; - *valueAllocCache = GC_NEXT(p); - GC_NEXT(p) = nullptr; - - nrValues++; - auto v = (Value *) p; - return v; -} - - -Env & EvalState::allocEnv(size_t size) -{ - nrEnvs++; - nrValuesInEnvs += size; - - Env * env; - - if (size != 1) - env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *)); - else { - /* see allocValue for explanations. */ - if (!*env1AllocCache) { - *env1AllocCache = GC_malloc_many(sizeof(Env) + sizeof(Value *)); - if (!*env1AllocCache) throw std::bad_alloc(); - } - - void * p = *env1AllocCache; - *env1AllocCache = GC_NEXT(p); - GC_NEXT(p) = nullptr; - env = (Env *) p; - } - - env->type = Env::Plain; - - /* We assume that env->values has been cleared by the allocator; maybeThunk() and lookupVar fromWith expect this. */ - - return *env; -} - - void EvalState::mkList(Value & v, size_t size) { v.mkList(size); diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index 41384f044..d2efe8a47 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -350,8 +350,8 @@ public: void autoCallFunction(Bindings & args, Value & fun, Value & res); /* Allocation primitives. */ - Value * allocValue(); - Env & allocEnv(size_t size); + inline Value * allocValue(); + inline Env & allocEnv(size_t size); Value * allocAttr(Value & vAttrs, const Symbol & name); Value * allocAttr(Value & vAttrs, std::string_view name); @@ -512,3 +512,5 @@ extern EvalSettings evalSettings; static const std::string corepkgsPrefix{"/__corepkgs__/"}; } + +#include "eval-inline.hh" From 47baa9d43c0339b0a738b9b75c5ddcfb07d7131d Mon Sep 17 00:00:00 2001 From: pennae Date: Sat, 1 Jan 2022 19:24:20 +0100 Subject: [PATCH 4/6] make Pos smaller reduces peak heap memory use on eval of our test system from 264.4MB to 242.3MB, possibly also a slight performance boost.
theoretically memory use could be cut down by another eight bytes per Pos on average by turning it into a tuple containing an index into a global base position table with row and column offsets, but that doesn't seem worth the effort at this point. --- src/libexpr/nixexpr.hh | 13 ++++++------- src/libutil/error.hh | 1 + 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/libexpr/nixexpr.hh b/src/libexpr/nixexpr.hh index 12b54b8eb..4dbe31510 100644 --- a/src/libexpr/nixexpr.hh +++ b/src/libexpr/nixexpr.hh @@ -23,14 +23,13 @@ MakeError(RestrictedPathError, Error); struct Pos { - FileOrigin origin; Symbol file; - unsigned int line, column; - - Pos() : origin(foString), line(0), column(0) { } - Pos(FileOrigin origin, const Symbol & file, unsigned int line, unsigned int column) - : origin(origin), file(file), line(line), column(column) { } - + uint32_t line; + FileOrigin origin:2; + uint32_t column:30; + Pos() : line(0), origin(foString), column(0) { }; + Pos(FileOrigin origin, const Symbol & file, uint32_t line, uint32_t column) + : file(file), line(line), origin(origin), column(column) { }; operator bool() const { return line != 0; diff --git a/src/libutil/error.hh b/src/libutil/error.hh index d55e1d701..bb43aa53b 100644 --- a/src/libutil/error.hh +++ b/src/libutil/error.hh @@ -53,6 +53,7 @@ typedef enum { lvlVomit } Verbosity; +/* adjust Pos::origin bit width when adding stuff here */ typedef enum { foFile, foStdin, From 8e2eaaaf69d9e216fce3ca6f7913bd0e2048e4b2 Mon Sep 17 00:00:00 2001 From: pennae Date: Fri, 31 Dec 2021 00:50:23 +0100 Subject: [PATCH 5/6] make Finally more local no need for function<> with c++17 deduction. this saves allocations and virtual calls, but has the same semantics otherwise. not going through function has the side effect of giving compilers more insight into the cleanup code, so we need a few local warning disables. 
--- src/libutil/finally.hh | 7 +++---- src/libutil/util.cc | 15 ++++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/libutil/finally.hh b/src/libutil/finally.hh index 7760cfe9a..dee2e8d2f 100644 --- a/src/libutil/finally.hh +++ b/src/libutil/finally.hh @@ -1,14 +1,13 @@ #pragma once -#include - /* A trivial class to run a function at the end of a scope. */ +template class Finally { private: - std::function fun; + Fn fun; public: - Finally(std::function fun) : fun(fun) { } + Finally(Fn fun) : fun(std::move(fun)) { } ~Finally() { fun(); } }; diff --git a/src/libutil/util.cc b/src/libutil/util.cc index b833038a9..9f13d5f02 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -682,7 +682,14 @@ std::string drainFD(int fd, bool block, const size_t reserveSize) void drainFD(int fd, Sink & sink, bool block) { - int saved; + // silence GCC maybe-uninitialized warning in finally + int saved = 0; + + if (!block) { + saved = fcntl(fd, F_GETFL); + if (fcntl(fd, F_SETFL, saved | O_NONBLOCK) == -1) + throw SysError("making file descriptor non-blocking"); + } Finally finally([&]() { if (!block) { @@ -691,12 +698,6 @@ void drainFD(int fd, Sink & sink, bool block) } }); - if (!block) { - saved = fcntl(fd, F_GETFL); - if (fcntl(fd, F_SETFL, saved | O_NONBLOCK) == -1) - throw SysError("making file descriptor non-blocking"); - } - std::vector buf(64 * 1024); while (1) { checkInterrupt(); From 4d629c4f7abbbe58dfe6d9d2b37541cdf2331606 Mon Sep 17 00:00:00 2001 From: pennae Date: Wed, 5 Jan 2022 01:48:26 +0100 Subject: [PATCH 6/6] add HAVE_BOEHMGC guards to batched allocation functions --- src/libexpr/eval-inline.hh | 13 +++++++++---- src/libexpr/eval.hh | 2 ++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/libexpr/eval-inline.hh b/src/libexpr/eval-inline.hh index 3331a7643..08a419923 100644 --- a/src/libexpr/eval-inline.hh +++ b/src/libexpr/eval-inline.hh @@ -42,6 +42,7 @@ inline void * allocBytes(size_t n) [[gnu::always_inline]] 
Value * EvalState::allocValue() { +#if HAVE_BOEHMGC /* We use the boehm batch allocator to speed up allocations of Values (of which there are many). GC_malloc_many returns a linked list of objects of the given size, where the first word of each object is also the pointer to the next object in the list. This also means that we @@ -56,6 +57,9 @@ Value * EvalState::allocValue() void * p = *valueAllocCache; *valueAllocCache = GC_NEXT(p); GC_NEXT(p) = nullptr; +#else + void * p = allocBytes(sizeof(Value)); +#endif nrValues++; return (Value *) p; @@ -70,9 +74,8 @@ Env & EvalState::allocEnv(size_t size) Env * env; - if (size != 1) - env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *)); - else { +#if HAVE_BOEHMGC + if (size == 1) { /* see allocValue for explanations. */ if (!*env1AllocCache) { *env1AllocCache = GC_malloc_many(sizeof(Env) + sizeof(Value *)); @@ -83,7 +86,9 @@ Env & EvalState::allocEnv(size_t size) *env1AllocCache = GC_NEXT(p); GC_NEXT(p) = nullptr; env = (Env *) p; - } + } else +#endif + env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *)); env->type = Env::Plain; diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index d2efe8a47..f1e00bae7 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -133,11 +133,13 @@ private: /* Cache used by prim_match(). */ std::shared_ptr regexCache; +#if HAVE_BOEHMGC /* Allocation cache for GC'd Value objects. */ std::shared_ptr valueAllocCache; /* Allocation cache for size-1 Env objects. */ std::shared_ptr env1AllocCache; +#endif public: