From c96460f3520862d52b7bf3108f609e20384878e7 Mon Sep 17 00:00:00 2001 From: pennae Date: Tue, 28 Dec 2021 19:18:17 +0100 Subject: [PATCH] force-inline a few much-used functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit these functions are called a whole lot, and they're all comparatively small. always inlining them gives ~0.7% performance boost on eval. before: Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 6.935 s ± 0.052 s [User: 5.852 s, System: 0.853 s] Range (min … max): 6.808 s … 7.026 s 20 runs Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 329.8 ms ± 2.7 ms [User: 299.0 ms, System: 30.8 ms] Range (min … max): 326.6 ms … 336.5 ms 20 runs Benchmark 3: nix flakes eval --raw --impure --file expr.nix Time (mean ± σ): 2.655 s ± 0.038 s [User: 2.364 s, System: 0.220 s] Range (min … max): 2.574 s … 2.737 s 20 runs after: Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 6.912 s ± 0.036 s [User: 5.823 s, System: 0.856 s] Range (min … max): 6.849 s … 6.980 s 20 runs Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 325.1 ms ± 2.5 ms [User: 293.2 ms, System: 31.8 ms] Range (min … max): 322.2 ms … 332.8 ms 20 runs Benchmark 3: nix flakes eval --raw --impure --file expr.nix Time (mean ± σ): 2.636 s ± 0.024 s [User: 2.352 s, System: 0.226 s] Range (min … max): 2.574 s … 2.681 s 20 runs --- src/libexpr/eval-inline.hh | 86 ++++++++++++++++++++++++++++++++------ src/libexpr/eval.cc | 53 ----------------------- src/libexpr/eval.hh | 6 ++- 3 files changed, 77 insertions(+), 68 deletions(-) diff --git a/src/libexpr/eval-inline.hh b/src/libexpr/eval-inline.hh index aef1f6351..3331a7643 100644 --- a/src/libexpr/eval-inline.hh +++ b/src/libexpr/eval-inline.hh @@ -24,6 +24,76 @@ LocalNoInlineNoReturn(void 
throwTypeError(const Pos & pos, const char * s, const } +/* Note: Various places expect the allocated memory to be zeroed. */ +[[gnu::always_inline]] +inline void * allocBytes(size_t n) +{ + void * p; +#if HAVE_BOEHMGC + p = GC_MALLOC(n); +#else + p = calloc(n, 1); +#endif + if (!p) throw std::bad_alloc(); + return p; +} + + +[[gnu::always_inline]] +Value * EvalState::allocValue() +{ + /* We use the boehm batch allocator to speed up allocations of Values (of which there are many). + GC_malloc_many returns a linked list of objects of the given size, where the first word + of each object is also the pointer to the next object in the list. This also means that we + have to explicitly clear the first word of every object we take. */ + if (!*valueAllocCache) { + *valueAllocCache = GC_malloc_many(sizeof(Value)); + if (!*valueAllocCache) throw std::bad_alloc(); + } + + /* GC_NEXT is a convenience macro for accessing the first word of an object. + Take the first list item, advance the list to the next item, and clear the next pointer. */ + void * p = *valueAllocCache; + *valueAllocCache = GC_NEXT(p); + GC_NEXT(p) = nullptr; + + nrValues++; + return (Value *) p; +} + + +[[gnu::always_inline]] +Env & EvalState::allocEnv(size_t size) +{ + nrEnvs++; + nrValuesInEnvs += size; + + Env * env; + + if (size != 1) + env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *)); + else { + /* see allocValue for explanations. */ + if (!*env1AllocCache) { + *env1AllocCache = GC_malloc_many(sizeof(Env) + sizeof(Value *)); + if (!*env1AllocCache) throw std::bad_alloc(); + } + + void * p = *env1AllocCache; + *env1AllocCache = GC_NEXT(p); + GC_NEXT(p) = nullptr; + env = (Env *) p; + } + + env->type = Env::Plain; + + /* We assume that env->values has been cleared by the allocator; maybeThunk() and lookupVar fromWith expect this. 
*/ + + return *env; +} + + +[[gnu::always_inline]] void EvalState::forceValue(Value & v, const Pos & pos) { forceValue(v, [&]() { return pos; }); @@ -52,6 +122,7 @@ void EvalState::forceValue(Value & v, Callable getPos) } +[[gnu::always_inline]] inline void EvalState::forceAttrs(Value & v, const Pos & pos) { forceAttrs(v, [&]() { return pos; }); @@ -59,6 +130,7 @@ inline void EvalState::forceAttrs(Value & v, const Pos & pos) template <typename Callable> +[[gnu::always_inline]] inline void EvalState::forceAttrs(Value & v, Callable getPos) { forceValue(v, getPos); @@ -67,6 +139,7 @@ inline void EvalState::forceAttrs(Value & v, Callable getPos) } +[[gnu::always_inline]] inline void EvalState::forceList(Value & v, const Pos & pos) { forceValue(v, pos); @@ -74,18 +147,5 @@ inline void EvalState::forceList(Value & v, const Pos & pos) throwTypeError(pos, "value is %1% while a list was expected", v); } -/* Note: Various places expect the allocated memory to be zeroed. */ -inline void * allocBytes(size_t n) -{ - void * p; -#if HAVE_BOEHMGC - p = GC_MALLOC(n); -#else - p = calloc(n, 1); -#endif - if (!p) throw std::bad_alloc(); - return p; -} - } diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index ddbbdeaa6..038b6bb7c 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -867,59 +867,6 @@ inline Value * EvalState::lookupVar(Env * env, const ExprVar & var, bool noEval) } -Value * EvalState::allocValue() -{ - /* We use the boehm batch allocator to speed up allocations of Values (of which there are many). - GC_malloc_many returns a linked list of objects of the given size, where the first word - of each object is also the pointer to the next object in the list. This also means that we - have to explicitly clear the first word of every object we take. */ - if (!*valueAllocCache) { - *valueAllocCache = GC_malloc_many(sizeof(Value)); - if (!*valueAllocCache) throw std::bad_alloc(); - } - - /* GC_NEXT is a convenience macro for accessing the first word of an object.
- Take the first list item, advance the list to the next item, and clear the next pointer. */ - void * p = *valueAllocCache; - *valueAllocCache = GC_NEXT(p); - GC_NEXT(p) = nullptr; - - nrValues++; - auto v = (Value *) p; - return v; -} - - -Env & EvalState::allocEnv(size_t size) -{ - nrEnvs++; - nrValuesInEnvs += size; - - Env * env; - - if (size != 1) - env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *)); - else { - /* see allocValue for explanations. */ - if (!*env1AllocCache) { - *env1AllocCache = GC_malloc_many(sizeof(Env) + sizeof(Value *)); - if (!*env1AllocCache) throw std::bad_alloc(); - } - - void * p = *env1AllocCache; - *env1AllocCache = GC_NEXT(p); - GC_NEXT(p) = nullptr; - env = (Env *) p; - } - - env->type = Env::Plain; - - /* We assume that env->values has been cleared by the allocator; maybeThunk() and lookupVar fromWith expect this. */ - - return *env; -} - - void EvalState::mkList(Value & v, size_t size) { v.mkList(size); diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index 41384f044..d2efe8a47 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -350,8 +350,8 @@ public: void autoCallFunction(Bindings & args, Value & fun, Value & res); /* Allocation primitives. */ - Value * allocValue(); - Env & allocEnv(size_t size); + inline Value * allocValue(); + inline Env & allocEnv(size_t size); Value * allocAttr(Value & vAttrs, const Symbol & name); Value * allocAttr(Value & vAttrs, std::string_view name); @@ -512,3 +512,5 @@ extern EvalSettings evalSettings; static const std::string corepkgsPrefix{"/__corepkgs__/"}; } + +#include "eval-inline.hh"