force-inline a few much-used functions

these functions are called a whole lot, and they're all comparatively small.
always inlining them gives ~0.7% performance boost on eval.

before:

  Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello
    Time (mean ± σ):      6.935 s ±  0.052 s    [User: 5.852 s, System: 0.853 s]
    Range (min … max):    6.808 s …  7.026 s    20 runs

  Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
    Time (mean ± σ):     329.8 ms ±   2.7 ms    [User: 299.0 ms, System: 30.8 ms]
    Range (min … max):   326.6 ms … 336.5 ms    20 runs

  Benchmark 3: nix flakes eval --raw --impure --file expr.nix
    Time (mean ± σ):      2.655 s ±  0.038 s    [User: 2.364 s, System: 0.220 s]
    Range (min … max):    2.574 s …  2.737 s    20 runs

after:

  Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello
    Time (mean ± σ):      6.912 s ±  0.036 s    [User: 5.823 s, System: 0.856 s]
    Range (min … max):    6.849 s …  6.980 s    20 runs

  Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
    Time (mean ± σ):     325.1 ms ±   2.5 ms    [User: 293.2 ms, System: 31.8 ms]
    Range (min … max):   322.2 ms … 332.8 ms    20 runs

  Benchmark 3: nix flakes eval --raw --impure --file expr.nix
    Time (mean ± σ):      2.636 s ±  0.024 s    [User: 2.352 s, System: 0.226 s]
    Range (min … max):    2.574 s …  2.681 s    20 runs
This commit is contained in:
pennae 2021-12-28 19:18:17 +01:00
parent 60ed4e908a
commit c96460f352
3 changed files with 77 additions and 68 deletions

View file

@ -24,6 +24,76 @@ LocalNoInlineNoReturn(void throwTypeError(const Pos & pos, const char * s, const
}
/* Note: Various places expect the allocated memory to be zeroed. */
[[gnu::always_inline]]
inline void * allocBytes(size_t n)
{
void * p;
#if HAVE_BOEHMGC
p = GC_MALLOC(n);
#else
p = calloc(n, 1);
#endif
if (!p) throw std::bad_alloc();
return p;
}
[[gnu::always_inline]]
Value * EvalState::allocValue()
{
/* We use the boehm batch allocator to speed up allocations of Values (of which there are many).
GC_malloc_many returns a linked list of objects of the given size, where the first word
of each object is also the pointer to the next object in the list. This also means that we
have to explicitly clear the first word of every object we take. */
if (!*valueAllocCache) {
*valueAllocCache = GC_malloc_many(sizeof(Value));
if (!*valueAllocCache) throw std::bad_alloc();
}
/* GC_NEXT is a convenience macro for accessing the first word of an object.
Take the first list item, advance the list to the next item, and clear the next pointer. */
void * p = *valueAllocCache;
*valueAllocCache = GC_NEXT(p);
GC_NEXT(p) = nullptr;
nrValues++;
return (Value *) p;
}
[[gnu::always_inline]]
Env & EvalState::allocEnv(size_t size)
{
nrEnvs++;
nrValuesInEnvs += size;
Env * env;
if (size != 1)
env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *));
else {
/* see allocValue for explanations. */
if (!*env1AllocCache) {
*env1AllocCache = GC_malloc_many(sizeof(Env) + sizeof(Value *));
if (!*env1AllocCache) throw std::bad_alloc();
}
void * p = *env1AllocCache;
*env1AllocCache = GC_NEXT(p);
GC_NEXT(p) = nullptr;
env = (Env *) p;
}
env->type = Env::Plain;
/* We assume that env->values has been cleared by the allocator; maybeThunk() and lookupVar fromWith expect this. */
return *env;
}
[[gnu::always_inline]]
void EvalState::forceValue(Value & v, const Pos & pos)
{
forceValue(v, [&]() { return pos; });
@ -52,6 +122,7 @@ void EvalState::forceValue(Value & v, Callable getPos)
}
[[gnu::always_inline]]
inline void EvalState::forceAttrs(Value & v, const Pos & pos)
{
forceAttrs(v, [&]() { return pos; });
@ -59,6 +130,7 @@ inline void EvalState::forceAttrs(Value & v, const Pos & pos)
template <typename Callable>
[[gnu::always_inline]]
inline void EvalState::forceAttrs(Value & v, Callable getPos)
{
forceValue(v, getPos);
@ -67,6 +139,7 @@ inline void EvalState::forceAttrs(Value & v, Callable getPos)
}
[[gnu::always_inline]]
inline void EvalState::forceList(Value & v, const Pos & pos)
{
forceValue(v, pos);
@ -74,18 +147,5 @@ inline void EvalState::forceList(Value & v, const Pos & pos)
throwTypeError(pos, "value is %1% while a list was expected", v);
}
/* Note: Various places expect the allocated memory to be zeroed. */
inline void * allocBytes(size_t n)
{
void * p;
#if HAVE_BOEHMGC
p = GC_MALLOC(n);
#else
p = calloc(n, 1);
#endif
if (!p) throw std::bad_alloc();
return p;
}
}

View file

@ -867,59 +867,6 @@ inline Value * EvalState::lookupVar(Env * env, const ExprVar & var, bool noEval)
}
Value * EvalState::allocValue()
{
/* We use the boehm batch allocator to speed up allocations of Values (of which there are many).
GC_malloc_many returns a linked list of objects of the given size, where the first word
of each object is also the pointer to the next object in the list. This also means that we
have to explicitly clear the first word of every object we take. */
if (!*valueAllocCache) {
*valueAllocCache = GC_malloc_many(sizeof(Value));
if (!*valueAllocCache) throw std::bad_alloc();
}
/* GC_NEXT is a convenience macro for accessing the first word of an object.
Take the first list item, advance the list to the next item, and clear the next pointer. */
void * p = *valueAllocCache;
*valueAllocCache = GC_NEXT(p);
GC_NEXT(p) = nullptr;
nrValues++;
auto v = (Value *) p;
return v;
}
Env & EvalState::allocEnv(size_t size)
{
nrEnvs++;
nrValuesInEnvs += size;
Env * env;
if (size != 1)
env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *));
else {
/* see allocValue for explanations. */
if (!*env1AllocCache) {
*env1AllocCache = GC_malloc_many(sizeof(Env) + sizeof(Value *));
if (!*env1AllocCache) throw std::bad_alloc();
}
void * p = *env1AllocCache;
*env1AllocCache = GC_NEXT(p);
GC_NEXT(p) = nullptr;
env = (Env *) p;
}
env->type = Env::Plain;
/* We assume that env->values has been cleared by the allocator; maybeThunk() and lookupVar fromWith expect this. */
return *env;
}
void EvalState::mkList(Value & v, size_t size)
{
v.mkList(size);

View file

@ -350,8 +350,8 @@ public:
void autoCallFunction(Bindings & args, Value & fun, Value & res);
/* Allocation primitives. */
Value * allocValue();
Env & allocEnv(size_t size);
inline Value * allocValue();
inline Env & allocEnv(size_t size);
Value * allocAttr(Value & vAttrs, const Symbol & name);
Value * allocAttr(Value & vAttrs, std::string_view name);
@ -512,3 +512,5 @@ extern EvalSettings evalSettings;
static const std::string corepkgsPrefix{"/__corepkgs__/"};
}
#include "eval-inline.hh"