lix/src/libexpr/eval-inline.hh
pennae c96460f352 force-inline a few much-used functions
these functions are called a whole lot, and they're all comparatively small.
always inlining them gives ~0.7% performance boost on eval.

before:

  Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello
    Time (mean ± σ):      6.935 s ±  0.052 s    [User: 5.852 s, System: 0.853 s]
    Range (min … max):    6.808 s …  7.026 s    20 runs

  Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
    Time (mean ± σ):     329.8 ms ±   2.7 ms    [User: 299.0 ms, System: 30.8 ms]
    Range (min … max):   326.6 ms … 336.5 ms    20 runs

  Benchmark 3: nix flakes eval --raw --impure --file expr.nix
    Time (mean ± σ):      2.655 s ±  0.038 s    [User: 2.364 s, System: 0.220 s]
    Range (min … max):    2.574 s …  2.737 s    20 runs

after:

  Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello
    Time (mean ± σ):      6.912 s ±  0.036 s    [User: 5.823 s, System: 0.856 s]
    Range (min … max):    6.849 s …  6.980 s    20 runs

  Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
    Time (mean ± σ):     325.1 ms ±   2.5 ms    [User: 293.2 ms, System: 31.8 ms]
    Range (min … max):   322.2 ms … 332.8 ms    20 runs

  Benchmark 3: nix flakes eval --raw --impure --file expr.nix
    Time (mean ± σ):      2.636 s ±  0.024 s    [User: 2.352 s, System: 0.226 s]
    Range (min … max):    2.574 s …  2.681 s    20 runs
2022-03-08 23:30:18 +01:00

152 lines
3.7 KiB
C++

#pragma once
#include "eval.hh"
#define LocalNoInline(f) static f __attribute__((noinline)); f
#define LocalNoInlineNoReturn(f) static f __attribute__((noinline, noreturn)); f
namespace nix {
LocalNoInlineNoReturn(void throwEvalError(const Pos & pos, const char * s))
{
throw EvalError({
.msg = hintfmt(s),
.errPos = pos
});
}
LocalNoInlineNoReturn(void throwTypeError(const Pos & pos, const char * s, const Value & v))
{
throw TypeError({
.msg = hintfmt(s, showType(v)),
.errPos = pos
});
}
/* Note: Various places expect the allocated memory to be zeroed. */
[[gnu::always_inline]]
inline void * allocBytes(size_t n)
{
void * p;
#if HAVE_BOEHMGC
p = GC_MALLOC(n);
#else
p = calloc(n, 1);
#endif
if (!p) throw std::bad_alloc();
return p;
}
[[gnu::always_inline]]
Value * EvalState::allocValue()
{
/* We use the boehm batch allocator to speed up allocations of Values (of which there are many).
GC_malloc_many returns a linked list of objects of the given size, where the first word
of each object is also the pointer to the next object in the list. This also means that we
have to explicitly clear the first word of every object we take. */
if (!*valueAllocCache) {
*valueAllocCache = GC_malloc_many(sizeof(Value));
if (!*valueAllocCache) throw std::bad_alloc();
}
/* GC_NEXT is a convenience macro for accessing the first word of an object.
Take the first list item, advance the list to the next item, and clear the next pointer. */
void * p = *valueAllocCache;
*valueAllocCache = GC_NEXT(p);
GC_NEXT(p) = nullptr;
nrValues++;
return (Value *) p;
}
[[gnu::always_inline]]
Env & EvalState::allocEnv(size_t size)
{
nrEnvs++;
nrValuesInEnvs += size;
Env * env;
if (size != 1)
env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *));
else {
/* see allocValue for explanations. */
if (!*env1AllocCache) {
*env1AllocCache = GC_malloc_many(sizeof(Env) + sizeof(Value *));
if (!*env1AllocCache) throw std::bad_alloc();
}
void * p = *env1AllocCache;
*env1AllocCache = GC_NEXT(p);
GC_NEXT(p) = nullptr;
env = (Env *) p;
}
env->type = Env::Plain;
/* We assume that env->values has been cleared by the allocator; maybeThunk() and lookupVar fromWith expect this. */
return *env;
}
[[gnu::always_inline]]
void EvalState::forceValue(Value & v, const Pos & pos)
{
forceValue(v, [&]() { return pos; });
}
template<typename Callable>
void EvalState::forceValue(Value & v, Callable getPos)
{
if (v.isThunk()) {
Env * env = v.thunk.env;
Expr * expr = v.thunk.expr;
try {
v.mkBlackhole();
//checkInterrupt();
expr->eval(*this, *env, v);
} catch (...) {
v.mkThunk(env, expr);
throw;
}
}
else if (v.isApp())
callFunction(*v.app.left, *v.app.right, v, noPos);
else if (v.isBlackhole())
throwEvalError(getPos(), "infinite recursion encountered");
}
[[gnu::always_inline]]
inline void EvalState::forceAttrs(Value & v, const Pos & pos)
{
forceAttrs(v, [&]() { return pos; });
}
template <typename Callable>
[[gnu::always_inline]]
inline void EvalState::forceAttrs(Value & v, Callable getPos)
{
forceValue(v, getPos);
if (v.type() != nAttrs)
throwTypeError(getPos(), "value is %1% while a set was expected", v);
}
[[gnu::always_inline]]
inline void EvalState::forceList(Value & v, const Pos & pos)
{
forceValue(v, pos);
if (!v.isList())
throwTypeError(pos, "value is %1% while a list was expected", v);
}
}