force-inline a few much-used functions
These functions are called very frequently, and they are all comparatively small. Always inlining them gives a ~0.7% performance boost on eval.

before:

Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello
  Time (mean ± σ): 6.935 s ± 0.052 s [User: 5.852 s, System: 0.853 s]
  Range (min … max): 6.808 s … 7.026 s 20 runs

Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
  Time (mean ± σ): 329.8 ms ± 2.7 ms [User: 299.0 ms, System: 30.8 ms]
  Range (min … max): 326.6 ms … 336.5 ms 20 runs

Benchmark 3: nix flakes eval --raw --impure --file expr.nix
  Time (mean ± σ): 2.655 s ± 0.038 s [User: 2.364 s, System: 0.220 s]
  Range (min … max): 2.574 s … 2.737 s 20 runs

after:

Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello
  Time (mean ± σ): 6.912 s ± 0.036 s [User: 5.823 s, System: 0.856 s]
  Range (min … max): 6.849 s … 6.980 s 20 runs

Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
  Time (mean ± σ): 325.1 ms ± 2.5 ms [User: 293.2 ms, System: 31.8 ms]
  Range (min … max): 322.2 ms … 332.8 ms 20 runs

Benchmark 3: nix flakes eval --raw --impure --file expr.nix
  Time (mean ± σ): 2.636 s ± 0.024 s [User: 2.352 s, System: 0.226 s]
  Range (min … max): 2.574 s … 2.681 s 20 runs
This commit is contained in:
parent
60ed4e908a
commit
c96460f352
3 changed files with 77 additions and 68 deletions
|
@ -24,6 +24,76 @@ LocalNoInlineNoReturn(void throwTypeError(const Pos & pos, const char * s, const
|
|||
}
|
||||
|
||||
|
||||
/* Allocate `n` bytes, from the Boehm collector when HAVE_BOEHMGC is set and
   from calloc() otherwise. Throws std::bad_alloc if the allocator returns
   a null pointer.

   Note: Various places expect the allocated memory to be zeroed. */
[[gnu::always_inline]]
inline void * allocBytes(size_t n)
{
#if HAVE_BOEHMGC
    void * const mem = GC_MALLOC(n);
#else
    void * const mem = calloc(n, 1);
#endif
    if (mem == nullptr)
        throw std::bad_alloc();
    return mem;
}
|
||||
|
||||
|
||||
[[gnu::always_inline]]
|
||||
Value * EvalState::allocValue()
|
||||
{
|
||||
/* We use the boehm batch allocator to speed up allocations of Values (of which there are many).
|
||||
GC_malloc_many returns a linked list of objects of the given size, where the first word
|
||||
of each object is also the pointer to the next object in the list. This also means that we
|
||||
have to explicitly clear the first word of every object we take. */
|
||||
if (!*valueAllocCache) {
|
||||
*valueAllocCache = GC_malloc_many(sizeof(Value));
|
||||
if (!*valueAllocCache) throw std::bad_alloc();
|
||||
}
|
||||
|
||||
/* GC_NEXT is a convenience macro for accessing the first word of an object.
|
||||
Take the first list item, advance the list to the next item, and clear the next pointer. */
|
||||
void * p = *valueAllocCache;
|
||||
*valueAllocCache = GC_NEXT(p);
|
||||
GC_NEXT(p) = nullptr;
|
||||
|
||||
nrValues++;
|
||||
return (Value *) p;
|
||||
}
|
||||
|
||||
|
||||
[[gnu::always_inline]]
|
||||
Env & EvalState::allocEnv(size_t size)
|
||||
{
|
||||
nrEnvs++;
|
||||
nrValuesInEnvs += size;
|
||||
|
||||
Env * env;
|
||||
|
||||
if (size != 1)
|
||||
env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *));
|
||||
else {
|
||||
/* see allocValue for explanations. */
|
||||
if (!*env1AllocCache) {
|
||||
*env1AllocCache = GC_malloc_many(sizeof(Env) + sizeof(Value *));
|
||||
if (!*env1AllocCache) throw std::bad_alloc();
|
||||
}
|
||||
|
||||
void * p = *env1AllocCache;
|
||||
*env1AllocCache = GC_NEXT(p);
|
||||
GC_NEXT(p) = nullptr;
|
||||
env = (Env *) p;
|
||||
}
|
||||
|
||||
env->type = Env::Plain;
|
||||
|
||||
/* We assume that env->values has been cleared by the allocator; maybeThunk() and lookupVar fromWith expect this. */
|
||||
|
||||
return *env;
|
||||
}
|
||||
|
||||
|
||||
[[gnu::always_inline]]
|
||||
void EvalState::forceValue(Value & v, const Pos & pos)
|
||||
{
|
||||
forceValue(v, [&]() { return pos; });
|
||||
|
@ -52,6 +122,7 @@ void EvalState::forceValue(Value & v, Callable getPos)
|
|||
}
|
||||
|
||||
|
||||
[[gnu::always_inline]]
|
||||
inline void EvalState::forceAttrs(Value & v, const Pos & pos)
|
||||
{
|
||||
forceAttrs(v, [&]() { return pos; });
|
||||
|
@ -59,6 +130,7 @@ inline void EvalState::forceAttrs(Value & v, const Pos & pos)
|
|||
|
||||
|
||||
template <typename Callable>
|
||||
[[gnu::always_inline]]
|
||||
inline void EvalState::forceAttrs(Value & v, Callable getPos)
|
||||
{
|
||||
forceValue(v, getPos);
|
||||
|
@ -67,6 +139,7 @@ inline void EvalState::forceAttrs(Value & v, Callable getPos)
|
|||
}
|
||||
|
||||
|
||||
[[gnu::always_inline]]
|
||||
inline void EvalState::forceList(Value & v, const Pos & pos)
|
||||
{
|
||||
forceValue(v, pos);
|
||||
|
@ -74,18 +147,5 @@ inline void EvalState::forceList(Value & v, const Pos & pos)
|
|||
throwTypeError(pos, "value is %1% while a list was expected", v);
|
||||
}
|
||||
|
||||
/* Return a fresh block of `n` bytes, obtained from GC_MALLOC when built with
   the Boehm collector and from calloc() otherwise. A null result from the
   allocator is reported as std::bad_alloc.

   Note: Various places expect the allocated memory to be zeroed. */
inline void * allocBytes(size_t n)
{
    void * block =
#if HAVE_BOEHMGC
        GC_MALLOC(n);
#else
        calloc(n, 1);
#endif
    if (block)
        return block;
    throw std::bad_alloc();
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -867,59 +867,6 @@ inline Value * EvalState::lookupVar(Env * env, const ExprVar & var, bool noEval)
|
|||
}
|
||||
|
||||
|
||||
/* Allocate storage for one Value and bump the nrValues counter.
   Throws std::bad_alloc if no memory could be obtained. */
Value * EvalState::allocValue()
{
#if HAVE_BOEHMGC
    /* We use the boehm batch allocator to speed up allocations of Values (of which there are many).
       GC_malloc_many returns a linked list of objects of the given size, where the first word
       of each object is also the pointer to the next object in the list. This also means that we
       have to explicitly clear the first word of every object we take. */
    if (!*valueAllocCache) {
        *valueAllocCache = GC_malloc_many(sizeof(Value));
        if (!*valueAllocCache) throw std::bad_alloc();
    }

    /* GC_NEXT is a convenience macro for accessing the first word of an object.
       Take the first list item, advance the list to the next item, and clear the next pointer. */
    void * p = *valueAllocCache;
    *valueAllocCache = GC_NEXT(p);
    GC_NEXT(p) = nullptr;
#else
    /* GC_malloc_many and GC_NEXT only exist when building with the Boehm
       collector; otherwise use the generic allocator instead. */
    void * p = allocBytes(sizeof(Value));
#endif

    nrValues++;
    return static_cast<Value *>(p);
}
|
||||
|
||||
|
||||
/* Allocate an Env with room for `size` Value pointers, update the
   nrEnvs / nrValuesInEnvs counters and mark the Env as Env::Plain.
   Throws std::bad_alloc if no memory could be obtained. */
Env & EvalState::allocEnv(size_t size)
{
    nrEnvs++;
    nrValuesInEnvs += size;

    Env * env;

#if HAVE_BOEHMGC
    if (size == 1) {
        /* Size-1 Envs are served from a Boehm batch-allocated free list:
           GC_malloc_many returns a linked list whose link lives in the first
           word of each object (accessed via GC_NEXT), so that word has to be
           cleared on every object we hand out. */
        if (!*env1AllocCache) {
            *env1AllocCache = GC_malloc_many(sizeof(Env) + sizeof(Value *));
            if (!*env1AllocCache) throw std::bad_alloc();
        }

        void * p = *env1AllocCache;
        *env1AllocCache = GC_NEXT(p);
        GC_NEXT(p) = nullptr;
        env = static_cast<Env *>(p);
    } else
#endif
        /* All other sizes, and every size when building without the Boehm
           collector, use the generic allocator. */
        env = static_cast<Env *>(allocBytes(sizeof(Env) + size * sizeof(Value *)));

    env->type = Env::Plain;

    /* We assume that env->values has been cleared by the allocator; maybeThunk() and lookupVar fromWith expect this. */

    return *env;
}
|
||||
|
||||
|
||||
void EvalState::mkList(Value & v, size_t size)
|
||||
{
|
||||
v.mkList(size);
|
||||
|
|
|
@ -350,8 +350,8 @@ public:
|
|||
void autoCallFunction(Bindings & args, Value & fun, Value & res);
|
||||
|
||||
/* Allocation primitives. */
|
||||
Value * allocValue();
|
||||
Env & allocEnv(size_t size);
|
||||
inline Value * allocValue();
|
||||
inline Env & allocEnv(size_t size);
|
||||
|
||||
Value * allocAttr(Value & vAttrs, const Symbol & name);
|
||||
Value * allocAttr(Value & vAttrs, std::string_view name);
|
||||
|
@ -512,3 +512,5 @@ extern EvalSettings evalSettings;
|
|||
static const std::string corepkgsPrefix{"/__corepkgs__/"};
|
||||
|
||||
}
|
||||
|
||||
#include "eval-inline.hh"
|
||||
|
|
Loading…
Reference in a new issue