force-inline a few much-used functions
these functions are called a whole lot, and they're all comparatively small. always inlining them gives ~0.7% performance boost on eval. before: Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 6.935 s ± 0.052 s [User: 5.852 s, System: 0.853 s] Range (min … max): 6.808 s … 7.026 s 20 runs Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 329.8 ms ± 2.7 ms [User: 299.0 ms, System: 30.8 ms] Range (min … max): 326.6 ms … 336.5 ms 20 runs Benchmark 3: nix flakes eval --raw --impure --file expr.nix Time (mean ± σ): 2.655 s ± 0.038 s [User: 2.364 s, System: 0.220 s] Range (min … max): 2.574 s … 2.737 s 20 runs after: Benchmark 1: nix flakes search --no-eval-cache --offline ../nixpkgs hello Time (mean ± σ): 6.912 s ± 0.036 s [User: 5.823 s, System: 0.856 s] Range (min … max): 6.849 s … 6.980 s 20 runs Benchmark 2: nix flakes eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 325.1 ms ± 2.5 ms [User: 293.2 ms, System: 31.8 ms] Range (min … max): 322.2 ms … 332.8 ms 20 runs Benchmark 3: nix flakes eval --raw --impure --file expr.nix Time (mean ± σ): 2.636 s ± 0.024 s [User: 2.352 s, System: 0.226 s] Range (min … max): 2.574 s … 2.681 s 20 runs
This commit is contained in:
parent
60ed4e908a
commit
c96460f352
3 changed files with 77 additions and 68 deletions
|
@ -24,6 +24,76 @@ LocalNoInlineNoReturn(void throwTypeError(const Pos & pos, const char * s, const
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Note: Various places expect the allocated memory to be zeroed. */
|
||||||
|
[[gnu::always_inline]]
|
||||||
|
inline void * allocBytes(size_t n)
|
||||||
|
{
|
||||||
|
void * p;
|
||||||
|
#if HAVE_BOEHMGC
|
||||||
|
p = GC_MALLOC(n);
|
||||||
|
#else
|
||||||
|
p = calloc(n, 1);
|
||||||
|
#endif
|
||||||
|
if (!p) throw std::bad_alloc();
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
[[gnu::always_inline]]
|
||||||
|
Value * EvalState::allocValue()
|
||||||
|
{
|
||||||
|
/* We use the boehm batch allocator to speed up allocations of Values (of which there are many).
|
||||||
|
GC_malloc_many returns a linked list of objects of the given size, where the first word
|
||||||
|
of each object is also the pointer to the next object in the list. This also means that we
|
||||||
|
have to explicitly clear the first word of every object we take. */
|
||||||
|
if (!*valueAllocCache) {
|
||||||
|
*valueAllocCache = GC_malloc_many(sizeof(Value));
|
||||||
|
if (!*valueAllocCache) throw std::bad_alloc();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* GC_NEXT is a convenience macro for accessing the first word of an object.
|
||||||
|
Take the first list item, advance the list to the next item, and clear the next pointer. */
|
||||||
|
void * p = *valueAllocCache;
|
||||||
|
*valueAllocCache = GC_NEXT(p);
|
||||||
|
GC_NEXT(p) = nullptr;
|
||||||
|
|
||||||
|
nrValues++;
|
||||||
|
return (Value *) p;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
[[gnu::always_inline]]
|
||||||
|
Env & EvalState::allocEnv(size_t size)
|
||||||
|
{
|
||||||
|
nrEnvs++;
|
||||||
|
nrValuesInEnvs += size;
|
||||||
|
|
||||||
|
Env * env;
|
||||||
|
|
||||||
|
if (size != 1)
|
||||||
|
env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *));
|
||||||
|
else {
|
||||||
|
/* see allocValue for explanations. */
|
||||||
|
if (!*env1AllocCache) {
|
||||||
|
*env1AllocCache = GC_malloc_many(sizeof(Env) + sizeof(Value *));
|
||||||
|
if (!*env1AllocCache) throw std::bad_alloc();
|
||||||
|
}
|
||||||
|
|
||||||
|
void * p = *env1AllocCache;
|
||||||
|
*env1AllocCache = GC_NEXT(p);
|
||||||
|
GC_NEXT(p) = nullptr;
|
||||||
|
env = (Env *) p;
|
||||||
|
}
|
||||||
|
|
||||||
|
env->type = Env::Plain;
|
||||||
|
|
||||||
|
/* We assume that env->values has been cleared by the allocator; maybeThunk() and lookupVar fromWith expect this. */
|
||||||
|
|
||||||
|
return *env;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
[[gnu::always_inline]]
|
||||||
void EvalState::forceValue(Value & v, const Pos & pos)
|
void EvalState::forceValue(Value & v, const Pos & pos)
|
||||||
{
|
{
|
||||||
forceValue(v, [&]() { return pos; });
|
forceValue(v, [&]() { return pos; });
|
||||||
|
@ -52,6 +122,7 @@ void EvalState::forceValue(Value & v, Callable getPos)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
[[gnu::always_inline]]
|
||||||
inline void EvalState::forceAttrs(Value & v, const Pos & pos)
|
inline void EvalState::forceAttrs(Value & v, const Pos & pos)
|
||||||
{
|
{
|
||||||
forceAttrs(v, [&]() { return pos; });
|
forceAttrs(v, [&]() { return pos; });
|
||||||
|
@ -59,6 +130,7 @@ inline void EvalState::forceAttrs(Value & v, const Pos & pos)
|
||||||
|
|
||||||
|
|
||||||
template <typename Callable>
|
template <typename Callable>
|
||||||
|
[[gnu::always_inline]]
|
||||||
inline void EvalState::forceAttrs(Value & v, Callable getPos)
|
inline void EvalState::forceAttrs(Value & v, Callable getPos)
|
||||||
{
|
{
|
||||||
forceValue(v, getPos);
|
forceValue(v, getPos);
|
||||||
|
@ -67,6 +139,7 @@ inline void EvalState::forceAttrs(Value & v, Callable getPos)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
[[gnu::always_inline]]
|
||||||
inline void EvalState::forceList(Value & v, const Pos & pos)
|
inline void EvalState::forceList(Value & v, const Pos & pos)
|
||||||
{
|
{
|
||||||
forceValue(v, pos);
|
forceValue(v, pos);
|
||||||
|
@ -74,18 +147,5 @@ inline void EvalState::forceList(Value & v, const Pos & pos)
|
||||||
throwTypeError(pos, "value is %1% while a list was expected", v);
|
throwTypeError(pos, "value is %1% while a list was expected", v);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Note: Various places expect the allocated memory to be zeroed. */
|
|
||||||
inline void * allocBytes(size_t n)
|
|
||||||
{
|
|
||||||
void * p;
|
|
||||||
#if HAVE_BOEHMGC
|
|
||||||
p = GC_MALLOC(n);
|
|
||||||
#else
|
|
||||||
p = calloc(n, 1);
|
|
||||||
#endif
|
|
||||||
if (!p) throw std::bad_alloc();
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -867,59 +867,6 @@ inline Value * EvalState::lookupVar(Env * env, const ExprVar & var, bool noEval)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Value * EvalState::allocValue()
|
|
||||||
{
|
|
||||||
/* We use the boehm batch allocator to speed up allocations of Values (of which there are many).
|
|
||||||
GC_malloc_many returns a linked list of objects of the given size, where the first word
|
|
||||||
of each object is also the pointer to the next object in the list. This also means that we
|
|
||||||
have to explicitly clear the first word of every object we take. */
|
|
||||||
if (!*valueAllocCache) {
|
|
||||||
*valueAllocCache = GC_malloc_many(sizeof(Value));
|
|
||||||
if (!*valueAllocCache) throw std::bad_alloc();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* GC_NEXT is a convenience macro for accessing the first word of an object.
|
|
||||||
Take the first list item, advance the list to the next item, and clear the next pointer. */
|
|
||||||
void * p = *valueAllocCache;
|
|
||||||
*valueAllocCache = GC_NEXT(p);
|
|
||||||
GC_NEXT(p) = nullptr;
|
|
||||||
|
|
||||||
nrValues++;
|
|
||||||
auto v = (Value *) p;
|
|
||||||
return v;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
Env & EvalState::allocEnv(size_t size)
|
|
||||||
{
|
|
||||||
nrEnvs++;
|
|
||||||
nrValuesInEnvs += size;
|
|
||||||
|
|
||||||
Env * env;
|
|
||||||
|
|
||||||
if (size != 1)
|
|
||||||
env = (Env *) allocBytes(sizeof(Env) + size * sizeof(Value *));
|
|
||||||
else {
|
|
||||||
/* see allocValue for explanations. */
|
|
||||||
if (!*env1AllocCache) {
|
|
||||||
*env1AllocCache = GC_malloc_many(sizeof(Env) + sizeof(Value *));
|
|
||||||
if (!*env1AllocCache) throw std::bad_alloc();
|
|
||||||
}
|
|
||||||
|
|
||||||
void * p = *env1AllocCache;
|
|
||||||
*env1AllocCache = GC_NEXT(p);
|
|
||||||
GC_NEXT(p) = nullptr;
|
|
||||||
env = (Env *) p;
|
|
||||||
}
|
|
||||||
|
|
||||||
env->type = Env::Plain;
|
|
||||||
|
|
||||||
/* We assume that env->values has been cleared by the allocator; maybeThunk() and lookupVar fromWith expect this. */
|
|
||||||
|
|
||||||
return *env;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void EvalState::mkList(Value & v, size_t size)
|
void EvalState::mkList(Value & v, size_t size)
|
||||||
{
|
{
|
||||||
v.mkList(size);
|
v.mkList(size);
|
||||||
|
|
|
@ -350,8 +350,8 @@ public:
|
||||||
void autoCallFunction(Bindings & args, Value & fun, Value & res);
|
void autoCallFunction(Bindings & args, Value & fun, Value & res);
|
||||||
|
|
||||||
/* Allocation primitives. */
|
/* Allocation primitives. */
|
||||||
Value * allocValue();
|
inline Value * allocValue();
|
||||||
Env & allocEnv(size_t size);
|
inline Env & allocEnv(size_t size);
|
||||||
|
|
||||||
Value * allocAttr(Value & vAttrs, const Symbol & name);
|
Value * allocAttr(Value & vAttrs, const Symbol & name);
|
||||||
Value * allocAttr(Value & vAttrs, std::string_view name);
|
Value * allocAttr(Value & vAttrs, std::string_view name);
|
||||||
|
@ -512,3 +512,5 @@ extern EvalSettings evalSettings;
|
||||||
static const std::string corepkgsPrefix{"/__corepkgs__/"};
|
static const std::string corepkgsPrefix{"/__corepkgs__/"};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#include "eval-inline.hh"
|
||||||
|
|
Loading…
Reference in a new issue