From a9f026f8b180f098f0b1f78d2995ee545a145b9e Mon Sep 17 00:00:00 2001 From: Mel Zuser Date: Wed, 15 May 2024 20:38:02 -0700 Subject: [PATCH] libexpr: store lazy string lengths Splitup the internal rep of string to allow for storing string lengths without hurting overall eval performance. If the length is not known at the time the value is created, it will be cached after the first call to str(). This fixes O(n) substring, O(n^2) stringToCharacters, etc. Change-Id: I10a6f16544f6619b0726f77b617acd517ee87a00 --- src/libexpr/eval.cc | 59 ++++++++++++++++-------- src/libexpr/primops.cc | 2 +- src/libexpr/value.hh | 101 +++++++++++++++++++++++++++++++++++------ 3 files changed, 127 insertions(+), 35 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index d1f3e59d9..f727f9a7b 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -94,6 +94,14 @@ static const char * makeImmutableString(std::string_view s) return t; } +static Value::StringMeta * makeStringMeta(size_t size, const char * * context) +{ + auto meta = (Value::StringMeta *) allocBytes(sizeof(Value::StringMeta)); + meta->size = size; + meta->context = context; + return meta; +} + RootValue allocRootValue(Value * v) { @@ -157,7 +165,10 @@ std::string showType(const Value & v) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wswitch-enum" switch (v.internalType) { - case tString: return v.string.context ? "a string with context" : "a string"; + case tStringEmpty: return v.stringContext() ? "a string with context" : "a string"; + case tStringUnknownSize: return "a string"; + case tStringKnownSize: return "a string"; + case tStringWithContext: return "a string with context"; case tPrimOp: return fmt("the built-in function '%s'", std::string(v.primOp->name)); case tPrimOpApp: @@ -877,34 +888,41 @@ DebugTraceStacker::DebugTraceStacker(EvalState & evalState, DebugTrace t) evalState.runDebugRepl(nullptr, trace.env, trace.expr); } -void Value::mkString(std::string_view s) +void Value::mkString(const char * s, size_t size, const char * * context) { - mkString(makeImmutableString(s)); + + if (size == 0) { + mkEmptyString(context); + } else if (! context) { + mkStringKnownSize(s, size); + } else { + mkStringWithContext(s, makeStringMeta(size, context)); + } } - -static void copyContextToValue(Value & v, const NixStringContext & context) +void Value::mkString(std::string_view s) { - if (!context.empty()) { - size_t n = 0; - v.string.context = (const char * *) - allocBytes((context.size() + 1) * sizeof(char *)); - for (auto & i : context) - v.string.context[n++] = dupString(i.to_string().c_str()); - v.string.context[n] = 0; - } + mkString(makeImmutableString(s), s.size()); } void Value::mkString(std::string_view s, const NixStringContext & context) { - mkString(s); - copyContextToValue(*this, context); + mkStringMove(makeImmutableString(s), s.size(), context); } -void Value::mkStringMove(const char * s, const NixStringContext & context) +void Value::mkStringMove(const char * s, size_t size, const NixStringContext & context) { - mkString(s); - copyContextToValue(*this, context); + if (context.empty()) + mkString(s, size); + else { + auto vContext = (const char * *) allocBytes((context.size() + 1) * sizeof(char *)); + mkString(s, size, vContext); + + size_t n = 0; + for (auto & i : context) + vContext[n++] = dupString(i.to_string().c_str()); + vContext[n] = 0; + } } @@ -2091,8 +2109,9 @@ void ExprConcatStrings::eval(EvalState & state, Env & env, Value & v) if (!context.empty()) state.error("a string that refers to a store path cannot be appended to a path").atPos(pos).withFrame(env, *this).debugThrow(); v.mkPath(CanonPath(canonPath(str()))); - } else - v.mkStringMove(c_str(), context); + } else { + v.mkStringMove(c_str(), sSize, context); + } } diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 2d4eb9d33..6aafe23f5 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -3740,7 +3740,7 @@ static void prim_substring(EvalState & state, const PosIdx pos, Value * * args, if (len == 0) { state.forceValue(*args[2], pos); if (args[2]->type() == nString) { - v.mkString("", args[2]->stringContext()); + v.mkEmptyString(args[2]->stringContext()); return; } } diff --git a/src/libexpr/value.hh b/src/libexpr/value.hh index 73981d869..2b4d35027 100644 --- a/src/libexpr/value.hh +++ b/src/libexpr/value.hh @@ -23,7 +23,10 @@ class BindingsBuilder; typedef enum { tInt = 1, tBool, - tString, + tStringEmpty, + tStringUnknownSize, + tStringKnownSize, + tStringWithContext, tPath, tNull, tAttrs, @@ -159,6 +162,13 @@ public: inline bool isPrimOp() const { return internalType == tPrimOp; }; inline bool isPrimOpApp() const { return internalType == tPrimOpApp; }; + // Widening Value kills eval performace so we use an extra indirection + // to carry more metadata. + struct StringMeta { + size_t size; + const char * * context; // must be in sorted order, see note below + }; + union { NixInt integer; @@ -186,10 +196,29 @@ public: * For canonicity, the store paths should be in sorted order. */ + + // When a string is empty we can store the context directly. + struct { + const char * * context; + } emptyString; + + // When the context is empty, we can use the InternalType + // to be lazy about calculating the size of the string. struct { const char * s; - const char * * context; // must be in sorted order - } string; + } stringUnknownSize; + struct { + const char * s; + size_t size; + } stringKnownSize; + + // We happen to always have a size available when we're + // constucting a string with context. If this changes + // use the same trick as for strings without context. + struct { + const char * s; + const StringMeta * meta; + } stringWithContext; const char * _path; Bindings * attrs; @@ -229,7 +258,7 @@ public: switch (internalType) { case tInt: return nInt; case tBool: return nBool; - case tString: return nString; + case tStringEmpty: case tStringUnknownSize: case tStringKnownSize: case tStringWithContext: return nString; case tPath: return nPath; case tNull: return nNull; case tAttrs: return nAttrs; @@ -268,22 +297,55 @@ public: boolean = b; } - inline void mkString(const char * s, const char * * context = 0) + inline void mkEmptyString(const char * * context = 0) { - internalType = tString; - string.s = s; - string.context = context; + clearValue(); + internalType = tStringEmpty; + emptyString.context = context; + } + + inline void mkStringUnknownSize(const char * s) + { + clearValue(); + internalType = tStringUnknownSize; + stringUnknownSize.s = s; + } + + inline void mkStringKnownSize(const char * s, size_t size) + { + internalType = tStringKnownSize; + stringKnownSize.s = s; + stringKnownSize.size = size; + } + + inline void mkStringWithContext(const char * s, StringMeta * meta) + { + internalType = tStringWithContext; + stringWithContext.s = s; + stringWithContext.meta = meta; + } + + void mkString(const char * s, size_t size, const char * * context = 0); + + // Don't change this proto. You'll get upcast to string_view and kill the gc. + inline void mkString(const char * s) + { + if (s[0] == '\0') + mkEmptyString(); + else + mkStringUnknownSize(s); } void mkString(std::string_view s); void mkString(std::string_view s, const NixStringContext & context); - void mkStringMove(const char * s, const NixStringContext & context); + void mkStringMove(const char * s, size_t size, const NixStringContext & context); - inline void mkString(const Symbol & s) + inline void mkString(const Symbol & sym) { - mkString(((const std::string &) s).c_str()); + auto s = (const std::string &) sym; + mkString(s.c_str(), s.size()); } void mkPath(const SourcePath & path); @@ -444,7 +506,10 @@ public: const char * c_str() const { switch(internalType) { - case tString: return string.s; + case tStringEmpty: return ""; + case tStringUnknownSize: return stringUnknownSize.s; + case tStringKnownSize: return stringKnownSize.s; + case tStringWithContext: return stringWithContext.s; default: abort(); } } @@ -452,7 +517,12 @@ public: std::string_view str() { switch(internalType) { - case tString: return std::string_view(string.s); + case tStringEmpty: return std::string_view(""); + case tStringUnknownSize: + mkStringKnownSize(stringUnknownSize.s, strlen(stringUnknownSize.s)); + return std::string_view(stringKnownSize.s, stringKnownSize.size); + case tStringKnownSize: return std::string_view(stringKnownSize.s, stringKnownSize.size); + case tStringWithContext: return std::string_view(stringWithContext.s, stringWithContext.meta->size); default: abort(); } } @@ -460,7 +530,10 @@ public: const char * * stringContext() const { switch(internalType) { - case tString: return string.context; + case tStringEmpty: return emptyString.context; + case tStringUnknownSize: return 0; + case tStringKnownSize: return 0; + case tStringWithContext: return stringWithContext.meta->context; default: abort(); } }