From 26a8b220eb7470e132b9bcedb94b58492cdd786f Mon Sep 17 00:00:00 2001
From: pennae <github@quasiparticle.net>
Date: Tue, 28 Dec 2021 22:26:59 +0100
Subject: [PATCH 1/3] avoid ostream sentries per json string character
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

we don't have to create an ostream sentry object for every character of a JSON
string we write. format a bunch of characters and flush them to the stream all
at once instead.

this doesn't affect small numbers of string characters, but larger numbers of
total JSON string characters written gain a lot. at 1MB of total string written
we gain almost 30%, at 16MB it's almost a factor of 3x. large numbers of JSON
string characters do occur naturally in a nixos system evaluation to generate
documentation (though this is now somewhat mitigated by caching the largest part
of nixos option docs).

benchmarked with

  hyperfine 'nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) {e})"' --warmup 1 -L e 1,4,256,4096,65536

before:

  Benchmark 1: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 1)"
    Time (mean ± σ):      12.5 ms ±   0.2 ms    [User: 9.2 ms, System: 4.0 ms]
    Range (min … max):    11.9 ms …  13.1 ms    223 runs

  Benchmark 2: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 4)"
    Time (mean ± σ):      12.5 ms ±   0.2 ms    [User: 9.3 ms, System: 3.8 ms]
    Range (min … max):    11.9 ms …  13.2 ms    220 runs

  Benchmark 3: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 256)"
    Time (mean ± σ):      13.2 ms ±   0.3 ms    [User: 9.8 ms, System: 4.0 ms]
    Range (min … max):    12.6 ms …  14.3 ms    205 runs

  Benchmark 4: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 4096)"
    Time (mean ± σ):      24.0 ms ±   0.4 ms    [User: 19.4 ms, System: 5.2 ms]
    Range (min … max):    22.7 ms …  25.8 ms    119 runs

  Benchmark 5: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 65536)"
    Time (mean ± σ):     196.0 ms ±   3.7 ms    [User: 171.2 ms, System: 25.8 ms]
    Range (min … max):   190.6 ms … 201.5 ms    14 runs

after:

  Benchmark 1: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 1)"
    Time (mean ± σ):      12.4 ms ±   0.3 ms    [User: 9.1 ms, System: 4.0 ms]
    Range (min … max):    11.7 ms …  13.3 ms    204 runs

  Benchmark 2: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 4)"
    Time (mean ± σ):      12.4 ms ±   0.2 ms    [User: 9.2 ms, System: 3.9 ms]
    Range (min … max):    11.8 ms …  13.0 ms    214 runs

  Benchmark 3: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 256)"
    Time (mean ± σ):      12.6 ms ±   0.2 ms    [User: 9.5 ms, System: 3.8 ms]
    Range (min … max):    12.1 ms …  13.3 ms    209 runs

  Benchmark 4: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 4096)"
    Time (mean ± σ):      15.9 ms ±   0.2 ms    [User: 11.4 ms, System: 5.1 ms]
    Range (min … max):    15.2 ms …  16.4 ms    171 runs

  Benchmark 5: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 65536)"
    Time (mean ± σ):      69.0 ms ±   0.9 ms    [User: 44.3 ms, System: 25.3 ms]
    Range (min … max):    67.2 ms …  70.9 ms    42 runs
---
 src/libutil/json.cc | 42 ++++++++++++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/src/libutil/json.cc b/src/libutil/json.cc
index 01331947e..3a981376f 100644
--- a/src/libutil/json.cc
+++ b/src/libutil/json.cc
@@ -7,16 +7,38 @@ namespace nix {
 
 void toJSON(std::ostream & str, const char * start, const char * end)
 {
-    str << '"';
-    for (auto i = start; i != end; i++)
-        if (*i == '\"' || *i == '\\') str << '\\' << *i;
-        else if (*i == '\n') str << "\\n";
-        else if (*i == '\r') str << "\\r";
-        else if (*i == '\t') str << "\\t";
-        else if (*i >= 0 && *i < 32)
-            str << "\\u" << std::setfill('0') << std::setw(4) << std::hex << (uint16_t) *i << std::dec;
-        else str << *i;
-    str << '"';
+    constexpr size_t BUF_SIZE = 4096;
+    char buf[BUF_SIZE + 7]; // BUF_SIZE + largest single sequence of puts
+    size_t bufPos = 0;
+
+    const auto flush = [&] {
+        str.write(buf, bufPos);
+        bufPos = 0;
+    };
+    const auto put = [&] (char c) {
+        buf[bufPos++] = c;
+    };
+
+    put('"');
+    for (auto i = start; i != end; i++) {
+        if (bufPos >= BUF_SIZE) flush();
+        if (*i == '\"' || *i == '\\') { put('\\'); put(*i); }
+        else if (*i == '\n') { put('\\'); put('n'); }
+        else if (*i == '\r') { put('\\'); put('r'); }
+        else if (*i == '\t') { put('\\'); put('t'); }
+        else if (*i >= 0 && *i < 32) {
+            const char hex[17] = "0123456789abcdef";
+            put('\\');
+            put('u');
+            put(hex[(uint16_t(*i) >> 12) & 0xf]);
+            put(hex[(uint16_t(*i) >>  8) & 0xf]);
+            put(hex[(uint16_t(*i) >>  4) & 0xf]);
+            put(hex[(uint16_t(*i) >>  0) & 0xf]);
+        }
+        else put(*i);
+    }
+    put('"');
+    flush();
 }
 
 void toJSON(std::ostream & str, const char * s)

From 5838354d342f1cdd09da7099e86123b36ecec409 Mon Sep 17 00:00:00 2001
From: pennae <github@quasiparticle.net>
Date: Mon, 27 Dec 2021 02:04:49 +0100
Subject: [PATCH 2/3] optimize ExprConcatStrings::eval
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

constructing an ostringstream for non-string concats (like integer addition) is
a small constant cost that we can avoid. for string concats we can keep all the
string temporaries we get from coerceToString and concatenate them in one go,
which saves a lot of intermediate temporaries and copies in ostringstream. we
can also avoid copying the concatenated string again by directly allocating it
in GC memory and moving ownership of the concatenated string into the target
value.

saves about 2% on system eval.

before:

  Benchmark 1: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
    Time (mean ± σ):      2.837 s ±  0.031 s    [User: 2.562 s, System: 0.191 s]
    Range (min … max):    2.796 s …  2.892 s    20 runs

after:

  Benchmark 1: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
    Time (mean ± σ):      2.790 s ±  0.035 s    [User: 2.532 s, System: 0.187 s]
    Range (min … max):    2.722 s …  2.836 s    20 runs
---
 src/libexpr/eval.cc  | 75 ++++++++++++++++++++++++++++++++++++--------
 src/libexpr/value.hh |  2 ++
 2 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc
index 81aa86641..61bccd6e2 100644
--- a/src/libexpr/eval.cc
+++ b/src/libexpr/eval.cc
@@ -36,6 +36,19 @@
 namespace nix {
 
 
+static char * allocString(size_t size)
+{
+    char * t;
+#if HAVE_BOEHMGC
+    t = (char *) GC_MALLOC_ATOMIC(size);
+#else
+    t = malloc(size);
+#endif
+    if (!t) throw std::bad_alloc();
+    return t;
+}
+
+
 static char * dupString(const char * s)
 {
     char * t;
@@ -771,17 +784,28 @@ void Value::mkString(std::string_view s)
 }
 
 
+static void copyContextToValue(Value & v, const PathSet & context)
+{
+    if (!context.empty()) {
+        size_t n = 0;
+        v.string.context = (const char * *)
+            allocBytes((context.size() + 1) * sizeof(char *));
+        for (auto & i : context)
+            v.string.context[n++] = dupString(i.c_str());
+        v.string.context[n] = 0;
+    }
+}
+
 void Value::mkString(std::string_view s, const PathSet & context)
 {
     mkString(s);
-    if (!context.empty()) {
-        size_t n = 0;
-        string.context = (const char * *)
-            allocBytes((context.size() + 1) * sizeof(char *));
-        for (auto & i : context)
-            string.context[n++] = dupString(i.c_str());
-        string.context[n] = 0;
-    }
+    copyContextToValue(*this, context);
+}
+
+void Value::mkStringMove(const char * s, const PathSet & context)
+{
+    mkString(s);
+    copyContextToValue(*this, context);
 }
 
 
@@ -1660,13 +1684,34 @@ void EvalState::concatLists(Value & v, size_t nrLists, Value * * lists, const Po
 void ExprConcatStrings::eval(EvalState & state, Env & env, Value & v)
 {
     PathSet context;
-    std::ostringstream s;
+    std::vector<std::string> s;
+    size_t sSize = 0;
     NixInt n = 0;
     NixFloat nf = 0;
 
     bool first = !forceString;
     ValueType firstType = nString;
 
+    const auto str = [&] {
+        std::string result;
+        result.reserve(sSize);
+        for (const auto & part : s) result += part;
+        return result;
+    };
+    /* c_str() is not str().c_str() because we want to create a string
+       Value. allocating a GC'd string directly and moving it into a
+       Value lets us avoid an allocation and copy. */
+    const auto c_str = [&] {
+        char * result = allocString(sSize + 1);
+        char * tmp = result;
+        for (const auto & part : s) {
+            memcpy(tmp, part.c_str(), part.size());
+            tmp += part.size();
+        }
+        *tmp = 0;
+        return result;
+    };
+
     for (auto & [i_pos, i] : *es) {
         Value vTmp;
         i->eval(state, env, vTmp);
@@ -1696,11 +1741,15 @@ void ExprConcatStrings::eval(EvalState & state, Env & env, Value & v)
                 nf += vTmp.fpoint;
             } else
                 throwEvalError(i_pos, "cannot add %1% to a float", showType(vTmp));
-        } else
+        } else {
+            if (s.empty()) s.reserve(es->size());
             /* skip canonization of first path, which would only be not
             canonized in the first place if it's coming from a ./${foo} type
             path */
-            s << state.coerceToString(i_pos, vTmp, context, false, firstType == nString, !first);
+            s.emplace_back(
+                state.coerceToString(i_pos, vTmp, context, false, firstType == nString, !first));
+            sSize += s.back().size();
+        }
 
         first = false;
     }
@@ -1712,9 +1761,9 @@ void ExprConcatStrings::eval(EvalState & state, Env & env, Value & v)
     else if (firstType == nPath) {
         if (!context.empty())
             throwEvalError(pos, "a string that refers to a store path cannot be appended to a path");
-        v.mkPath(canonPath(s.str()));
+        v.mkPath(canonPath(str()));
     } else
-        v.mkString(s.str(), context);
+        v.mkStringMove(c_str(), context);
 }
 
 
diff --git a/src/libexpr/value.hh b/src/libexpr/value.hh
index 7d007ebdc..1896c7563 100644
--- a/src/libexpr/value.hh
+++ b/src/libexpr/value.hh
@@ -241,6 +241,8 @@ public:
 
     void mkString(std::string_view s, const PathSet & context);
 
+    void mkStringMove(const char * s, const PathSet & context);
+
     inline void mkString(const Symbol & s)
     {
         mkString(((const std::string &) s).c_str());

From 73fcc40fa4382e2325be601bd85ebe258420e1ce Mon Sep 17 00:00:00 2001
From: pennae <github@quasiparticle.net>
Date: Fri, 31 Dec 2021 03:16:59 +0100
Subject: [PATCH 3/3] use boost::lexical_cast for string2*

this avoids one copy from `s` into `str`, and possibly another copy needed to
construct `s` at the call site. lexical_cast is also more efficient in general.
---
 src/libutil/util.hh | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/libutil/util.hh b/src/libutil/util.hh
index d08f42826..7da60b310 100644
--- a/src/libutil/util.hh
+++ b/src/libutil/util.hh
@@ -11,6 +11,8 @@
 #include <unistd.h>
 #include <signal.h>
 
+#include <boost/lexical_cast.hpp>
+
 #include <atomic>
 #include <functional>
 #include <map>
@@ -417,21 +419,21 @@ bool statusOk(int status);
 
 /* Parse a string into an integer. */
 template<class N>
-std::optional<N> string2Int(const std::string & s)
+std::optional<N> string2Int(const std::string_view s)
 {
     if (s.substr(0, 1) == "-" && !std::numeric_limits<N>::is_signed)
         return std::nullopt;
-    std::istringstream str(s);
-    N n;
-    str >> n;
-    if (str && str.get() == EOF) return n;
-    return std::nullopt;
+    try {
+        return boost::lexical_cast<N>(s.data(), s.size());
+    } catch (const boost::bad_lexical_cast &) {
+        return std::nullopt;
+    }
 }
 
 /* Like string2Int(), but support an optional suffix 'K', 'M', 'G' or
    'T' denoting a binary unit prefix. */
 template<class N>
-N string2IntWithUnitPrefix(std::string s)
+N string2IntWithUnitPrefix(std::string_view s)
 {
     N multiplier = 1;
     if (!s.empty()) {
@@ -442,7 +444,7 @@ N string2IntWithUnitPrefix(std::string s)
             else if (u == 'G') multiplier = 1ULL << 30;
             else if (u == 'T') multiplier = 1ULL << 40;
             else throw UsageError("invalid unit specifier '%1%'", u);
-            s.resize(s.size() - 1);
+            s.remove_suffix(1);
         }
     }
     if (auto n = string2Int<N>(s))
@@ -452,13 +454,13 @@ N string2IntWithUnitPrefix(std::string s)
 
 /* Parse a string into a float. */
 template<class N>
-std::optional<N> string2Float(const string & s)
+std::optional<N> string2Float(const std::string_view s)
 {
-    std::istringstream str(s);
-    N n;
-    str >> n;
-    if (str && str.get() == EOF) return n;
-    return std::nullopt;
+    try {
+        return boost::lexical_cast<N>(s.data(), s.size());
+    } catch (const boost::bad_lexical_cast &) {
+        return std::nullopt;
+    }
 }