From 26a8b220eb7470e132b9bcedb94b58492cdd786f Mon Sep 17 00:00:00 2001 From: pennae Date: Tue, 28 Dec 2021 22:26:59 +0100 Subject: [PATCH] avoid ostream sentries per json string character MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit we don't have to create an ostream sentry object for every character of a JSON string we write. format a bunch of characters and flush them to the stream all at once instead. this doesn't affect small numbers of string characters, but larger numbers of total JSON string characters written gain a lot. at 1MB of total string written we gain almost 30%, at 16MB it's almost a factor of 3x. large numbers of JSON string characters do occur naturally in a nixos system evaluation to generate documentation (though this is now somewhat mitigated by caching the largest part of nixos option docs). benchmarked with hyperfine 'nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) {e})"' --warmup 1 -L e 1,4,256,4096,65536 before: Benchmark 1: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 1)" Time (mean ± σ): 12.5 ms ± 0.2 ms [User: 9.2 ms, System: 4.0 ms] Range (min … max): 11.9 ms … 13.1 ms 223 runs Benchmark 2: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 4)" Time (mean ± σ): 12.5 ms ± 0.2 ms [User: 9.3 ms, System: 3.8 ms] Range (min … max): 11.9 ms … 13.2 ms 220 runs Benchmark 3: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 256)" Time (mean ± σ): 13.2 ms ± 0.3 ms [User: 9.8 ms, System: 4.0 ms] Range (min … max): 12.6 ms … 14.3 ms 205 runs Benchmark 4: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 4096)" Time (mean ± σ): 24.0 ms ± 0.4 ms [User: 19.4 ms, System: 5.2 ms] Range (min … max): 22.7 ms … 25.8 ms 119 runs Benchmark 5: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 65536)" Time (mean ± σ): 196.0 ms ± 3.7 ms [User: 171.2 ms, System: 25.8 ms] Range (min … max): 190.6 ms … 201.5 ms 14 runs after: Benchmark 1: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 1)" Time (mean ± σ): 12.4 ms ± 0.3 ms [User: 9.1 ms, System: 4.0 ms] Range (min … max): 11.7 ms … 13.3 ms 204 runs Benchmark 2: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 4)" Time (mean ± σ): 12.4 ms ± 0.2 ms [User: 9.2 ms, System: 3.9 ms] Range (min … max): 11.8 ms … 13.0 ms 214 runs Benchmark 3: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 256)" Time (mean ± σ): 12.6 ms ± 0.2 ms [User: 9.5 ms, System: 3.8 ms] Range (min … max): 12.1 ms … 13.3 ms 209 runs Benchmark 4: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 4096)" Time (mean ± σ): 15.9 ms ± 0.2 ms [User: 11.4 ms, System: 5.1 ms] Range (min … max): 15.2 ms … 16.4 ms 171 runs Benchmark 5: nix eval --raw --expr "let s = __concatStringsSep \"\" (__genList (_: \"c\") 256); in __toJSON (__genList (_: s) 65536)" Time (mean ± σ): 69.0 ms ± 0.9 ms [User: 44.3 ms, System: 25.3 ms] Range (min … max): 67.2 ms … 70.9 ms 42 runs --- src/libutil/json.cc | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/src/libutil/json.cc b/src/libutil/json.cc index 01331947e..3a981376f 100644 --- a/src/libutil/json.cc +++ b/src/libutil/json.cc @@ -7,16 +7,38 @@ namespace nix { void toJSON(std::ostream & str, const char * start, const char * end) { - str << '"'; - for (auto i = start; i != end; i++) - if (*i == '\"' || *i == '\\') str << '\\' << *i; - else if (*i == '\n') str << "\\n"; - else if (*i == '\r') str << "\\r"; - else if (*i == '\t') str << "\\t"; - else if (*i >= 0 && *i < 32) - str << "\\u" << std::setfill('0') << std::setw(4) << std::hex << (uint16_t) *i << std::dec; - else str << *i; - str << '"'; + constexpr size_t BUF_SIZE = 4096; + char buf[BUF_SIZE + 7]; // BUF_SIZE + largest single sequence of puts + size_t bufPos = 0; + + const auto flush = [&] { + str.write(buf, bufPos); + bufPos = 0; + }; + const auto put = [&] (char c) { + buf[bufPos++] = c; + }; + + put('"'); + for (auto i = start; i != end; i++) { + if (bufPos >= BUF_SIZE) flush(); + if (*i == '\"' || *i == '\\') { put('\\'); put(*i); } + else if (*i == '\n') { put('\\'); put('n'); } + else if (*i == '\r') { put('\\'); put('r'); } + else if (*i == '\t') { put('\\'); put('t'); } + else if (*i >= 0 && *i < 32) { + const char hex[17] = "0123456789abcdef"; + put('\\'); + put('u'); + put(hex[(uint16_t(*i) >> 12) & 0xf]); + put(hex[(uint16_t(*i) >> 8) & 0xf]); + put(hex[(uint16_t(*i) >> 4) & 0xf]); + put(hex[(uint16_t(*i) >> 0) & 0xf]); + } + else put(*i); + } + put('"'); + flush(); } void toJSON(std::ostream & str, const char * s)