From 34e3bd10e3891afc965a7fb8fdcaacbdc900b2d5 Mon Sep 17 00:00:00 2001 From: pennae Date: Tue, 21 Dec 2021 13:56:57 +0100 Subject: [PATCH] avoid copies of parser input data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit when given a string yacc will copy the entire input to a newly allocated location so that it can add a second terminating NUL byte. since the parser is a very internal thing to EvalState we can ensure that having two terminating NUL bytes is always possible without copying, and have the parser itself merely check that the expected NULs are present. # before Benchmark 1: nix search --offline nixpkgs hello Time (mean ± σ): 572.4 ms ± 2.3 ms [User: 563.4 ms, System: 8.6 ms] Range (min … max): 566.9 ms … 579.1 ms 50 runs Benchmark 2: nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 381.7 ms ± 1.0 ms [User: 348.3 ms, System: 33.1 ms] Range (min … max): 380.2 ms … 387.7 ms 50 runs Benchmark 3: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 2.936 s ± 0.005 s [User: 2.715 s, System: 0.221 s] Range (min … max): 2.923 s … 2.946 s 50 runs # after Benchmark 1: nix search --offline nixpkgs hello Time (mean ± σ): 571.7 ms ± 2.4 ms [User: 563.3 ms, System: 8.0 ms] Range (min … max): 566.7 ms … 579.7 ms 50 runs Benchmark 2: nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 376.6 ms ± 1.0 ms [User: 345.8 ms, System: 30.5 ms] Range (min … max): 374.5 ms … 379.1 ms 50 runs Benchmark 3: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 2.922 s ± 0.006 s [User: 2.707 s, System: 0.215 s] Range (min … max): 2.906 s … 2.934 s 50 runs --- src/libexpr/eval.hh | 6 +++--- src/libexpr/parser.y | 23 +++++++++++++++-------- src/libexpr/primops.cc | 9 ++++++--- src/libutil/util.cc | 4 +++- src/nix-build/nix-build.cc | 2 +- src/nix/repl.cc | 2 +- 6 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index cc63294c6..15925a6b4 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -181,8 +181,8 @@ public: Expr * parseExprFromFile(const Path & path, StaticEnv & staticEnv); /* Parse a Nix expression from the specified string. */ - Expr * parseExprFromString(std::string_view s, const Path & basePath, StaticEnv & staticEnv); - Expr * parseExprFromString(std::string_view s, const Path & basePath); + Expr * parseExprFromString(std::string s, const Path & basePath, StaticEnv & staticEnv); + Expr * parseExprFromString(std::string s, const Path & basePath); Expr * parseStdin(); @@ -310,7 +310,7 @@ private: friend struct ExprAttrs; friend struct ExprLet; - Expr * parse(const char * text, FileOrigin origin, const Path & path, + Expr * parse(char * text, size_t length, FileOrigin origin, const Path & path, const Path & basePath, StaticEnv & staticEnv); public: diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index 049a149cc..a3e713937 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -596,7 +596,7 @@ formal namespace nix { -Expr * EvalState::parse(const char * text, FileOrigin origin, +Expr * EvalState::parse(char * text, size_t length, FileOrigin origin, const Path & path, const Path & basePath, StaticEnv & staticEnv) { yyscan_t scanner; @@ -616,7 +616,7 @@ Expr * EvalState::parse(const char * text, FileOrigin origin, data.basePath = basePath; yylex_init(&scanner); - yy_scan_string(text, scanner); + yy_scan_buffer(text, length, scanner); int res = yyparse(scanner, &data); yylex_destroy(scanner); @@ -662,26 +662,33 @@ Expr * EvalState::parseExprFromFile(const Path & path) Expr * EvalState::parseExprFromFile(const Path & path, StaticEnv & staticEnv) { - return parse(readFile(path).c_str(), foFile, path, dirOf(path), staticEnv); + auto buffer = readFile(path); + // readFile should have left some extra space for terminators + buffer.append("\0\0", 2); + return parse(buffer.data(), buffer.size(), foFile, path, dirOf(path), staticEnv); } -Expr * EvalState::parseExprFromString(std::string_view s, const Path & basePath, StaticEnv & staticEnv) +Expr * EvalState::parseExprFromString(std::string s, const Path & basePath, StaticEnv & staticEnv) { - return parse(s.data(), foString, "", basePath, staticEnv); + s.append("\0\0", 2); + return parse(s.data(), s.size(), foString, "", basePath, staticEnv); } -Expr * EvalState::parseExprFromString(std::string_view s, const Path & basePath) +Expr * EvalState::parseExprFromString(std::string s, const Path & basePath) { - return parseExprFromString(s, basePath, staticBaseEnv); + return parseExprFromString(std::move(s), basePath, staticBaseEnv); } Expr * EvalState::parseStdin() { //Activity act(*logger, lvlTalkative, format("parsing standard input")); - return parse(drainFD(0).data(), foStdin, "", absPath("."), staticBaseEnv); + auto buffer = drainFD(0); + // drainFD should have left some extra space for terminators + buffer.append("\0\0", 2); + return parse(buffer.data(), buffer.size(), foStdin, "", absPath("."), staticBaseEnv); } diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 66af373d7..852317aa3 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -350,7 +350,7 @@ void prim_exec(EvalState & state, const Pos & pos, Value * * args, Value & v) auto output = runProgram(program, true, commandArgs); Expr * parsed; try { - parsed = state.parseExprFromString(output, pos.file); + parsed = state.parseExprFromString(std::move(output), pos.file); } catch (Error & e) { e.addTrace(pos, "While parsing the output from '%1%'", program); throw; @@ -3800,9 +3800,12 @@ void EvalState::createBaseEnv() /* Note: we have to initialize the 'derivation' constant *after* building baseEnv/staticBaseEnv because it uses 'builtins'. */ - eval(parse( + char code[] = #include "primops/derivation.nix.gen.hh" - , foFile, sDerivationNix, "/", staticBaseEnv), *vDerivation); + // the parser needs two NUL bytes as terminators; one of them + // is implied by being a C string. + "\0"; + eval(parse(code, sizeof(code), foFile, sDerivationNix, "/", staticBaseEnv), *vDerivation); } diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 43fea1b1e..f15a617b0 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -669,7 +669,9 @@ void writeFull(int fd, std::string_view s, bool allowInterrupts) string drainFD(int fd, bool block, const size_t reserveSize) { - StringSink sink(reserveSize); + // the parser needs two extra bytes to append terminating characters, other users will + // not care very much about the extra memory. + StringSink sink(reserveSize + 2); drainFD(fd, sink, block); return std::move(*sink.s); } diff --git a/src/nix-build/nix-build.cc b/src/nix-build/nix-build.cc index e2325c91f..b5d0f4813 100755 --- a/src/nix-build/nix-build.cc +++ b/src/nix-build/nix-build.cc @@ -296,7 +296,7 @@ static void main_nix_build(int argc, char * * argv) else for (auto i : left) { if (fromArgs) - exprs.push_back(state->parseExprFromString(i, absPath("."))); + exprs.push_back(state->parseExprFromString(std::move(i), absPath("."))); else { auto absolute = i; try { diff --git a/src/nix/repl.cc b/src/nix/repl.cc index f453343f3..c8bb5a90f 100644 --- a/src/nix/repl.cc +++ b/src/nix/repl.cc @@ -683,7 +683,7 @@ void NixRepl::addVarToScope(const Symbol & name, Value & v) Expr * NixRepl::parseString(string s) { - Expr * e = state->parseExprFromString(s, curDir, staticEnv); + Expr * e = state->parseExprFromString(std::move(s), curDir, staticEnv); return e; }