From 72f42093e711db1ab43c920688bb5e59df33935d Mon Sep 17 00:00:00 2001 From: pennae Date: Tue, 21 Dec 2021 10:28:05 +0100 Subject: [PATCH] optimize unescapeStr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainly to avoid an allocation and a copy of a string that can be modified in place (ever since EvalState holds on to the buffer, not the generated parser itself). # before Benchmark 1: nix search --offline nixpkgs hello Time (mean ± σ): 571.7 ms ± 2.4 ms [User: 563.3 ms, System: 8.0 ms] Range (min … max): 566.7 ms … 579.7 ms 50 runs Benchmark 2: nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 376.6 ms ± 1.0 ms [User: 345.8 ms, System: 30.5 ms] Range (min … max): 374.5 ms … 379.1 ms 50 runs Benchmark 3: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 2.922 s ± 0.006 s [User: 2.707 s, System: 0.215 s] Range (min … max): 2.906 s … 2.934 s 50 runs # after Benchmark 1: nix search --offline nixpkgs hello Time (mean ± σ): 570.4 ms ± 2.8 ms [User: 561.3 ms, System: 8.6 ms] Range (min … max): 564.6 ms … 578.1 ms 50 runs Benchmark 2: nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix Time (mean ± σ): 375.4 ms ± 1.3 ms [User: 343.2 ms, System: 31.7 ms] Range (min … max): 373.4 ms … 378.2 ms 50 runs Benchmark 3: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 2.925 s ± 0.006 s [User: 2.704 s, System: 0.219 s] Range (min … max): 2.910 s … 2.942 s 50 runs --- src/libexpr/lexer.l | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l index 70e99d2d2..a0e7a1877 100644 --- a/src/libexpr/lexer.l +++ b/src/libexpr/lexer.l @@ -64,29 +64,32 @@ static void adjustLoc(YYLTYPE * loc, const char * s, size_t len) } -// FIXME: optimize -static Expr * unescapeStr(SymbolTable & symbols, const char * s, size_t length) +// we make use of the fact that the parser receives a private copy of the input +// string and can munge around in it. +static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length) { - string t; - t.reserve(length); + char * result = s; + char * t = s; char c; + // the input string is terminated with *two* NULs, so we can safely take + // *one* character after the one being checked against. while ((c = *s++)) { if (c == '\\') { - assert(*s); c = *s++; - if (c == 'n') t += '\n'; - else if (c == 'r') t += '\r'; - else if (c == 't') t += '\t'; - else t += c; + if (c == 'n') *t = '\n'; + else if (c == 'r') *t = '\r'; + else if (c == 't') *t = '\t'; + else *t = c; } else if (c == '\r') { /* Normalise CR and CR/LF into LF. */ - t += '\n'; + *t = '\n'; if (*s == '\n') s++; /* cr/lf */ } - else t += c; + else *t = c; + t++; } - return new ExprString(symbols.create(t)); + return new ExprString(symbols.create({result, size_t(t - result)})); }