optimize unescapeStr

This mainly avoids an allocation and a copy of a string that can instead
be modified in place (possible ever since EvalState, rather than the
generated parser itself, holds on to the buffer).

 # before

Benchmark 1: nix search --offline nixpkgs hello
  Time (mean ± σ):     571.7 ms ±   2.4 ms    [User: 563.3 ms, System: 8.0 ms]
  Range (min … max):   566.7 ms … 579.7 ms    50 runs

Benchmark 2: nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
  Time (mean ± σ):     376.6 ms ±   1.0 ms    [User: 345.8 ms, System: 30.5 ms]
  Range (min … max):   374.5 ms … 379.1 ms    50 runs

Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
  Time (mean ± σ):      2.922 s ±  0.006 s    [User: 2.707 s, System: 0.215 s]
  Range (min … max):    2.906 s …  2.934 s    50 runs

 # after

Benchmark 1: nix search --offline nixpkgs hello
  Time (mean ± σ):     570.4 ms ±   2.8 ms    [User: 561.3 ms, System: 8.6 ms]
  Range (min … max):   564.6 ms … 578.1 ms    50 runs

Benchmark 2: nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
  Time (mean ± σ):     375.4 ms ±   1.3 ms    [User: 343.2 ms, System: 31.7 ms]
  Range (min … max):   373.4 ms … 378.2 ms    50 runs

Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
  Time (mean ± σ):      2.925 s ±  0.006 s    [User: 2.704 s, System: 0.219 s]
  Range (min … max):    2.910 s …  2.942 s    50 runs
This commit is contained in:
pennae 2021-12-21 10:28:05 +01:00
parent 34e3bd10e3
commit 72f42093e7

View file

@@ -64,29 +64,32 @@ static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
 }
-// FIXME: optimize
-static Expr * unescapeStr(SymbolTable & symbols, const char * s, size_t length)
+// we make use of the fact that the parser receives a private copy of the input
+// string and can munge around in it.
+static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length)
 {
-    string t;
-    t.reserve(length);
+    char * result = s;
+    char * t = s;
     char c;
+    // the input string is terminated with *two* NULs, so we can safely take
+    // *one* character after the one being checked against.
     while ((c = *s++)) {
         if (c == '\\') {
-            assert(*s);
             c = *s++;
-            if (c == 'n') t += '\n';
-            else if (c == 'r') t += '\r';
-            else if (c == 't') t += '\t';
-            else t += c;
+            if (c == 'n') *t = '\n';
+            else if (c == 'r') *t = '\r';
+            else if (c == 't') *t = '\t';
+            else *t = c;
         }
         else if (c == '\r') {
             /* Normalise CR and CR/LF into LF. */
-            t += '\n';
+            *t = '\n';
             if (*s == '\n') s++; /* cr/lf */
         }
-        else t += c;
+        else *t = c;
+        t++;
     }
-    return new ExprString(symbols.create(t));
+    return new ExprString(symbols.create({result, size_t(t - result)}));
 }