optimize unescapeStr

mainly to avoid an allocation and a copy of a string that can be
modified in place (ever since EvalState holds on to the buffer, not the
generated parser itself).

 # before

Benchmark 1: nix search --offline nixpkgs hello
  Time (mean ± σ):     571.7 ms ±   2.4 ms    [User: 563.3 ms, System: 8.0 ms]
  Range (min … max):   566.7 ms … 579.7 ms    50 runs

Benchmark 2: nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
  Time (mean ± σ):     376.6 ms ±   1.0 ms    [User: 345.8 ms, System: 30.5 ms]
  Range (min … max):   374.5 ms … 379.1 ms    50 runs

Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
  Time (mean ± σ):      2.922 s ±  0.006 s    [User: 2.707 s, System: 0.215 s]
  Range (min … max):    2.906 s …  2.934 s    50 runs

 # after

Benchmark 1: nix search --offline nixpkgs hello
  Time (mean ± σ):     570.4 ms ±   2.8 ms    [User: 561.3 ms, System: 8.6 ms]
  Range (min … max):   564.6 ms … 578.1 ms    50 runs

Benchmark 2: nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
  Time (mean ± σ):     375.4 ms ±   1.3 ms    [User: 343.2 ms, System: 31.7 ms]
  Range (min … max):   373.4 ms … 378.2 ms    50 runs

Benchmark 3: nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
  Time (mean ± σ):      2.925 s ±  0.006 s    [User: 2.704 s, System: 0.219 s]
  Range (min … max):    2.910 s …  2.942 s    50 runs
This commit is contained in:
pennae 2021-12-21 10:28:05 +01:00
parent 34e3bd10e3
commit 72f42093e7

View file

@ -64,29 +64,32 @@ static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
}
// FIXME: optimize
static Expr * unescapeStr(SymbolTable & symbols, const char * s, size_t length)
// we make use of the fact that the parser receives a private copy of the input
// string and can munge around in it.
static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length)
{
string t;
t.reserve(length);
char * result = s;
char * t = s;
char c;
// the input string is terminated with *two* NULs, so we can safely take
// *one* character after the one being checked against.
while ((c = *s++)) {
if (c == '\\') {
assert(*s);
c = *s++;
if (c == 'n') t += '\n';
else if (c == 'r') t += '\r';
else if (c == 't') t += '\t';
else t += c;
if (c == 'n') *t = '\n';
else if (c == 'r') *t = '\r';
else if (c == 't') *t = '\t';
else *t = c;
}
else if (c == '\r') {
/* Normalise CR and CR/LF into LF. */
t += '\n';
*t = '\n';
if (*s == '\n') s++; /* cr/lf */
}
else t += c;
else *t = c;
t++;
}
return new ExprString(symbols.create(t));
return new ExprString(symbols.create({result, size_t(t - result)}));
}