forked from lix-project/lix
remove ExprIndStr
it can be replaced with StringToken if we add another bit of information to StringToken, namely whether this string should take part in indentation scanning or not. since all escaping terminates indentation scanning we need to set this bit only for the non-escaped IND_STRING rule. this improves performance by about 1%.

before

nix search --no-eval-cache --offline ../nixpkgs hello
  Time (mean ± σ): 8.880 s ± 0.048 s [User: 6.809 s, System: 1.643 s]
  Range (min … max): 8.781 s … 8.993 s 20 runs

nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
  Time (mean ± σ): 375.0 ms ± 2.2 ms [User: 339.8 ms, System: 35.2 ms]
  Range (min … max): 371.5 ms … 379.3 ms 20 runs

nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
  Time (mean ± σ): 2.831 s ± 0.040 s [User: 2.536 s, System: 0.225 s]
  Range (min … max): 2.769 s … 2.912 s 20 runs

after

nix search --no-eval-cache --offline ../nixpkgs hello
  Time (mean ± σ): 8.832 s ± 0.048 s [User: 6.757 s, System: 1.657 s]
  Range (min … max): 8.743 s … 8.921 s 20 runs

nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
  Time (mean ± σ): 367.4 ms ± 3.2 ms [User: 332.7 ms, System: 34.7 ms]
  Range (min … max): 364.6 ms … 374.6 ms 20 runs

nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
  Time (mean ± σ): 2.810 s ± 0.030 s [User: 2.517 s, System: 0.225 s]
  Range (min … max): 2.742 s … 2.854 s 20 runs
This commit is contained in:
parent
bc443511eb
commit
0a7746603e
3 changed files with 57 additions and 54 deletions
|
@ -66,7 +66,7 @@ static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
|
||||||
|
|
||||||
// we make use of the fact that the parser receives a private copy of the input
|
// we make use of the fact that the parser receives a private copy of the input
|
||||||
// string and can munge around in it.
|
// string and can munge around in it.
|
||||||
static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length)
|
static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length)
|
||||||
{
|
{
|
||||||
char * result = s;
|
char * result = s;
|
||||||
char * t = s;
|
char * t = s;
|
||||||
|
@ -89,7 +89,7 @@ static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length)
|
||||||
else *t = c;
|
else *t = c;
|
||||||
t++;
|
t++;
|
||||||
}
|
}
|
||||||
return new ExprString(symbols.create({result, size_t(t - result)}));
|
return {result, size_t(t - result)};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -176,7 +176,7 @@ or { return OR_KW; }
|
||||||
/* It is impossible to match strings ending with '$' with one
|
/* It is impossible to match strings ending with '$' with one
|
||||||
regex because trailing contexts are only valid at the end
|
regex because trailing contexts are only valid at the end
|
||||||
of a rule. (A sane but undocumented limitation.) */
|
of a rule. (A sane but undocumented limitation.) */
|
||||||
yylval->e = unescapeStr(data->symbols, yytext, yyleng);
|
yylval->str = unescapeStr(data->symbols, yytext, yyleng);
|
||||||
return STR;
|
return STR;
|
||||||
}
|
}
|
||||||
<STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
|
<STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
|
||||||
|
@ -191,26 +191,26 @@ or { return OR_KW; }
|
||||||
|
|
||||||
\'\'(\ *\n)? { PUSH_STATE(IND_STRING); return IND_STRING_OPEN; }
|
\'\'(\ *\n)? { PUSH_STATE(IND_STRING); return IND_STRING_OPEN; }
|
||||||
<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'\$])+ {
|
<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'\$])+ {
|
||||||
yylval->e = new ExprIndStr(yytext);
|
yylval->str = {yytext, (size_t) yyleng, true};
|
||||||
return IND_STR;
|
return IND_STR;
|
||||||
}
|
}
|
||||||
<IND_STRING>\'\'\$ |
|
<IND_STRING>\'\'\$ |
|
||||||
<IND_STRING>\$ {
|
<IND_STRING>\$ {
|
||||||
yylval->e = new ExprIndStr("$");
|
yylval->str = {"$", 1};
|
||||||
return IND_STR;
|
return IND_STR;
|
||||||
}
|
}
|
||||||
<IND_STRING>\'\'\' {
|
<IND_STRING>\'\'\' {
|
||||||
yylval->e = new ExprIndStr("''");
|
yylval->str = {"''", 2};
|
||||||
return IND_STR;
|
return IND_STR;
|
||||||
}
|
}
|
||||||
<IND_STRING>\'\'\\{ANY} {
|
<IND_STRING>\'\'\\{ANY} {
|
||||||
yylval->e = unescapeStr(data->symbols, yytext + 2, yyleng - 2);
|
yylval->str = unescapeStr(data->symbols, yytext + 2, yyleng - 2);
|
||||||
return IND_STR;
|
return IND_STR;
|
||||||
}
|
}
|
||||||
<IND_STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
|
<IND_STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
|
||||||
<IND_STRING>\'\' { POP_STATE(); return IND_STRING_CLOSE; }
|
<IND_STRING>\'\' { POP_STATE(); return IND_STRING_CLOSE; }
|
||||||
<IND_STRING>\' {
|
<IND_STRING>\' {
|
||||||
yylval->e = new ExprIndStr("'");
|
yylval->str = {"'", 1};
|
||||||
return IND_STR;
|
return IND_STR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -264,7 +264,7 @@ or { return OR_KW; }
|
||||||
PUSH_STATE(INPATH_SLASH);
|
PUSH_STATE(INPATH_SLASH);
|
||||||
else
|
else
|
||||||
PUSH_STATE(INPATH);
|
PUSH_STATE(INPATH);
|
||||||
yylval->e = new ExprString(data->symbols.create(string(yytext)));
|
yylval->str = {yytext, (size_t) yyleng};
|
||||||
return STR;
|
return STR;
|
||||||
}
|
}
|
||||||
<INPATH>{ANY} |
|
<INPATH>{ANY} |
|
||||||
|
|
|
@ -117,13 +117,6 @@ struct ExprString : Expr
|
||||||
Value * maybeThunk(EvalState & state, Env & env);
|
Value * maybeThunk(EvalState & state, Env & env);
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Temporary class used during parsing of indented strings. */
|
|
||||||
struct ExprIndStr : Expr
|
|
||||||
{
|
|
||||||
string s;
|
|
||||||
ExprIndStr(const string & s) : s(s) { };
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ExprPath : Expr
|
struct ExprPath : Expr
|
||||||
{
|
{
|
||||||
string s;
|
string s;
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
#ifndef BISON_HEADER
|
#ifndef BISON_HEADER
|
||||||
#define BISON_HEADER
|
#define BISON_HEADER
|
||||||
|
|
||||||
|
#include <variant>
|
||||||
|
|
||||||
#include "util.hh"
|
#include "util.hh"
|
||||||
|
|
||||||
#include "nixexpr.hh"
|
#include "nixexpr.hh"
|
||||||
|
@ -41,6 +43,15 @@ namespace nix {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// using a C struct allows us to avoid having to define the special
|
||||||
|
// members that using string_view here would implicitly delete.
|
||||||
|
struct StringToken {
|
||||||
|
const char * p;
|
||||||
|
size_t l;
|
||||||
|
bool hasIndentation;
|
||||||
|
operator std::string_view() const { return {p, l}; }
|
||||||
|
};
|
||||||
|
|
||||||
#define YY_DECL int yylex \
|
#define YY_DECL int yylex \
|
||||||
(YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParseData * data)
|
(YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParseData * data)
|
||||||
|
|
||||||
|
@ -152,7 +163,8 @@ static void addFormal(const Pos & pos, Formals * formals, const Formal & formal)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<std::pair<Pos, Expr *> > & es)
|
static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols,
|
||||||
|
vector<std::pair<Pos, std::variant<Expr *, StringToken> > > & es)
|
||||||
{
|
{
|
||||||
if (es.empty()) return new ExprString(symbols.create(""));
|
if (es.empty()) return new ExprString(symbols.create(""));
|
||||||
|
|
||||||
|
@ -163,20 +175,20 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
|
||||||
size_t minIndent = 1000000;
|
size_t minIndent = 1000000;
|
||||||
size_t curIndent = 0;
|
size_t curIndent = 0;
|
||||||
for (auto & [i_pos, i] : es) {
|
for (auto & [i_pos, i] : es) {
|
||||||
ExprIndStr * e = dynamic_cast<ExprIndStr *>(i);
|
auto * str = std::get_if<StringToken>(&i);
|
||||||
if (!e) {
|
if (!str || !str->hasIndentation) {
|
||||||
/* Anti-quotations end the current start-of-line whitespace. */
|
/* Anti-quotations and escaped characters end the current start-of-line whitespace. */
|
||||||
if (atStartOfLine) {
|
if (atStartOfLine) {
|
||||||
atStartOfLine = false;
|
atStartOfLine = false;
|
||||||
if (curIndent < minIndent) minIndent = curIndent;
|
if (curIndent < minIndent) minIndent = curIndent;
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for (size_t j = 0; j < e->s.size(); ++j) {
|
for (size_t j = 0; j < str->l; ++j) {
|
||||||
if (atStartOfLine) {
|
if (atStartOfLine) {
|
||||||
if (e->s[j] == ' ')
|
if (str->p[j] == ' ')
|
||||||
curIndent++;
|
curIndent++;
|
||||||
else if (e->s[j] == '\n') {
|
else if (str->p[j] == '\n') {
|
||||||
/* Empty line, doesn't influence minimum
|
/* Empty line, doesn't influence minimum
|
||||||
indentation. */
|
indentation. */
|
||||||
curIndent = 0;
|
curIndent = 0;
|
||||||
|
@ -184,7 +196,7 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
|
||||||
atStartOfLine = false;
|
atStartOfLine = false;
|
||||||
if (curIndent < minIndent) minIndent = curIndent;
|
if (curIndent < minIndent) minIndent = curIndent;
|
||||||
}
|
}
|
||||||
} else if (e->s[j] == '\n') {
|
} else if (str->p[j] == '\n') {
|
||||||
atStartOfLine = true;
|
atStartOfLine = true;
|
||||||
curIndent = 0;
|
curIndent = 0;
|
||||||
}
|
}
|
||||||
|
@ -196,33 +208,31 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
|
||||||
atStartOfLine = true;
|
atStartOfLine = true;
|
||||||
size_t curDropped = 0;
|
size_t curDropped = 0;
|
||||||
size_t n = es.size();
|
size_t n = es.size();
|
||||||
for (vector<std::pair<Pos, Expr *> >::iterator i = es.begin(); i != es.end(); ++i, --n) {
|
auto i = es.begin();
|
||||||
ExprIndStr * e = dynamic_cast<ExprIndStr *>(i->second);
|
const auto trimExpr = [&] (Expr * e) {
|
||||||
if (!e) {
|
atStartOfLine = false;
|
||||||
atStartOfLine = false;
|
curDropped = 0;
|
||||||
curDropped = 0;
|
es2->emplace_back(i->first, e);
|
||||||
es2->push_back(*i);
|
};
|
||||||
continue;
|
const auto trimString = [&] (const StringToken & t) {
|
||||||
}
|
|
||||||
|
|
||||||
string s2;
|
string s2;
|
||||||
for (size_t j = 0; j < e->s.size(); ++j) {
|
for (size_t j = 0; j < t.l; ++j) {
|
||||||
if (atStartOfLine) {
|
if (atStartOfLine) {
|
||||||
if (e->s[j] == ' ') {
|
if (t.p[j] == ' ') {
|
||||||
if (curDropped++ >= minIndent)
|
if (curDropped++ >= minIndent)
|
||||||
s2 += e->s[j];
|
s2 += t.p[j];
|
||||||
}
|
}
|
||||||
else if (e->s[j] == '\n') {
|
else if (t.p[j] == '\n') {
|
||||||
curDropped = 0;
|
curDropped = 0;
|
||||||
s2 += e->s[j];
|
s2 += t.p[j];
|
||||||
} else {
|
} else {
|
||||||
atStartOfLine = false;
|
atStartOfLine = false;
|
||||||
curDropped = 0;
|
curDropped = 0;
|
||||||
s2 += e->s[j];
|
s2 += t.p[j];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
s2 += e->s[j];
|
s2 += t.p[j];
|
||||||
if (e->s[j] == '\n') atStartOfLine = true;
|
if (t.p[j] == '\n') atStartOfLine = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -235,6 +245,9 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
|
||||||
}
|
}
|
||||||
|
|
||||||
es2->emplace_back(i->first, new ExprString(symbols.create(s2)));
|
es2->emplace_back(i->first, new ExprString(symbols.create(s2)));
|
||||||
|
};
|
||||||
|
for (; i != es.end(); ++i, --n) {
|
||||||
|
std::visit(overloaded { trimExpr, trimString }, i->second);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If this is a single string, then don't do a concatenation. */
|
/* If this is a single string, then don't do a concatenation. */
|
||||||
|
@ -273,18 +286,13 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err
|
||||||
nix::Formal * formal;
|
nix::Formal * formal;
|
||||||
nix::NixInt n;
|
nix::NixInt n;
|
||||||
nix::NixFloat nf;
|
nix::NixFloat nf;
|
||||||
// using a C struct allows us to avoid having to define the special
|
|
||||||
// members that using string_view here would implicitly delete.
|
|
||||||
struct StringToken {
|
|
||||||
const char * p;
|
|
||||||
size_t l;
|
|
||||||
operator std::string_view() const { return {p, l}; }
|
|
||||||
};
|
|
||||||
StringToken id; // !!! -> Symbol
|
StringToken id; // !!! -> Symbol
|
||||||
StringToken path;
|
StringToken path;
|
||||||
StringToken uri;
|
StringToken uri;
|
||||||
|
StringToken str;
|
||||||
std::vector<nix::AttrName> * attrNames;
|
std::vector<nix::AttrName> * attrNames;
|
||||||
std::vector<std::pair<nix::Pos, nix::Expr *> > * string_parts;
|
std::vector<std::pair<nix::Pos, nix::Expr *> > * string_parts;
|
||||||
|
std::vector<std::pair<nix::Pos, std::variant<nix::Expr *, StringToken> > > * ind_string_parts;
|
||||||
}
|
}
|
||||||
|
|
||||||
%type <e> start expr expr_function expr_if expr_op
|
%type <e> start expr expr_function expr_if expr_op
|
||||||
|
@ -294,11 +302,12 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err
|
||||||
%type <formals> formals
|
%type <formals> formals
|
||||||
%type <formal> formal
|
%type <formal> formal
|
||||||
%type <attrNames> attrs attrpath
|
%type <attrNames> attrs attrpath
|
||||||
%type <string_parts> string_parts_interpolated ind_string_parts
|
%type <string_parts> string_parts_interpolated
|
||||||
|
%type <ind_string_parts> ind_string_parts
|
||||||
%type <e> path_start string_parts string_attr
|
%type <e> path_start string_parts string_attr
|
||||||
%type <id> attr
|
%type <id> attr
|
||||||
%token <id> ID ATTRPATH
|
%token <id> ID ATTRPATH
|
||||||
%token <e> STR IND_STR
|
%token <str> STR IND_STR
|
||||||
%token <n> INT
|
%token <n> INT
|
||||||
%token <nf> FLOAT
|
%token <nf> FLOAT
|
||||||
%token <path> PATH HPATH SPATH PATH_END
|
%token <path> PATH HPATH SPATH PATH_END
|
||||||
|
@ -449,18 +458,19 @@ expr_simple
|
||||||
;
|
;
|
||||||
|
|
||||||
string_parts
|
string_parts
|
||||||
: STR
|
: STR { $$ = new ExprString(data->symbols.create($1)); }
|
||||||
| string_parts_interpolated { $$ = new ExprConcatStrings(CUR_POS, true, $1); }
|
| string_parts_interpolated { $$ = new ExprConcatStrings(CUR_POS, true, $1); }
|
||||||
| { $$ = new ExprString(data->symbols.create("")); }
|
| { $$ = new ExprString(data->symbols.create("")); }
|
||||||
;
|
;
|
||||||
|
|
||||||
string_parts_interpolated
|
string_parts_interpolated
|
||||||
: string_parts_interpolated STR { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $2); }
|
: string_parts_interpolated STR
|
||||||
|
{ $$ = $1; $1->emplace_back(makeCurPos(@2, data), new ExprString(data->symbols.create($2))); }
|
||||||
| string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); }
|
| string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); }
|
||||||
| DOLLAR_CURLY expr '}' { $$ = new vector<std::pair<Pos, Expr *> >; $$->emplace_back(makeCurPos(@1, data), $2); }
|
| DOLLAR_CURLY expr '}' { $$ = new vector<std::pair<Pos, Expr *> >; $$->emplace_back(makeCurPos(@1, data), $2); }
|
||||||
| STR DOLLAR_CURLY expr '}' {
|
| STR DOLLAR_CURLY expr '}' {
|
||||||
$$ = new vector<std::pair<Pos, Expr *> >;
|
$$ = new vector<std::pair<Pos, Expr *> >;
|
||||||
$$->emplace_back(makeCurPos(@1, data), $1);
|
$$->emplace_back(makeCurPos(@1, data), new ExprString(data->symbols.create($1)));
|
||||||
$$->emplace_back(makeCurPos(@2, data), $3);
|
$$->emplace_back(makeCurPos(@2, data), $3);
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
@ -482,7 +492,7 @@ path_start
|
||||||
ind_string_parts
|
ind_string_parts
|
||||||
: ind_string_parts IND_STR { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $2); }
|
: ind_string_parts IND_STR { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $2); }
|
||||||
| ind_string_parts DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); }
|
| ind_string_parts DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); }
|
||||||
| { $$ = new vector<std::pair<Pos, Expr *> >; }
|
| { $$ = new vector<std::pair<Pos, std::variant<Expr *, StringToken> > >; }
|
||||||
;
|
;
|
||||||
|
|
||||||
binds
|
binds
|
||||||
|
|
Loading…
Reference in a new issue