forked from lix-project/lix
Merge pull request #5812 from pennae/small-perf-improvements
improve parser performance a bit
This commit is contained in:
commit
fc2443a67c
|
@ -179,8 +179,8 @@ public:
|
||||||
Expr * parseExprFromFile(const Path & path, StaticEnv & staticEnv);
|
Expr * parseExprFromFile(const Path & path, StaticEnv & staticEnv);
|
||||||
|
|
||||||
/* Parse a Nix expression from the specified string. */
|
/* Parse a Nix expression from the specified string. */
|
||||||
Expr * parseExprFromString(std::string_view s, const Path & basePath, StaticEnv & staticEnv);
|
Expr * parseExprFromString(std::string s, const Path & basePath, StaticEnv & staticEnv);
|
||||||
Expr * parseExprFromString(std::string_view s, const Path & basePath);
|
Expr * parseExprFromString(std::string s, const Path & basePath);
|
||||||
|
|
||||||
Expr * parseStdin();
|
Expr * parseStdin();
|
||||||
|
|
||||||
|
@ -308,7 +308,7 @@ private:
|
||||||
friend struct ExprAttrs;
|
friend struct ExprAttrs;
|
||||||
friend struct ExprLet;
|
friend struct ExprLet;
|
||||||
|
|
||||||
Expr * parse(const char * text, FileOrigin origin, const Path & path,
|
Expr * parse(char * text, size_t length, FileOrigin origin, const Path & path,
|
||||||
const Path & basePath, StaticEnv & staticEnv);
|
const Path & basePath, StaticEnv & staticEnv);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -64,29 +64,32 @@ static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// FIXME: optimize
|
// we make use of the fact that the parser receives a private copy of the input
|
||||||
static Expr * unescapeStr(SymbolTable & symbols, const char * s, size_t length)
|
// string and can munge around in it.
|
||||||
|
static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length)
|
||||||
{
|
{
|
||||||
string t;
|
char * result = s;
|
||||||
t.reserve(length);
|
char * t = s;
|
||||||
char c;
|
char c;
|
||||||
|
// the input string is terminated with *two* NULs, so we can safely take
|
||||||
|
// *one* character after the one being checked against.
|
||||||
while ((c = *s++)) {
|
while ((c = *s++)) {
|
||||||
if (c == '\\') {
|
if (c == '\\') {
|
||||||
assert(*s);
|
|
||||||
c = *s++;
|
c = *s++;
|
||||||
if (c == 'n') t += '\n';
|
if (c == 'n') *t = '\n';
|
||||||
else if (c == 'r') t += '\r';
|
else if (c == 'r') *t = '\r';
|
||||||
else if (c == 't') t += '\t';
|
else if (c == 't') *t = '\t';
|
||||||
else t += c;
|
else *t = c;
|
||||||
}
|
}
|
||||||
else if (c == '\r') {
|
else if (c == '\r') {
|
||||||
/* Normalise CR and CR/LF into LF. */
|
/* Normalise CR and CR/LF into LF. */
|
||||||
t += '\n';
|
*t = '\n';
|
||||||
if (*s == '\n') s++; /* cr/lf */
|
if (*s == '\n') s++; /* cr/lf */
|
||||||
}
|
}
|
||||||
else t += c;
|
else *t = c;
|
||||||
|
t++;
|
||||||
}
|
}
|
||||||
return new ExprString(symbols.create(t));
|
return new ExprString(symbols.create({result, size_t(t - result)}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -139,7 +142,7 @@ or { return OR_KW; }
|
||||||
\/\/ { return UPDATE; }
|
\/\/ { return UPDATE; }
|
||||||
\+\+ { return CONCAT; }
|
\+\+ { return CONCAT; }
|
||||||
|
|
||||||
{ID} { yylval->id = strdup(yytext); return ID; }
|
{ID} { yylval->id = {yytext, (size_t) yyleng}; return ID; }
|
||||||
{INT} { errno = 0;
|
{INT} { errno = 0;
|
||||||
try {
|
try {
|
||||||
yylval->n = boost::lexical_cast<int64_t>(yytext);
|
yylval->n = boost::lexical_cast<int64_t>(yytext);
|
||||||
|
@ -221,14 +224,14 @@ or { return OR_KW; }
|
||||||
<PATH_START>{PATH_SEG} {
|
<PATH_START>{PATH_SEG} {
|
||||||
POP_STATE();
|
POP_STATE();
|
||||||
PUSH_STATE(INPATH_SLASH);
|
PUSH_STATE(INPATH_SLASH);
|
||||||
yylval->path = strdup(yytext);
|
yylval->path = {yytext, (size_t) yyleng};
|
||||||
return PATH;
|
return PATH;
|
||||||
}
|
}
|
||||||
|
|
||||||
<PATH_START>{HPATH_START} {
|
<PATH_START>{HPATH_START} {
|
||||||
POP_STATE();
|
POP_STATE();
|
||||||
PUSH_STATE(INPATH_SLASH);
|
PUSH_STATE(INPATH_SLASH);
|
||||||
yylval->path = strdup(yytext);
|
yylval->path = {yytext, (size_t) yyleng};
|
||||||
return HPATH;
|
return HPATH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -237,7 +240,7 @@ or { return OR_KW; }
|
||||||
PUSH_STATE(INPATH_SLASH);
|
PUSH_STATE(INPATH_SLASH);
|
||||||
else
|
else
|
||||||
PUSH_STATE(INPATH);
|
PUSH_STATE(INPATH);
|
||||||
yylval->path = strdup(yytext);
|
yylval->path = {yytext, (size_t) yyleng};
|
||||||
return PATH;
|
return PATH;
|
||||||
}
|
}
|
||||||
{HPATH} {
|
{HPATH} {
|
||||||
|
@ -245,7 +248,7 @@ or { return OR_KW; }
|
||||||
PUSH_STATE(INPATH_SLASH);
|
PUSH_STATE(INPATH_SLASH);
|
||||||
else
|
else
|
||||||
PUSH_STATE(INPATH);
|
PUSH_STATE(INPATH);
|
||||||
yylval->path = strdup(yytext);
|
yylval->path = {yytext, (size_t) yyleng};
|
||||||
return HPATH;
|
return HPATH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -280,8 +283,8 @@ or { return OR_KW; }
|
||||||
throw ParseError("path has a trailing slash");
|
throw ParseError("path has a trailing slash");
|
||||||
}
|
}
|
||||||
|
|
||||||
{SPATH} { yylval->path = strdup(yytext); return SPATH; }
|
{SPATH} { yylval->path = {yytext, (size_t) yyleng}; return SPATH; }
|
||||||
{URI} { yylval->uri = strdup(yytext); return URI; }
|
{URI} { yylval->uri = {yytext, (size_t) yyleng}; return URI; }
|
||||||
|
|
||||||
[ \t\r\n]+ /* eat up whitespace */
|
[ \t\r\n]+ /* eat up whitespace */
|
||||||
\#[^\r\n]* /* single-line comments */
|
\#[^\r\n]* /* single-line comments */
|
||||||
|
|
|
@ -473,7 +473,7 @@ string ExprLambda::showNamePos() const
|
||||||
size_t SymbolTable::totalSize() const
|
size_t SymbolTable::totalSize() const
|
||||||
{
|
{
|
||||||
size_t n = 0;
|
size_t n = 0;
|
||||||
for (auto & i : symbols)
|
for (auto & i : store)
|
||||||
n += i.size();
|
n += i.size();
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
|
@ -273,9 +273,16 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err
|
||||||
nix::Formal * formal;
|
nix::Formal * formal;
|
||||||
nix::NixInt n;
|
nix::NixInt n;
|
||||||
nix::NixFloat nf;
|
nix::NixFloat nf;
|
||||||
const char * id; // !!! -> Symbol
|
// using C a struct allows us to avoid having to define the special
|
||||||
char * path;
|
// members that using string_view here would implicitly delete.
|
||||||
char * uri;
|
struct StringToken {
|
||||||
|
const char * p;
|
||||||
|
size_t l;
|
||||||
|
operator std::string_view() const { return {p, l}; }
|
||||||
|
};
|
||||||
|
StringToken id; // !!! -> Symbol
|
||||||
|
StringToken path;
|
||||||
|
StringToken uri;
|
||||||
std::vector<nix::AttrName> * attrNames;
|
std::vector<nix::AttrName> * attrNames;
|
||||||
std::vector<std::pair<nix::Pos, nix::Expr *> > * string_parts;
|
std::vector<std::pair<nix::Pos, nix::Expr *> > * string_parts;
|
||||||
}
|
}
|
||||||
|
@ -397,7 +404,7 @@ expr_select
|
||||||
|
|
||||||
expr_simple
|
expr_simple
|
||||||
: ID {
|
: ID {
|
||||||
if (strcmp($1, "__curPos") == 0)
|
if (strncmp($1.p, "__curPos", $1.l) == 0)
|
||||||
$$ = new ExprPos(CUR_POS);
|
$$ = new ExprPos(CUR_POS);
|
||||||
else
|
else
|
||||||
$$ = new ExprVar(CUR_POS, data->symbols.create($1));
|
$$ = new ExprVar(CUR_POS, data->symbols.create($1));
|
||||||
|
@ -414,7 +421,7 @@ expr_simple
|
||||||
$$ = new ExprConcatStrings(CUR_POS, false, $2);
|
$$ = new ExprConcatStrings(CUR_POS, false, $2);
|
||||||
}
|
}
|
||||||
| SPATH {
|
| SPATH {
|
||||||
string path($1 + 1, strlen($1) - 2);
|
string path($1.p + 1, $1.l - 2);
|
||||||
$$ = new ExprCall(CUR_POS,
|
$$ = new ExprCall(CUR_POS,
|
||||||
new ExprVar(data->symbols.create("__findFile")),
|
new ExprVar(data->symbols.create("__findFile")),
|
||||||
{new ExprVar(data->symbols.create("__nixPath")),
|
{new ExprVar(data->symbols.create("__nixPath")),
|
||||||
|
@ -460,14 +467,14 @@ string_parts_interpolated
|
||||||
|
|
||||||
path_start
|
path_start
|
||||||
: PATH {
|
: PATH {
|
||||||
Path path(absPath($1, data->basePath));
|
Path path(absPath({$1.p, $1.l}, data->basePath));
|
||||||
/* add back in the trailing '/' to the first segment */
|
/* add back in the trailing '/' to the first segment */
|
||||||
if ($1[strlen($1)-1] == '/' && strlen($1) > 1)
|
if ($1.p[$1.l-1] == '/' && $1.l > 1)
|
||||||
path += "/";
|
path += "/";
|
||||||
$$ = new ExprPath(path);
|
$$ = new ExprPath(path);
|
||||||
}
|
}
|
||||||
| HPATH {
|
| HPATH {
|
||||||
Path path(getHome() + string($1 + 1));
|
Path path(getHome() + string($1.p + 1, $1.l - 1));
|
||||||
$$ = new ExprPath(path);
|
$$ = new ExprPath(path);
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
@ -543,7 +550,7 @@ attrpath
|
||||||
|
|
||||||
attr
|
attr
|
||||||
: ID { $$ = $1; }
|
: ID { $$ = $1; }
|
||||||
| OR_KW { $$ = "or"; }
|
| OR_KW { $$ = {"or", 2}; }
|
||||||
;
|
;
|
||||||
|
|
||||||
string_attr
|
string_attr
|
||||||
|
@ -589,7 +596,7 @@ formal
|
||||||
namespace nix {
|
namespace nix {
|
||||||
|
|
||||||
|
|
||||||
Expr * EvalState::parse(const char * text, FileOrigin origin,
|
Expr * EvalState::parse(char * text, size_t length, FileOrigin origin,
|
||||||
const Path & path, const Path & basePath, StaticEnv & staticEnv)
|
const Path & path, const Path & basePath, StaticEnv & staticEnv)
|
||||||
{
|
{
|
||||||
yyscan_t scanner;
|
yyscan_t scanner;
|
||||||
|
@ -609,7 +616,7 @@ Expr * EvalState::parse(const char * text, FileOrigin origin,
|
||||||
data.basePath = basePath;
|
data.basePath = basePath;
|
||||||
|
|
||||||
yylex_init(&scanner);
|
yylex_init(&scanner);
|
||||||
yy_scan_string(text, scanner);
|
yy_scan_buffer(text, length, scanner);
|
||||||
int res = yyparse(scanner, &data);
|
int res = yyparse(scanner, &data);
|
||||||
yylex_destroy(scanner);
|
yylex_destroy(scanner);
|
||||||
|
|
||||||
|
@ -655,26 +662,33 @@ Expr * EvalState::parseExprFromFile(const Path & path)
|
||||||
|
|
||||||
Expr * EvalState::parseExprFromFile(const Path & path, StaticEnv & staticEnv)
|
Expr * EvalState::parseExprFromFile(const Path & path, StaticEnv & staticEnv)
|
||||||
{
|
{
|
||||||
return parse(readFile(path).c_str(), foFile, path, dirOf(path), staticEnv);
|
auto buffer = readFile(path);
|
||||||
|
// readFile should have left some extra space for terminators
|
||||||
|
buffer.append("\0\0", 2);
|
||||||
|
return parse(buffer.data(), buffer.size(), foFile, path, dirOf(path), staticEnv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Expr * EvalState::parseExprFromString(std::string_view s, const Path & basePath, StaticEnv & staticEnv)
|
Expr * EvalState::parseExprFromString(std::string s, const Path & basePath, StaticEnv & staticEnv)
|
||||||
{
|
{
|
||||||
return parse(s.data(), foString, "", basePath, staticEnv);
|
s.append("\0\0", 2);
|
||||||
|
return parse(s.data(), s.size(), foString, "", basePath, staticEnv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Expr * EvalState::parseExprFromString(std::string_view s, const Path & basePath)
|
Expr * EvalState::parseExprFromString(std::string s, const Path & basePath)
|
||||||
{
|
{
|
||||||
return parseExprFromString(s, basePath, staticBaseEnv);
|
return parseExprFromString(std::move(s), basePath, staticBaseEnv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Expr * EvalState::parseStdin()
|
Expr * EvalState::parseStdin()
|
||||||
{
|
{
|
||||||
//Activity act(*logger, lvlTalkative, format("parsing standard input"));
|
//Activity act(*logger, lvlTalkative, format("parsing standard input"));
|
||||||
return parse(drainFD(0).data(), foStdin, "", absPath("."), staticBaseEnv);
|
auto buffer = drainFD(0);
|
||||||
|
// drainFD should have left some extra space for terminators
|
||||||
|
buffer.append("\0\0", 2);
|
||||||
|
return parse(buffer.data(), buffer.size(), foStdin, "", absPath("."), staticBaseEnv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -378,7 +378,7 @@ void prim_exec(EvalState & state, const Pos & pos, Value * * args, Value & v)
|
||||||
auto output = runProgram(program, true, commandArgs);
|
auto output = runProgram(program, true, commandArgs);
|
||||||
Expr * parsed;
|
Expr * parsed;
|
||||||
try {
|
try {
|
||||||
parsed = state.parseExprFromString(output, pos.file);
|
parsed = state.parseExprFromString(std::move(output), pos.file);
|
||||||
} catch (Error & e) {
|
} catch (Error & e) {
|
||||||
e.addTrace(pos, "While parsing the output from '%1%'", program);
|
e.addTrace(pos, "While parsing the output from '%1%'", program);
|
||||||
throw;
|
throw;
|
||||||
|
@ -3915,9 +3915,12 @@ void EvalState::createBaseEnv()
|
||||||
|
|
||||||
/* Note: we have to initialize the 'derivation' constant *after*
|
/* Note: we have to initialize the 'derivation' constant *after*
|
||||||
building baseEnv/staticBaseEnv because it uses 'builtins'. */
|
building baseEnv/staticBaseEnv because it uses 'builtins'. */
|
||||||
eval(parse(
|
char code[] =
|
||||||
#include "primops/derivation.nix.gen.hh"
|
#include "primops/derivation.nix.gen.hh"
|
||||||
, foFile, sDerivationNix, "/", staticBaseEnv), *vDerivation);
|
// the parser needs two NUL bytes as terminators; one of them
|
||||||
|
// is implied by being a C string.
|
||||||
|
"\0";
|
||||||
|
eval(parse(code, sizeof(code), foFile, sDerivationNix, "/", staticBaseEnv), *vDerivation);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <list>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <unordered_set>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include "types.hh"
|
#include "types.hh"
|
||||||
|
|
||||||
|
@ -70,15 +71,21 @@ public:
|
||||||
class SymbolTable
|
class SymbolTable
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
typedef std::unordered_set<string> Symbols;
|
std::unordered_map<std::string_view, Symbol> symbols;
|
||||||
Symbols symbols;
|
std::list<string> store;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Symbol create(std::string_view s)
|
Symbol create(std::string_view s)
|
||||||
{
|
{
|
||||||
// FIXME: avoid allocation if 's' already exists in the symbol table.
|
// Most symbols are looked up more than once, so we trade off insertion performance
|
||||||
std::pair<Symbols::iterator, bool> res = symbols.emplace(std::string(s));
|
// for lookup performance.
|
||||||
return Symbol(&*res.first);
|
// TODO: could probably be done more efficiently with transparent Hash and Equals
|
||||||
|
// on the original implementation using unordered_set
|
||||||
|
auto it = symbols.find(s);
|
||||||
|
if (it != symbols.end()) return it->second;
|
||||||
|
|
||||||
|
const string & rawSym = store.emplace_back(s);
|
||||||
|
return symbols.emplace(rawSym, Symbol(&rawSym)).first->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t size() const
|
size_t size() const
|
||||||
|
@ -91,7 +98,7 @@ public:
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void dump(T callback)
|
void dump(T callback)
|
||||||
{
|
{
|
||||||
for (auto & s : symbols)
|
for (auto & s : store)
|
||||||
callback(s);
|
callback(s);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -668,7 +668,9 @@ void writeFull(int fd, std::string_view s, bool allowInterrupts)
|
||||||
|
|
||||||
string drainFD(int fd, bool block, const size_t reserveSize)
|
string drainFD(int fd, bool block, const size_t reserveSize)
|
||||||
{
|
{
|
||||||
StringSink sink(reserveSize);
|
// the parser needs two extra bytes to append terminating characters, other users will
|
||||||
|
// not care very much about the extra memory.
|
||||||
|
StringSink sink(reserveSize + 2);
|
||||||
drainFD(fd, sink, block);
|
drainFD(fd, sink, block);
|
||||||
return std::move(*sink.s);
|
return std::move(*sink.s);
|
||||||
}
|
}
|
||||||
|
|
|
@ -292,7 +292,7 @@ static void main_nix_build(int argc, char * * argv)
|
||||||
else
|
else
|
||||||
for (auto i : left) {
|
for (auto i : left) {
|
||||||
if (fromArgs)
|
if (fromArgs)
|
||||||
exprs.push_back(state->parseExprFromString(i, absPath(".")));
|
exprs.push_back(state->parseExprFromString(std::move(i), absPath(".")));
|
||||||
else {
|
else {
|
||||||
auto absolute = i;
|
auto absolute = i;
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -703,7 +703,7 @@ void NixRepl::addVarToScope(const Symbol & name, Value & v)
|
||||||
|
|
||||||
Expr * NixRepl::parseString(string s)
|
Expr * NixRepl::parseString(string s)
|
||||||
{
|
{
|
||||||
Expr * e = state->parseExprFromString(s, curDir, staticEnv);
|
Expr * e = state->parseExprFromString(std::move(s), curDir, staticEnv);
|
||||||
return e;
|
return e;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue