forked from lix-project/lix
81e7c40264
We now parse function applications as a vector of arguments rather than as a chain of binary applications, e.g. 'substring 1 2 "foo"' is parsed as ExprCall { .fun = <substring>, .args = [ <1>, <2>, <"foo"> ] } rather than ExprApp (ExprApp (ExprApp <substring> <1>) <2>) <"foo">. This allows primops to be called immediately (if enough arguments are supplied) without having to allocate intermediate tPrimOpApp values. On $ nix-instantiate --dry-run '<nixpkgs/nixos/release-combined.nix>' -A nixos.tests.simple.x86_64-linux this gives a substantial performance improvement: user CPU time: median = 0.9209 mean = 0.9218 stddev = 0.0073 min = 0.9086 max = 0.9340 [rejected, p=0.00000, Δ=-0.21433±0.00677]; elapsed time: median = 1.0585 mean = 1.0584 stddev = 0.0024 min = 1.0523 max = 1.0623 [rejected, p=0.00000, Δ=-0.20594±0.00236]. This is because it reduces the number of tPrimOpApp allocations from 551990 to 42534 (i.e. only a small minority of primop calls are partially applied), which in turn reduces time spent in the garbage collector.
296 lines
7.7 KiB
Text
296 lines
7.7 KiB
Text
/* Scanner configuration.
     reentrant, bison-bridge, bison-locations
         no global scanner state; yylex() receives yylval and yylloc
     noyywrap, never-interactive
         no yywrap() call at end of input; input is not treated as a terminal
     nodefault
         unmatched input is an error instead of being echoed
     stack
         enables yy_push_state() / yy_pop_state()
     nounput, noyy_top_state
         do not generate these unused helper functions */
%option reentrant bison-bridge bison-locations
%option noyywrap never-interactive nodefault
%option stack
%option nounput noyy_top_state

/* DEFAULT is inclusive: rules written without a start condition are active
   in it as well as in INITIAL. */
%s DEFAULT

/* Exclusive conditions: only rules that name them are active. */
%x STRING IND_STRING
%x INPATH INPATH_SLASH PATH_START
|
|
|
|
|
|
%{
|
|
#ifdef __clang__
|
|
#pragma clang diagnostic ignored "-Wunneeded-internal-declaration"
|
|
#endif
|
|
|
|
#include <boost/lexical_cast.hpp>
|
|
|
|
#include "nixexpr.hh"
|
|
#include "parser-tab.hh"
|
|
|
|
using namespace nix;
|
|
|
|
namespace nix {
|
|
|
|
// backup to recover from yyless(0)
|
|
// Written by adjustLoc() before it advances the location for a token; the
// rules that call yyless(0) copy it back into *yylloc.
YYLTYPE prev_yylloc;
|
|
|
|
/* Reset a location record so that both its start and its end refer to
   line 1, column 1. */
static void initLoc(YYLTYPE * loc)
{
    loc->first_line   = 1;
    loc->first_column = 1;
    loc->last_line    = 1;
    loc->last_column  = 1;
}
|
|
|
|
|
|
/* Advance a location record over one piece of matched text.
 *
 * The current contents of *loc are first saved in prev_yylloc.  The old end
 * position then becomes the new start position, and the end position is
 * moved over the `len` bytes starting at `s`:
 *   - CR, LF and CR/LF each count as a single line break, which increments
 *     the line and resets the column to 1;
 *   - every other byte increments the column.
 *
 * loc  location record to update in place
 * s    start of the matched text
 * len  number of bytes of `s` that belong to the match
 */
static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
{
    prev_yylloc = *loc;

    loc->first_line = loc->last_line;
    loc->first_column = loc->last_column;

    for (size_t i = 0; i < len; i++) {
        switch (*s++) {
        case '\r':
            /* Swallow the LF of a CR/LF pair, but only look at the next
               byte when it is still inside the match: never read s[len]. */
            if (i + 1 < len && *s == '\n') {
                i++;
                s++;
            }
            /* fall through */
        case '\n':
            ++loc->last_line;
            loc->last_column = 1;
            break;
        default:
            ++loc->last_column;
        }
    }
}
|
|
|
|
|
|
// FIXME: optimize
|
|
static Expr * unescapeStr(SymbolTable & symbols, const char * s, size_t length)
|
|
{
|
|
string t;
|
|
t.reserve(length);
|
|
char c;
|
|
while ((c = *s++)) {
|
|
if (c == '\\') {
|
|
assert(*s);
|
|
c = *s++;
|
|
if (c == 'n') t += '\n';
|
|
else if (c == 'r') t += '\r';
|
|
else if (c == 't') t += '\t';
|
|
else t += c;
|
|
}
|
|
else if (c == '\r') {
|
|
/* Normalise CR and CR/LF into LF. */
|
|
t += '\n';
|
|
if (*s == '\n') s++; /* cr/lf */
|
|
}
|
|
else t += c;
|
|
}
|
|
return new ExprString(symbols.create(t));
|
|
}
|
|
|
|
|
|
}
|
|
|
|
// flex hook run before the first scan: start the location at line 1, column 1.
#define YY_USER_INIT initLoc(yylloc)
|
|
// flex hook run before each matched rule's action: advance yylloc over the
// matched text. The trailing semicolon is part of the macro.
#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng);
|
|
|
|
// Enter start condition `state` via the scanner's start-condition stack.
#define PUSH_STATE(state) yy_push_state(state, yyscanner)
|
|
// Pop the scanner's start-condition stack, leaving the current start condition.
#define POP_STATE() yy_pop_state(yyscanner)
|
|
|
|
%}
|
|
|
|
|
|
/* Named patterns used by the rules. */

/* Any single character, newline included. */
ANY         .|\n

/* A letter or underscore, then letters, digits, underscores, apostrophes
   or dashes. */
ID          [a-zA-Z\_][a-zA-Z0-9\_\'\-]*
INT         [0-9]+

/* A decimal point is required; the exponent is optional. */
FLOAT       (([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?

/* PATH needs at least one slash followed by path characters and may end in
   a slash.  PATH_SEG is a possibly empty run of path characters followed by
   a slash.  HPATH and HPATH_START are the forms that begin with a tilde;
   SPATH is the form enclosed in angle brackets. */
PATH_CHAR   [a-zA-Z0-9\.\_\-\+]
PATH        {PATH_CHAR}*(\/{PATH_CHAR}+)+\/?
PATH_SEG    {PATH_CHAR}*\/
HPATH       \~(\/{PATH_CHAR}+)+\/?
HPATH_START \~\/
SPATH       \<{PATH_CHAR}+(\/{PATH_CHAR}+)*\>

/* A scheme, a colon, then at least one character from the allowed set. */
URI         [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+
|
|
|
|
|
|
%%
|
|
|
|
|
|
if          { return IF; }
then        { return THEN; }
else        { return ELSE; }
assert      { return ASSERT; }
with        { return WITH; }
let         { return LET; }
in          { return IN; }
rec         { return REC; }
inherit     { return INHERIT; }
or          { return OR_KW; }
\.\.\.      { return ELLIPSIS; }

    /* Operators longer than one character.  Single characters are handed
       to the parser as themselves by the catch-all {ANY} rule. */
\=\=        { return EQ; }
\!\=        { return NEQ; }
\<\=        { return LEQ; }
\>\=        { return GEQ; }
\&\&        { return AND; }
\|\|        { return OR; }
\-\>        { return IMPL; }
\/\/        { return UPDATE; }
\+\+        { return CONCAT; }
|
|
|
|
{ID}        {
                /* The token value is a strdup() copy of the matched text. */
                yylval->id = strdup(yytext);
                return ID;
            }
|
|
{INT}       {
                /* Convert the digits to int64_t; anything lexical_cast
                   rejects is reported as a parse error. */
                errno = 0;
                try {
                    const int64_t parsed = boost::lexical_cast<int64_t>(yytext);
                    yylval->n = parsed;
                } catch (const boost::bad_lexical_cast &) {
                    throw ParseError("invalid integer '%1%'", yytext);
                }
                return INT;
            }
|
|
{FLOAT}     {
                /* strtod() reports failure through errno, so clear it first
                   and check it afterwards. */
                errno = 0;
                yylval->nf = strtod(yytext, nullptr);
                if (errno != 0) {
                    throw ParseError("invalid float '%1%'", yytext);
                }
                return FLOAT;
            }
|
|
|
|
\$\{        { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }

\}          {
                /* INITIAL only ever sits at the bottom of the state stack,
                   where it acts as a marker; everywhere else DEFAULT is
                   used instead.  Popping while in INITIAL would raise an
                   empty-stack error, so skip the pop in that case. */
                if (YYSTATE != INITIAL) {
                    POP_STATE();
                }
                return '}';
            }

\{          { PUSH_STATE(DEFAULT); return '{'; }
|
|
|
|
\"          { PUSH_STATE(STRING); return '"'; }

<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})*\$/\" |
<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})+ {
                /* Two patterns share this action.  A chunk that ends in
                   '$' cannot be matched by the same regex as the general
                   case, because a trailing context is only allowed at the
                   very end of a rule. */
                yylval->e = unescapeStr(data->symbols, yytext, yyleng);
                return STR;
            }

<STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }

<STRING>\"  { POP_STATE(); return '"'; }
|
|
<STRING>\$|\\|\$\\ {
                /* This can only occur when we reach EOF, otherwise the above
                   (...|\$[^\{\"\\]|\\.|\$\\.)+ would have triggered.
                   This is technically invalid, but we leave the problem to the
                   parser who fails with exact location.

                   The token still needs a semantic value: returning STR with
                   yylval->e unset would hand the parser an indeterminate
                   pointer.  Wrap the raw matched text.  unescapeStr() is
                   not usable here because it asserts that a backslash is
                   followed by another character. */
                yylval->e = new ExprString(data->symbols.create(string(yytext)));
                return STR;
              }
|
|
|
|
\'\'(\ *\n)? { PUSH_STATE(IND_STRING); return IND_STRING_OPEN; }

<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'\$])+ {
                /* Ordinary text: passed through as matched. */
                yylval->e = new ExprIndStr(yytext);
                return IND_STR;
            }

<IND_STRING>\'\'\$ |
<IND_STRING>\$ {
                /* Both ''$ and a '$' that matched no longer rule produce a
                   literal dollar sign. */
                yylval->e = new ExprIndStr("$");
                return IND_STR;
            }

<IND_STRING>\'\'\' {
                /* ''' produces two literal apostrophes. */
                yylval->e = new ExprIndStr("''");
                return IND_STR;
            }

<IND_STRING>\'\'\\{ANY} {
                /* ''\X: skip the two apostrophes and unescape the
                   backslash sequence. */
                yylval->e = unescapeStr(data->symbols, yytext + 2, yyleng - 2);
                return IND_STR;
            }

<IND_STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }

<IND_STRING>\'\' { POP_STATE(); return IND_STRING_CLOSE; }

<IND_STRING>\' {
                /* A single apostrophe that matched no longer rule is
                   literal. */
                yylval->e = new ExprIndStr("'");
                return IND_STR;
            }
|
|
|
|
{PATH_SEG}\$\{ |
{HPATH_START}\$\{ {
                /* A path prefix directly followed by "${": switch to
                   PATH_START, push the whole match back with yyless(0) and
                   restore the location saved before this match. */
                PUSH_STATE(PATH_START);
                yyless(0);
                *yylloc = prev_yylloc;
            }

<PATH_START>{PATH_SEG} {
                /* Replace PATH_START with INPATH_SLASH and emit the
                   prefix as a PATH token. */
                POP_STATE();
                PUSH_STATE(INPATH_SLASH);
                yylval->path = strdup(yytext);
                return PATH;
            }

<PATH_START>{HPATH_START} {
                /* Same as above for the "~/" prefix, emitted as HPATH. */
                POP_STATE();
                PUSH_STATE(INPATH_SLASH);
                yylval->path = strdup(yytext);
                return HPATH;
            }
|
|
|
|
{PATH}      {
                /* Record whether the text matched so far ends in a slash. */
                PUSH_STATE(yytext[yyleng-1] == '/' ? INPATH_SLASH : INPATH);
                yylval->path = strdup(yytext);
                return PATH;
            }

{HPATH}     {
                /* Record whether the text matched so far ends in a slash. */
                PUSH_STATE(yytext[yyleng-1] == '/' ? INPATH_SLASH : INPATH);
                yylval->path = strdup(yytext);
                return HPATH;
            }
|
|
|
|
<INPATH,INPATH_SLASH>\$\{ {
                /* Replace the current path state with INPATH, then push
                   DEFAULT on top of it for the interpolated expression. */
                POP_STATE();
                PUSH_STATE(INPATH);
                PUSH_STATE(DEFAULT);
                return DOLLAR_CURLY;
            }

<INPATH,INPATH_SLASH>{PATH}|{PATH_SEG}|{PATH_CHAR}+ {
                /* Further path text: replace the current path state by the
                   one that says whether this piece ends in a slash, and
                   return the piece as a string. */
                POP_STATE();
                PUSH_STATE(yytext[yyleng-1] == '/' ? INPATH_SLASH : INPATH);
                yylval->e = new ExprString(data->symbols.create(string(yytext)));
                return STR;
            }
|
|
<INPATH>{ANY} |
<INPATH><<EOF>> {
                /* A character that cannot continue the path, or the end of
                   input: tell the parser the path is finished with a
                   PATH_END token, then push the character back so that it
                   is scanned again in the enclosing state (it may be ')',
                   ';', or something of that sort). */
                POP_STATE();
                yyless(0);
                *yylloc = prev_yylloc;
                return PATH_END;
            }

<INPATH_SLASH>{ANY} |
<INPATH_SLASH><<EOF>> {
                /* The path stopped right after a slash. */
                throw ParseError("path has a trailing slash");
            }
|
|
|
|
{SPATH}     {
                /* The token value is a strdup() copy of the matched text. */
                yylval->path = strdup(yytext);
                return SPATH;
            }

{URI}       {
                /* The token value is a strdup() copy of the matched text. */
                yylval->uri = strdup(yytext);
                return URI;
            }
|
|
|
|
[ \t\r\n]+                  /* whitespace: matched and dropped */
\#[^\r\n]*                  /* single-line comment, up to the end of the line */
\/\*([^*]|\*+[^*/])*\*+\/   /* long comment */
|
|
|
|
{ANY}       {
                /* Any other character is its own token.  Go through
                   unsigned char so the value is never negative: a negative
                   token would cause Bison to stop parsing without an
                   error. */
                return (unsigned char) yytext[0];
            }
|
|
|
|
%%
|