* Replaced the SDF parser by a substantially faster Bison/Flex

parser (roughly 80x faster).

  The absolutely latest version of Bison (1.875c) is required for
  reentrant GLR support, as well as a recent version of Flex (say,
  2.5.31).  Note that most Unix distributions ship with the
  prehistoric Flex 2.5.4, which doesn't support reentrancy.
This commit is contained in:
Eelco Dolstra 2004-01-30 15:21:42 +00:00
parent abd1878b26
commit c5baaafae6
6 changed files with 261 additions and 252 deletions

View file

@ -1,20 +1,22 @@
noinst_LIBRARIES = libexpr.a noinst_LIBRARIES = libexpr.a
libexpr_a_SOURCES = nixexpr.cc nixexpr.hh parser.cc parser.hh \ libexpr_a_SOURCES = nixexpr.cc nixexpr.hh parser.cc parser.hh \
eval.cc eval.hh primops.cc primops.hh nix.sdf eval.cc eval.hh primops.cc primops.hh \
lexer-tab.c lexer-tab.h parser-tab.c parser-tab.h
AM_CXXFLAGS = \ AM_CXXFLAGS = \
-I.. -I../../externals/inst/include -I../libutil -I../libstore -I.. -I../../externals/inst/include -I../libutil -I../libstore
AM_CFLAGS = \
-I../../externals/inst/include
# Parse table generation. # Parser generation.
parser.o: parse-table.h parser-tab.c parser-tab.h: parser.y
../grammartest/inst/bin/bison -v -o parser-tab.c parser.y -d
parse-table.h: nix.tbl lexer-tab.c lexer-tab.h: lexer.l
../bin2c/bin2c nixParseTable < $< > $@ || (rm $@ && exit 1) flex --outfile lexer-tab.c --header-file=lexer-tab.h lexer.l
%.tbl: %.sdf
../../externals/inst/bin/sdf2table -s -i $< -o $@
CLEANFILES = parse-table.h nix.tbl CLEANFILES =

View file

@ -137,6 +137,8 @@ Expr evalExpr2(EvalState & state, Expr e)
/* Any encountered variables must be undeclared or primops. */ /* Any encountered variables must be undeclared or primops. */
if (atMatch(m, e) >> "Var" >> s1) { if (atMatch(m, e) >> "Var" >> s1) {
if (s1 == "null") return primNull(state); if (s1 == "null") return primNull(state);
if (s1 == "true") return ATmake("Bool(True)");
if (s1 == "false") return ATmake("Bool(False)");
return e; return e;
} }

78
src/libexpr/lexer.l Normal file
View file

@ -0,0 +1,78 @@
%option reentrant bison-bridge bison-locations
%option noyywrap
%option never-interactive
%{
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <aterm2.h>
#include "parser-tab.h"
/* Reset the location to the first character of the input.  Both the
   start and the end position are set, because adjustLoc() derives
   the start of each match from the end of the previous one. */
static void initLoc(YYLTYPE * loc)
{
    loc->first_line = loc->last_line = 1;
    loc->first_column = loc->last_column = 1;
}

/* Advance the location over the `len' characters at `s' that were
   just matched.  Afterwards first_line/first_column is the position
   at which the matched text starts and last_line/last_column is the
   position just past its end, so an error reported through
   first_line/first_column points at the start of the offending token
   instead of behind it.  A newline moves to column 1 of the next
   line; every other character advances one column. */
static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
{
    /* The previous match ended where this one begins. */
    loc->first_line = loc->last_line;
    loc->first_column = loc->last_column;

    while (len--) {
        switch (*s++) {
        case '\n':
            ++loc->last_line;
            loc->last_column = 1;
            break;
        default:
            ++loc->last_column;
        }
    }
}
#define YY_USER_INIT initLoc(yylloc)
#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng);
%}
/* Named patterns referenced by the rules section.

   ID    a letter or underscore, followed by any number of letters,
         digits, underscores and apostrophes.
   INT   one or more decimal digits.
   STR   a double-quoted string on a single line; the pattern has no
         escape mechanism, so the text cannot contain a double quote.
   PATH  an optional leading component followed by one or more
         slash-prefixed components; a component is made of letters,
         digits, '.', '_', '-' and '+'.
   URI   a scheme (a letter followed by letters, digits, '+', '-' or
         '.'), a colon, and a possibly empty run of URI characters. */
ID [a-zA-Z\_][a-zA-Z0-9\_\']*
INT [0-9]+
STR \"[^\n\"]*\"
PATH [a-zA-Z0-9\.\_\-\+]*(\/[a-zA-Z0-9\.\_\-\+]+)+
URI [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']*
%%
if          { return IF; /* keywords precede {ID} so that they win equal-length matches */ }
then        { return THEN; }
else        { return ELSE; }
assert      { return ASSERT; }
let         { return LET; }
rec         { return REC; }

\=\=        { return EQ; }
\!\=        { return NEQ; }
\&\&        { return AND; }
\|\|        { return OR; }
\-\>        { return IMPL; }

{ID}        { yylval->t = ATmake("<str>", yytext); return ID; /* !!! alloc */ }
{INT}       { /* INT is declared with semantic type <t>, so give it a
                 value: the literal as an integer term.  The pattern
                 only admits digits, so the value is non-negative;
                 literals that do not fit in an int are clamped to
                 INT_MAX (strtol itself saturates at LONG_MAX).
                 NOTE(review): the grammar has no production that
                 consumes INT yet. */
              long n = strtol(yytext, NULL, 10);
              if (n > INT_MAX) n = INT_MAX;
              yylval->t = ATmake("<int>", (int) n);
              return INT; /* !!! alloc */
            }
{STR}       { /* Temporarily overwrite the closing quote with a NUL so
                 that ATmake sees only the text between the quotes,
                 then put the quote back. */
              int len = strlen(yytext);
              yytext[len - 1] = 0;
              yylval->t = ATmake("<str>", yytext + 1);
              yytext[len - 1] = '\"';
              return STR; /* !!! alloc */
            }
{PATH}      { yylval->t = ATmake("<str>", yytext); return PATH; /* !!! alloc */ }
{URI}       { yylval->t = ATmake("<str>", yytext); return URI; /* !!! alloc */ }
[ \t\n]+    /* eat up whitespace */
\#[^\n]*    /* single-line comments */
\/\*([^*]|\*+[^*\/])*\*+\/  /* long comments; the body cannot run past the first comment terminator */
.           return yytext[0];
%%

View file

@ -1,131 +0,0 @@
definition
module Main
imports Fix
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Top level syntax.
module Fix
imports Fix-Exprs Fix-Layout
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Expressions.
module Fix-Exprs
imports Fix-Lexicals
exports
sorts Expr Formal Bind Binds ExprList
context-free syntax
Id -> Expr {cons("Var")}
Int -> Expr {cons("Int")}
Str -> Expr {cons("Str")}
Uri -> Expr {cons("Uri")}
Path -> Expr {cons("Path")}
"(" Expr ")" -> Expr {bracket}
Expr Expr -> Expr {cons("Call"), left}
"{" {Formal ","}* "}" ":" Expr -> Expr {cons("Function")}
Id -> Formal {cons("NoDefFormal")}
Id "?" Expr -> Formal {cons("DefFormal")}
"assert" Expr ";" Expr -> Expr {cons("Assert")}
"rec" "{" Binds "}" -> Expr {cons("Rec")}
"let" "{" Binds "}" -> Expr {cons("LetRec")}
"{" Binds "}" -> Expr {cons("Attrs")}
Bind* -> Binds
Id "=" Expr ";" -> Bind {cons("Bind")}
"[" ExprList "]" -> Expr {cons("List")}
"" -> ExprList {cons("ExprNil")}
Expr ExprList -> ExprList {cons("ExprCons")}
Expr "." Id -> Expr {cons("Select")}
"if" Expr "then" Expr "else" Expr -> Expr {cons("If")}
Expr "==" Expr -> Expr {cons("OpEq"), non-assoc}
Expr "!=" Expr -> Expr {cons("OpNEq"), non-assoc}
"!" Expr -> Expr {cons("OpNot")}
Expr "&&" Expr -> Expr {cons("OpAnd"), right}
Expr "||" Expr -> Expr {cons("OpOr"), right}
Expr "->" Expr -> Expr {cons("OpImpl"), right}
context-free priorities
Expr "." Id -> Expr
> Expr ExprList -> ExprList
> Expr Expr -> Expr
> "!" Expr -> Expr
> Expr "==" Expr -> Expr
> Expr "!=" Expr -> Expr
> Expr "&&" Expr -> Expr
> Expr "||" Expr -> Expr
> Expr "->" Expr -> Expr
> "assert" Expr ";" Expr -> Expr
> "{" {Formal ","}* "}" ":" Expr -> Expr
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Lexical syntax.
module Fix-Lexicals
exports
sorts Id Int Str Path PathComp Uri
lexical syntax
[a-zA-Z\_][a-zA-Z0-9\_\']* -> Id
"rec" -> Id {reject}
"let" -> Id {reject}
"if" -> Id {reject}
"then" -> Id {reject}
"else" -> Id {reject}
"assert" -> Id {reject}
[0-9]+ -> Int
"\"" ~[\n\"]* "\"" -> Str
"." ("/" PathComp)+ -> Path
".." ("/" PathComp)+ -> Path
("/" PathComp)+ -> Path
[a-zA-Z0-9\.\_\-\+]+ -> PathComp
[a-zA-Z] [a-zA-Z0-9\+\-\.]* ":" [a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']* -> Uri
lexical restrictions
Id -/- [a-zA-Z0-9\_\']
Int -/- [0-9]
Path -/- [a-zA-Z0-9\.\_\-\+\/]
Uri -/- [a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Layout.
module Fix-Layout
exports
sorts HashComment Asterisk Comment
lexical syntax
[\ \t\n] -> LAYOUT
HashComment -> LAYOUT
Comment -> LAYOUT
"#" ~[\n]* -> HashComment
"/*" ( ~[\*] | Asterisk )* "*/" -> Comment
[\*] -> Asterisk
lexical restrictions
Asterisk -/- [\/]
HashComment -/- ~[\n]
context-free restrictions
LAYOUT? -/- [\ \t\n]

View file

@ -5,133 +5,63 @@
#include <fcntl.h> #include <fcntl.h>
#include <unistd.h> #include <unistd.h>
extern "C" {
#include <sglr.h>
#include <asfix2.h>
}
#include "aterm.hh" #include "aterm.hh"
#include "parser.hh" #include "parser.hh"
#include "parse-table.h"
/* Cleanup cleans up an imploded parse tree into an actual abstract struct ParseData
syntax tree that we can evaluate. It removes quotes around
strings, converts integer literals into actual integers, and
absolutises paths relative to the directory containing the input
file. */
struct Cleanup : TermFun
{ {
Expr result;
string basePath; string basePath;
string location;
virtual ATerm operator () (ATerm e) string error;
{
checkInterrupt();
ATMatcher m;
string s;
if (atMatch(m, e) >> "Str" >> s)
return ATmake("Str(<str>)",
string(s, 1, s.size() - 2).c_str());
if (atMatch(m, e) >> "Path" >> s)
return ATmake("Path(<str>)", absPath(s, basePath).c_str());
if (atMatch(m, e) >> "Int" >> s) {
istringstream s2(s);
int n;
s2 >> n;
return ATmake("Int(<int>)", n);
}
if (atMatch(m, e) >> "Var" >> "true")
return ATmake("Bool(True)");
if (atMatch(m, e) >> "Var" >> "false")
return ATmake("Bool(False)");
if (atMatch(m, e) >> "ExprNil")
return (ATerm) ATempty;
ATerm e1;
ATermList e2;
if (atMatch(m, e) >> "ExprCons" >> e1 >> e2)
return (ATerm) ATinsert(e2, e1);
return e;
}
}; };
extern "C" {
#include "parser-tab.h"
#include "lexer-tab.h"
/* Callbacks for getting from C to C++. Due to a (small) bug in the
GLR code of Bison we cannot currently compile the parser as C++
code. */
/* Store the term produced by the parser in `data->result'.  The
   grammar's start rule calls this with the term for the whole
   input. */
void setParseResult(ParseData * data, ATerm t)
{
data->result = t;
}
/* Turn the path held in term `t' into an absolute path, resolving it
   against `data->basePath', and return the result as a new term. */
ATerm absParsedPath(ParseData * data, ATerm t)
{
    string parsedPath = aterm2String(t);
    string absolutePath = absPath(parsedPath, data->basePath);
    return string2ATerm(absolutePath.c_str());
}
/* Record a parse failure in `data->error': the parser's message
   followed by the line, the column and `data->location'. */
void parseError(ParseData * data, char * error, int line, int column)
{
    format message("%1%, at line %2%, column %3%, of %4%");
    message % error % line % column % data->location;
    data->error = message.str();
}
int yyparse(yyscan_t scanner, ParseData * data);
}
static Expr parse(const char * text, const string & location, static Expr parse(const char * text, const string & location,
const Path & basePath) const Path & basePath)
{ {
/* Initialise the SDF libraries. */ yyscan_t scanner;
static bool initialised = false; ParseData data;
static ATerm parseTable = 0; data.basePath = basePath;
static language lang = 0; data.location = location;
if (!initialised) { yylex_init(&scanner);
PT_initMEPTApi(); yy_scan_string(text, scanner);
PT_initAsFix2Api(); int res = yyparse(scanner, &data);
SGinitParser(ATfalse); yylex_destroy(scanner);
ATprotect(&parseTable); if (res) throw Error(data.error);
parseTable = ATreadFromBinaryString(
(char *) nixParseTable, sizeof nixParseTable);
if (!parseTable)
throw Error(format("cannot construct parse table term"));
ATprotect(&lang); return data.result;
lang = ATmake("Nix");
if (!SGopenLanguageFromTerm("nix-parse", lang, parseTable))
throw Error(format("cannot open language"));
SG_STARTSYMBOL_ON();
SG_OUTPUT_ON();
SG_ASFIX2ME_ON();
SG_AMBIGUITY_ERROR_ON();
SG_FILTER_OFF();
initialised = true;
}
/* Parse it. */
ATerm result = SGparseString(lang, "Expr", (char *) text);
if (!result)
throw SysError(format("parse failed in `%1%'") % location);
if (SGisParseError(result))
throw Error(format("parse error in `%1%': %2%")
% location % result);
/* Implode it. */
PT_ParseTree tree = PT_makeParseTreeFromTerm(result);
if (!tree)
throw Error(format("cannot create parse tree"));
ATerm imploded = PT_implodeParseTree(tree,
ATtrue,
ATtrue,
ATtrue,
ATtrue,
ATtrue,
ATtrue,
ATfalse,
ATtrue,
ATtrue,
ATtrue,
ATfalse);
if (!imploded)
throw Error(format("cannot implode parse tree"));
printMsg(lvlVomit, format("imploded parse tree of `%1%': %2%")
% location % imploded);
/* Finally, clean it up. */
Cleanup cleanup;
cleanup.basePath = basePath;
return bottomupRewrite(cleanup, imploded);
} }
@ -171,7 +101,7 @@ Expr parseExprFromFile(Path path)
readFull(fd, (unsigned char *) text, st.st_size); readFull(fd, (unsigned char *) text, st.st_size);
text[st.st_size] = 0; text[st.st_size] = 0;
return parse(text, path, dirOf(path)); return parse(text, "`" + path + "'", dirOf(path));
} }

128
src/libexpr/parser.y Normal file
View file

@ -0,0 +1,128 @@
%glr-parser
%pure-parser
%locations
%error-verbose
%parse-param { yyscan_t scanner }
%parse-param { void * data }
%lex-param { yyscan_t scanner }
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <aterm2.h>
#include "parser-tab.h"
#include "lexer-tab.h"
void setParseResult(void * data, ATerm t);
void parseError(void * data, char * error, int line, int column);
ATerm absParsedPath(void * data, ATerm t);
/* Error callback invoked by the generated parser.  Passes the message
   `s', together with the line and column stored in `loc', on to
   parseError().  `scanner' is not used. */
void yyerror(YYLTYPE * loc, yyscan_t scanner, void * data, char * s)
{
    int line = loc->first_line;
    int column = loc->first_column;
    parseError(data, s, line, column);
}
%}
/* Semantic value of a symbol: `t' for symbols carrying a single term,
   `ts' for symbols carrying a list of terms. */
%union {
ATerm t;
ATermList ts;
}
%type <t> start expr expr_function expr_assert expr_op
%type <t> expr_app expr_select expr_simple bind formal
%type <ts> binds expr_list formals
%token <t> ID INT STR PATH URI
%token IF THEN ELSE ASSERT LET REC EQ NEQ AND OR IMPL
/* Operator precedence, from loosest to tightest binding.  Implication
   is right-associative, i.e. `a -> b -> c' means `a -> (b -> c)';
   declaring it non-associative would make such a chain a syntax
   error. */
%right IMPL
%left OR
%left AND
%nonassoc EQ NEQ
%left NEG
%%
/* Entry point: the whole input is one expression, whose term is
   handed back through setParseResult(). */
start: expr { setParseResult(data, $1); };
/* `expr' is the loosest-binding expression level. */
expr: expr_function;
/* Function abstraction `{ formals }: body', built as
   Function(formals, body).  The body is itself an expr_function, so
   nested functions group to the right. */
expr_function
: '{' formals '}' ':' expr_function
{ $$ = ATmake("Function(<term>, <term>)", $2, $5); }
| expr_assert
;
/* Assertion `assert cond; body', built as Assert(cond, body).  The
   condition may be any expression; the body is an expr_assert, so
   assertions can be chained. */
expr_assert
: ASSERT expr ';' expr_assert
{ $$ = ATmake("Assert(<term>, <term>)", $2, $4); }
| expr_op
;
/* Boolean operators.  The relative precedence and associativity of
   the binary operators come from the precedence declarations in the
   prologue; negation takes the precedence of the NEG pseudo-token. */
expr_op
: '!' expr_op %prec NEG { $$ = ATmake("OpNot(<term>)", $2); }
| expr_op EQ expr_op { $$ = ATmake("OpEq(<term>, <term>)", $1, $3); }
| expr_op NEQ expr_op { $$ = ATmake("OpNEq(<term>, <term>)", $1, $3); }
| expr_op AND expr_op { $$ = ATmake("OpAnd(<term>, <term>)", $1, $3); }
| expr_op OR expr_op { $$ = ATmake("OpOr(<term>, <term>)", $1, $3); }
| expr_op IMPL expr_op { $$ = ATmake("OpImpl(<term>, <term>)", $1, $3); }
| expr_app
;
/* Function application by juxtaposition, built as Call(function,
   argument).  The rule is left-recursive, so `f a b' is
   Call(Call(f, a), b). */
expr_app
: expr_app expr_select
{ $$ = ATmake("Call(<term>, <term>)", $1, $2); }
| expr_select { $$ = $1; }
;
/* Attribute selection `e.name', built as Select(e, name).  The rule
   is left-recursive, so `a.b.c' is Select(Select(a, b), c). */
expr_select
: expr_select '.' ID
{ $$ = ATmake("Select(<term>, <term>)", $1, $3); }
| expr_simple { $$ = $1; }
;
/* Atomic and bracketed expressions: variables, string/path/URI
   literals, parenthesised expressions, the three attribute-set forms
   (`let { }', `rec { }' and plain `{ }'), lists and conditionals.
   Paths are made absolute through absParsedPath() before being
   wrapped in Path(...).
   NOTE(review): INT is declared as a token but there is no
   alternative for it here, so integer literals are rejected; the SDF
   grammar this parser replaces accepted them as Int. */
expr_simple
: ID { $$ = ATmake("Var(<term>)", $1); }
| STR { $$ = ATmake("Str(<term>)", $1); }
| PATH { $$ = ATmake("Path(<term>)", absParsedPath(data, $1)); }
| URI { $$ = ATmake("Uri(<term>)", $1); }
| '(' expr ')' { $$ = $2; }
| LET '{' binds '}' { $$ = ATmake("LetRec(<term>)", $3); }
| REC '{' binds '}' { $$ = ATmake("Rec(<term>)", $3); }
| '{' binds '}' { $$ = ATmake("Attrs(<term>)", $2); }
| '[' expr_list ']' { $$ = ATmake("List(<term>)", $2); }
| IF expr THEN expr ELSE expr
{ $$ = ATmake("If(<term>, <term>, <term>)", $2, $4, $6); }
;
/* A possibly empty sequence of bindings, collected into a term list.
   The rule is left-recursive and adds each new binding with
   ATinsert, so the list is presumably in reverse source order --
   TODO confirm that no consumer depends on the order. */
binds
: binds bind { $$ = ATinsert($1, $2); }
| { $$ = ATempty; }
;
/* A single binding `name = expr;', built as Bind(name, expr). */
bind
: ID '=' expr ';'
{ $$ = ATmake("Bind(<term>, <term>)", $1, $3); }
;
/* The possibly empty element sequence of a list.  Elements are
   expr_select, not full expressions, so anything looser-binding
   (for instance a function application) has to be parenthesised
   inside a list. */
expr_list
: expr_select expr_list { $$ = ATinsert($2, $1); }
/* yes, this is right-recursive, but it doesn't matter since
otherwise we would need ATreverse which requires unbounded
stack space */
| { $$ = ATempty; }
;
/* The comma-separated formal arguments of a function, collected into
   a term list in source order.
   NOTE(review): at least one formal is required; the SDF grammar this
   parser replaces also accepted an empty argument list. */
formals
: formal ',' formals { $$ = ATinsert($3, $1); } /* idem - right recursive */
| formal { $$ = ATinsert(ATempty, $1); }
;
/* One formal argument: either a bare name, built as
   NoDefFormal(name), or `name ? expr', built as
   DefFormal(name, expr). */
formal
: ID { $$ = ATmake("NoDefFormal(<term>)", $1); }
| ID '?' expr { $$ = ATmake("DefFormal(<term>, <term>)", $1, $3); }
;
%%