From c5baaafae69394082817ede9e6eb3910c4601a72 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 30 Jan 2004 15:21:42 +0000 Subject: [PATCH] * Replaced the SDF parser by a substantially faster Bison/Flex parser (roughly 80x faster). The absolutely latest version of Bison (1.875c) is required for reentrant GLR support, as well as a recent version of Flex (say, 2.5.31). Note that most Unix distributions ship with the prehistoric Flex 2.5.4, which doesn't support reentrancy. --- src/libexpr/Makefile.am | 18 ++--- src/libexpr/eval.cc | 2 + src/libexpr/lexer.l | 78 ++++++++++++++++++++ src/libexpr/nix.sdf | 131 --------------------------------- src/libexpr/parser.cc | 156 +++++++++++----------------------------- src/libexpr/parser.y | 128 +++++++++++++++++++++++++++++++++ 6 files changed, 261 insertions(+), 252 deletions(-) create mode 100644 src/libexpr/lexer.l delete mode 100644 src/libexpr/nix.sdf create mode 100644 src/libexpr/parser.y diff --git a/src/libexpr/Makefile.am b/src/libexpr/Makefile.am index a11dbbda6..66a3008ed 100644 --- a/src/libexpr/Makefile.am +++ b/src/libexpr/Makefile.am @@ -1,20 +1,22 @@ noinst_LIBRARIES = libexpr.a libexpr_a_SOURCES = nixexpr.cc nixexpr.hh parser.cc parser.hh \ - eval.cc eval.hh primops.cc primops.hh nix.sdf + eval.cc eval.hh primops.cc primops.hh \ + lexer-tab.c lexer-tab.h parser-tab.c parser-tab.h AM_CXXFLAGS = \ -I.. -I../../externals/inst/include -I../libutil -I../libstore +AM_CFLAGS = \ + -I../../externals/inst/include -# Parse table generation. +# Parser generation. -parser.o: parse-table.h +parser-tab.c parser-tab.h: parser.y + ../grammartest/inst/bin/bison -v -o parser-tab.c parser.y -d -parse-table.h: nix.tbl - ../bin2c/bin2c nixParseTable < $< > $@ || (rm $@ && exit 1) +lexer-tab.c lexer-tab.h: lexer.l + flex --outfile lexer-tab.c --header-file=lexer-tab.h lexer.l -%.tbl: %.sdf - ../../externals/inst/bin/sdf2table -s -i $< -o $@ -CLEANFILES = parse-table.h nix.tbl +CLEANFILES = diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 0470deee9..77cab55d0 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -137,6 +137,8 @@ Expr evalExpr2(EvalState & state, Expr e) /* Any encountered variables must be undeclared or primops. */ if (atMatch(m, e) >> "Var" >> s1) { if (s1 == "null") return primNull(state); + if (s1 == "true") return ATmake("Bool(True)"); + if (s1 == "false") return ATmake("Bool(False)"); return e; } diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l new file mode 100644 index 000000000..705b31b41 --- /dev/null +++ b/src/libexpr/lexer.l @@ -0,0 +1,78 @@ +%option reentrant bison-bridge bison-locations +%option noyywrap +%option never-interactive + + +%{ +#include +#include +#include "parser-tab.h" + +static void initLoc(YYLTYPE * loc) +{ + loc->first_line = 1; + loc->first_column = 1; +} + +static void adjustLoc(YYLTYPE * loc, const char * s, size_t len) +{ + while (len--) { + switch (*s++) { + case '\n': + ++loc->first_line; + loc->first_column = 1; + break; + default: + ++loc->first_column; + } + } +} + +#define YY_USER_INIT initLoc(yylloc) +#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng); + +%} + + +ID [a-zA-Z\_][a-zA-Z0-9\_\']* +INT [0-9]+ +STR \"[^\n\"]*\" +PATH [a-zA-Z0-9\.\_\-\+]*(\/[a-zA-Z0-9\.\_\-\+]+)+ +URI [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']* + + +%% + + +if { return IF; } +then { return THEN; } +else { return ELSE; } +assert { return ASSERT; } +let { return LET; } +rec { return REC; } + +\=\= { return EQ; } +\!\= { return NEQ; } +\&\& { return AND; } +\|\| { return OR; } +\-\> { return IMPL; } + +{ID} { yylval->t = ATmake("", yytext); return ID; /* !!! alloc */ } +{INT} { return INT; } +{STR} { int len = strlen(yytext); + yytext[len - 1] = 0; + yylval->t = ATmake("", yytext + 1); + yytext[len - 1] = '\"'; + return STR; /* !!! alloc */ + } +{PATH} { yylval->t = ATmake("", yytext); return PATH; /* !!! alloc */ } +{URI} { yylval->t = ATmake("", yytext); return URI; /* !!! alloc */ } + +[ \t\n]+ /* eat up whitespace */ +\#[^\n]* /* single-line comments */ +\/\*(.|\n)*\*\/ /* long comments */ + +. return yytext[0]; + + +%% diff --git a/src/libexpr/nix.sdf b/src/libexpr/nix.sdf deleted file mode 100644 index 36fbef39c..000000000 --- a/src/libexpr/nix.sdf +++ /dev/null @@ -1,131 +0,0 @@ -definition - -module Main -imports Fix - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% Top level syntax. - -module Fix -imports Fix-Exprs Fix-Layout - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% Expressions. - -module Fix-Exprs -imports Fix-Lexicals -exports - sorts Expr Formal Bind Binds ExprList - context-free syntax - - Id -> Expr {cons("Var")} - - Int -> Expr {cons("Int")} - - Str -> Expr {cons("Str")} - - Uri -> Expr {cons("Uri")} - - Path -> Expr {cons("Path")} - - "(" Expr ")" -> Expr {bracket} - - Expr Expr -> Expr {cons("Call"), left} - - "{" {Formal ","}* "}" ":" Expr -> Expr {cons("Function")} - Id -> Formal {cons("NoDefFormal")} - Id "?" Expr -> Formal {cons("DefFormal")} - - "assert" Expr ";" Expr -> Expr {cons("Assert")} - - "rec" "{" Binds "}" -> Expr {cons("Rec")} - "let" "{" Binds "}" -> Expr {cons("LetRec")} - "{" Binds "}" -> Expr {cons("Attrs")} - - Bind* -> Binds - Id "=" Expr ";" -> Bind {cons("Bind")} - - "[" ExprList "]" -> Expr {cons("List")} - "" -> ExprList {cons("ExprNil")} - Expr ExprList -> ExprList {cons("ExprCons")} - - Expr "." Id -> Expr {cons("Select")} - - "if" Expr "then" Expr "else" Expr -> Expr {cons("If")} - - Expr "==" Expr -> Expr {cons("OpEq"), non-assoc} - Expr "!=" Expr -> Expr {cons("OpNEq"), non-assoc} - - "!" Expr -> Expr {cons("OpNot")} - Expr "&&" Expr -> Expr {cons("OpAnd"), right} - Expr "||" Expr -> Expr {cons("OpOr"), right} - Expr "->" Expr -> Expr {cons("OpImpl"), right} - - context-free priorities - - Expr "." Id -> Expr - > Expr ExprList -> ExprList - > Expr Expr -> Expr - > "!" Expr -> Expr - > Expr "==" Expr -> Expr - > Expr "!=" Expr -> Expr - > Expr "&&" Expr -> Expr - > Expr "||" Expr -> Expr - > Expr "->" Expr -> Expr - > "assert" Expr ";" Expr -> Expr - > "{" {Formal ","}* "}" ":" Expr -> Expr - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% Lexical syntax. - -module Fix-Lexicals -exports - sorts Id Int Str Path PathComp Uri - lexical syntax - [a-zA-Z\_][a-zA-Z0-9\_\']* -> Id - "rec" -> Id {reject} - "let" -> Id {reject} - "if" -> Id {reject} - "then" -> Id {reject} - "else" -> Id {reject} - "assert" -> Id {reject} - - [0-9]+ -> Int - - "\"" ~[\n\"]* "\"" -> Str - - "." ("/" PathComp)+ -> Path - ".." ("/" PathComp)+ -> Path - ("/" PathComp)+ -> Path - [a-zA-Z0-9\.\_\-\+]+ -> PathComp - - [a-zA-Z] [a-zA-Z0-9\+\-\.]* ":" [a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']* -> Uri - - lexical restrictions - Id -/- [a-zA-Z0-9\_\'] - Int -/- [0-9] - Path -/- [a-zA-Z0-9\.\_\-\+\/] - Uri -/- [a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\'] - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% Layout. - -module Fix-Layout -exports - sorts HashComment Asterisk Comment - lexical syntax - [\ \t\n] -> LAYOUT - HashComment -> LAYOUT - Comment -> LAYOUT - "#" ~[\n]* -> HashComment - "/*" ( ~[\*] | Asterisk )* "*/" -> Comment - [\*] -> Asterisk - lexical restrictions - Asterisk -/- [\/] - HashComment -/- ~[\n] - context-free restrictions - LAYOUT? -/- [\ \t\n] diff --git a/src/libexpr/parser.cc b/src/libexpr/parser.cc index 1e0ef0c45..167c34bd8 100644 --- a/src/libexpr/parser.cc +++ b/src/libexpr/parser.cc @@ -5,133 +5,63 @@ #include #include -extern "C" { -#include -#include -} - #include "aterm.hh" #include "parser.hh" -#include "parse-table.h" -/* Cleanup cleans up an imploded parse tree into an actual abstract - syntax tree that we can evaluate. It removes quotes around - strings, converts integer literals into actual integers, and - absolutises paths relative to the directory containing the input - file. */ -struct Cleanup : TermFun +struct ParseData { + Expr result; string basePath; - - virtual ATerm operator () (ATerm e) - { - checkInterrupt(); - - ATMatcher m; - string s; - - if (atMatch(m, e) >> "Str" >> s) - return ATmake("Str()", - string(s, 1, s.size() - 2).c_str()); - - if (atMatch(m, e) >> "Path" >> s) - return ATmake("Path()", absPath(s, basePath).c_str()); - - if (atMatch(m, e) >> "Int" >> s) { - istringstream s2(s); - int n; - s2 >> n; - return ATmake("Int()", n); - } - - if (atMatch(m, e) >> "Var" >> "true") - return ATmake("Bool(True)"); - - if (atMatch(m, e) >> "Var" >> "false") - return ATmake("Bool(False)"); - - if (atMatch(m, e) >> "ExprNil") - return (ATerm) ATempty; - - ATerm e1; - ATermList e2; - if (atMatch(m, e) >> "ExprCons" >> e1 >> e2) - return (ATerm) ATinsert(e2, e1); - - return e; - } + string location; + string error; }; +extern "C" { + +#include "parser-tab.h" +#include "lexer-tab.h" + + /* Callbacks for getting from C to C++. Due to a (small) bug in the + GLR code of Bison we cannot currently compile the parser as C++ + code. */ + + void setParseResult(ParseData * data, ATerm t) + { + data->result = t; + } + + ATerm absParsedPath(ParseData * data, ATerm t) + { + return string2ATerm(absPath(aterm2String(t), data->basePath).c_str()); + } + + void parseError(ParseData * data, char * error, int line, int column) + { + data->error = (format("%1%, at line %2%, column %3%, of %4%") + % error % line % column % data->location).str(); + } + + int yyparse(yyscan_t scanner, ParseData * data); +} + static Expr parse(const char * text, const string & location, const Path & basePath) { - /* Initialise the SDF libraries. */ - static bool initialised = false; - static ATerm parseTable = 0; - static language lang = 0; + yyscan_t scanner; + ParseData data; + data.basePath = basePath; + data.location = location; - if (!initialised) { - PT_initMEPTApi(); - PT_initAsFix2Api(); - SGinitParser(ATfalse); - - ATprotect(&parseTable); - parseTable = ATreadFromBinaryString( - (char *) nixParseTable, sizeof nixParseTable); - if (!parseTable) - throw Error(format("cannot construct parse table term")); - - ATprotect(&lang); - lang = ATmake("Nix"); - if (!SGopenLanguageFromTerm("nix-parse", lang, parseTable)) - throw Error(format("cannot open language")); - - SG_STARTSYMBOL_ON(); - SG_OUTPUT_ON(); - SG_ASFIX2ME_ON(); - SG_AMBIGUITY_ERROR_ON(); - SG_FILTER_OFF(); - - initialised = true; - } - - /* Parse it. */ - ATerm result = SGparseString(lang, "Expr", (char *) text); - if (!result) - throw SysError(format("parse failed in `%1%'") % location); - if (SGisParseError(result)) - throw Error(format("parse error in `%1%': %2%") - % location % result); - - /* Implode it. */ - PT_ParseTree tree = PT_makeParseTreeFromTerm(result); - if (!tree) - throw Error(format("cannot create parse tree")); + yylex_init(&scanner); + yy_scan_string(text, scanner); + int res = yyparse(scanner, &data); + yylex_destroy(scanner); - ATerm imploded = PT_implodeParseTree(tree, - ATtrue, - ATtrue, - ATtrue, - ATtrue, - ATtrue, - ATtrue, - ATfalse, - ATtrue, - ATtrue, - ATtrue, - ATfalse); - if (!imploded) - throw Error(format("cannot implode parse tree")); + if (res) throw Error(data.error); - printMsg(lvlVomit, format("imploded parse tree of `%1%': %2%") - % location % imploded); - - /* Finally, clean it up. */ - Cleanup cleanup; - cleanup.basePath = basePath; - return bottomupRewrite(cleanup, imploded); + return data.result; } @@ -171,7 +101,7 @@ Expr parseExprFromFile(Path path) readFull(fd, (unsigned char *) text, st.st_size); text[st.st_size] = 0; - return parse(text, path, dirOf(path)); + return parse(text, "`" + path + "'", dirOf(path)); } diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y new file mode 100644 index 000000000..45e6a98e8 --- /dev/null +++ b/src/libexpr/parser.y @@ -0,0 +1,128 @@ +%glr-parser +%pure-parser +%locations +%error-verbose +%parse-param { yyscan_t scanner } +%parse-param { void * data } +%lex-param { yyscan_t scanner } + +%{ +#include +#include +#include +#include + +#include "parser-tab.h" +#include "lexer-tab.h" + +void setParseResult(void * data, ATerm t); +void parseError(void * data, char * error, int line, int column); +ATerm absParsedPath(void * data, ATerm t); + +void yyerror(YYLTYPE * loc, yyscan_t scanner, void * data, char * s) +{ + parseError(data, s, loc->first_line, loc->first_column); +} + +%} + +%union { + ATerm t; + ATermList ts; +} + +%type start expr expr_function expr_assert expr_op +%type expr_app expr_select expr_simple bind formal +%type binds expr_list formals +%token ID INT STR PATH URI +%token IF THEN ELSE ASSERT LET REC EQ NEQ AND OR IMPL + +%nonassoc IMPL +%left OR +%left AND +%nonassoc EQ NEQ +%left NEG + +%% + +start: expr { setParseResult(data, $1); }; + +expr: expr_function; + +expr_function + : '{' formals '}' ':' expr_function + { $$ = ATmake("Function(, )", $2, $5); } + | expr_assert + ; + +expr_assert + : ASSERT expr ';' expr_assert + { $$ = ATmake("Assert(, )", $2, $4); } + | expr_op + ; + +expr_op + : '!' expr_op %prec NEG { $$ = ATmake("OpNot()", $2); } + | expr_op EQ expr_op { $$ = ATmake("OpEq(, )", $1, $3); } + | expr_op NEQ expr_op { $$ = ATmake("OpNEq(, )", $1, $3); } + | expr_op AND expr_op { $$ = ATmake("OpAnd(, )", $1, $3); } + | expr_op OR expr_op { $$ = ATmake("OpOr(, )", $1, $3); } + | expr_op IMPL expr_op { $$ = ATmake("OpImpl(, )", $1, $3); } + | expr_app + ; + +expr_app + : expr_app expr_select + { $$ = ATmake("Call(, )", $1, $2); } + | expr_select { $$ = $1; } + ; + +expr_select + : expr_select '.' ID + { $$ = ATmake("Select(, )", $1, $3); } + | expr_simple { $$ = $1; } + ; + +expr_simple + : ID { $$ = ATmake("Var()", $1); } + | STR { $$ = ATmake("Str()", $1); } + | PATH { $$ = ATmake("Path()", absParsedPath(data, $1)); } + | URI { $$ = ATmake("Uri()", $1); } + | '(' expr ')' { $$ = $2; } + | LET '{' binds '}' { $$ = ATmake("LetRec()", $3); } + | REC '{' binds '}' { $$ = ATmake("Rec()", $3); } + | '{' binds '}' { $$ = ATmake("Attrs()", $2); } + | '[' expr_list ']' { $$ = ATmake("List()", $2); } + | IF expr THEN expr ELSE expr + { $$ = ATmake("If(, , )", $2, $4, $6); } + ; + +binds + : binds bind { $$ = ATinsert($1, $2); } + | { $$ = ATempty; } + ; + +bind + : ID '=' expr ';' + { $$ = ATmake("Bind(, )", $1, $3); } + ; + +expr_list + : expr_select expr_list { $$ = ATinsert($2, $1); } + /* yes, this is right-recursive, but it doesn't matter since + otherwise we would need ATreverse which requires unbounded + stack space */ + | { $$ = ATempty; } + ; + +formals + : formal ',' formals { $$ = ATinsert($3, $1); } /* idem - right recursive */ + | formal { $$ = ATinsert(ATempty, $1); } + ; + +formal + : ID { $$ = ATmake("NoDefFormal()", $1); } + | ID '?' expr { $$ = ATmake("DefFormal(, )", $1, $3); } + ; + +%%