From b95a3dc45bcbbe8a0985bab82146ed00afcf0239 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 29 Oct 2003 16:05:03 +0000 Subject: [PATCH] * Basic grammar and parser for the Fix language. We use libsglr and friends to do the parsing. The parse table is embedded in the Fix executable using bin2c, which converts an arbitrary file into a C character array. --- src/fix-ng/Makefile.am | 21 +++++- src/fix-ng/bin2c.c | 23 ++++++ src/fix-ng/fix.cc | 7 +- src/fix-ng/fix.sdf | 163 +++++++++++++++++++++++++++++++++++++++++ src/fix-ng/parser.cc | 76 +++++++++++++++++++ src/fix-ng/parser.hh | 15 ++++ 6 files changed, 298 insertions(+), 7 deletions(-) create mode 100644 src/fix-ng/bin2c.c create mode 100644 src/fix-ng/fix.sdf create mode 100644 src/fix-ng/parser.cc create mode 100644 src/fix-ng/parser.hh diff --git a/src/fix-ng/Makefile.am b/src/fix-ng/Makefile.am index 64e8b2ed3..3672c3dc9 100644 --- a/src/fix-ng/Makefile.am +++ b/src/fix-ng/Makefile.am @@ -1,8 +1,25 @@ bin_PROGRAMS = fix-ng -fix_ng_SOURCES = fix.cc +fix_ng_SOURCES = fix.cc parser.cc fix_ng_LDADD = ../libmain/libmain.a ../libnix/libnix.a ../boost/format/libformat.a \ - -L../../externals/inst/lib -ldb_cxx -lATerm + -L../../externals/inst/lib -ldb_cxx -lsglr -lATB -lconversion -lasfix2 -lmept -lATerm AM_CXXFLAGS = \ -I.. -I../../externals/inst/include -I../libnix -I../libmain + + +# Parse table generation. + +parser.o: parse-table.h + +parse-table.h: fix.tbl bin2c + ./bin2c fixParseTable < $< > $@ || (rm $@ && exit 1) + +noinst_PROGRAMS = bin2c + +bin2c_SOURCES = bin2c.c + +%.tbl: %.sdf + ../../externals/inst/bin/sdf2table -i $< -o $@ + +CLEANFILES = parse-table.h fix.tbl diff --git a/src/fix-ng/bin2c.c b/src/fix-ng/bin2c.c new file mode 100644 index 000000000..18bf81d69 --- /dev/null +++ b/src/fix-ng/bin2c.c @@ -0,0 +1,23 @@ +#include +#include +#include + +void print(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + if (vprintf(format, ap) < 0) abort(); + va_end(ap); +} + +int main(int argc, char * * argv) +{ + int c; + if (argc != 2) abort(); + print("static unsigned char %s[] = {", argv[1]); + while ((c = getchar()) != EOF) { + print("0x%02x, ", (unsigned char) c); + } + print("};\n"); + return 0; +} diff --git a/src/fix-ng/fix.cc b/src/fix-ng/fix.cc index 9a8ff1513..e13413bb4 100644 --- a/src/fix-ng/fix.cc +++ b/src/fix-ng/fix.cc @@ -1,13 +1,12 @@ #include #include +#include "parser.hh" #include "globals.hh" #include "normalise.hh" #include "shared.hh" -typedef ATerm Expr; - typedef map NormalForms; typedef map PkgPaths; typedef map PkgHashes; @@ -406,9 +405,7 @@ static Expr evalFile(EvalState & state, const Path & relPath) { Path path = searchPath(state.searchDirs, relPath); Nest nest(lvlTalkative, format("evaluating file `%1%'") % path); - Expr e = ATreadFromNamedFile(path.c_str()); - if (!e) - throw Error(format("unable to read a term from `%1%'") % path); + Expr e = parseExprFromFile(path); return evalExpr(state, e); } diff --git a/src/fix-ng/fix.sdf b/src/fix-ng/fix.sdf new file mode 100644 index 000000000..72f3e694d --- /dev/null +++ b/src/fix-ng/fix.sdf @@ -0,0 +1,163 @@ +definition + +module Main +imports Fix + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Top level syntax. + +module Fix +imports Fix-Exprs Fix-Layout + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Expressions. + +module Fix-Exprs +imports Fix-Lexicals URI +exports + sorts Expr Bind + context-free syntax + + Id + -> Expr {cons("Var")} + + Int + -> Expr {cons("Int")} + + Str + -> Expr {cons("Str")} + + Uri + -> Expr {cons("Uri")} + + Path + -> Expr {cons("Path")} + + "(" Expr ")" + -> Expr {bracket} + + Expr Expr + -> Expr {cons("Call"), left} + + "{" {Id ","}* "}" ":" Expr + -> Expr {cons("Function"), right} + + "{" {Bind ","}+ "}" + -> Expr {cons("Attrs")} + + Id "=" Expr + -> Bind {cons("Bind")} + + "[" {Expr ","}* "]" + -> Expr {cons("List")} + + context-free priorities + + Expr Expr -> Expr + > "{" {Id ","}* "}" ":" Expr -> Expr + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Lexical syntax. + +module Fix-Lexicals +exports + sorts Id Path + lexical syntax + [a-zA-Z\_][a-zA-Z0-9\_\']* -> Id + [0-9]+ -> Int + "\"" ~[\n\"]* "\"" -> Str + PathComp ("/" PathComp)+ -> Path + [a-zA-Z0-9\.\_\-]+ -> PathComp + lexical restrictions + Id -/- [a-zA-Z0-9\_\'] + Int -/- [0-9] + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% URIs (RFC 2396, appendix A). + +module URI +exports + sorts Uri + lexical syntax + Uscheme ":" (Uhierpath | Uopaquepath) -> Uri + + (Unetpath | Uabspath) ("?" Uquery)? -> Uhierpath + Uuricnoslash Uuric* -> Uopaquepath + + Uunreserved | Uescaped | [\;\?\:\@\&\=\+\$\,] -> Uuricnoslash + + "//" Uauthority Uabspath? -> Unetpath + "/" Upathsegments -> Uabspath + Urelsegment Uabspath? -> Urelpath + + (Uunreserved | Uescaped | [\;\@\&\=\+\$\,])+ -> Urelsegment + + Ualpha (Ualpha | Udigit | [\+\-\.])* -> Uscheme + + Userver | Uregname -> Uauthority + + (Uunreserved | Uescaped | [\$\,\;\:\@\&\=\+])+ -> Uregname + + ((Uuserinfo "@") Uhostport) -> Userver + (Uunreserved | Uescaped | [\;\:\&\=\+\$\,])* -> Uuserinfo + + Uhost (":" Uport)? -> Uhostport + Uhostname | UIPv4address -> Uhost + (Udomainlabel ".")+ Utoplabel "."? -> Uhostname + Ualphanum | Ualphanum (Ualphanum | "-")* Ualphanum -> Udomainlabel + Ualpha | Ualpha (Ualphanum | "-")* Ualphanum -> Utoplabel + Udigit+ "." Udigit+ "." Udigit+ "." Udigit+ -> UIPv4address + Udigit* -> Uport + + Uabspath | Uopaquepart -> Upath + Usegment ("/" Usegment)* -> Upathsegments + Upchar* (";" Uparam)* -> Usegment + Upchar* -> Uparam + Uunreserved | Uescaped | [\:\@\&\=\+\$\,] -> Upchar + + Uuric* -> Uquery + + Uuric* -> Ufragment + + Ureserved | Uunreserved | Uescaped -> Uuric + [\;\/\?\:\@\&\=\+\$\,] -> Ureserved + Ualphanum | Umark -> Uunreserved + [\-\_\.\!\~\*\'\(\)] -> Umark + + "%" Uhex Uhex -> Uescaped + Udigit | [A-Fa-f] -> Uhex + + Ualpha | Udigit -> Ualphanum + Ulowalpha | Uupalpha -> Ualpha + + [a-z] -> Ulowalpha + [A-Z] -> Uupalpha + [0-9] -> Udigit + + lexical restrictions + Uri -/- [a-zA-Z0-9\-\_\.\!\~\*\'\(\)] + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Layout. + +module Fix-Layout +exports + lexical syntax + [\ \t\n] -> LAYOUT + HashComment -> LAYOUT + Comment -> LAYOUT + "#" ~[\n]* [\n] -> HashComment + "//" ~[\n]* [\n] -> HashComment + "/*" ( ~[\*] | Asterisk )* "*/" -> Comment + [\*] -> Asterisk + lexical restrictions + Asterisk -/- [\/] + context-free restrictions + LAYOUT? -/- [\ \t\n] | [\#] + syntax + HashComment -> diff --git a/src/fix-ng/parser.cc b/src/fix-ng/parser.cc new file mode 100644 index 000000000..b2f0ed05d --- /dev/null +++ b/src/fix-ng/parser.cc @@ -0,0 +1,76 @@ +extern "C" { +#include +#include +} + +#include "parser.hh" +#include "shared.hh" +#include "expr.hh" +#include "parse-table.h" + + +Expr parseExprFromFile(const Path & path) +{ + /* Perhaps this is already an imploded parse tree? */ + Expr e = ATreadFromNamedFile(path.c_str()); + if (e) return e; + + /* Initialise the SDF libraries. */ + static bool initialised = false; + static ATerm parseTable = 0; + static language lang = 0; + + if (!initialised) { + PT_initMEPTApi(); + PT_initAsFix2Api(); + SGinitParser(ATfalse); + + ATprotect(&parseTable); + parseTable = ATreadFromBinaryString( + (char *) fixParseTable, sizeof fixParseTable); + if (!parseTable) + throw Error(format("cannot construct parse table term")); + + ATprotect(&lang); + lang = ATmake("Fix"); + if (!SGopenLanguageFromTerm( + (char *) programId.c_str(), lang, parseTable)) + throw Error(format("cannot open language")); + + SG_STARTSYMBOL_ON(); + SG_OUTPUT_ON(); + SG_ASFIX2ME_ON(); + SG_AMBIGUITY_ERROR_ON(); + + initialised = true; + } + + ATerm result = SGparseFile((char *) programId.c_str(), lang, + "Expr", (char *) path.c_str()); + if (!result) + throw SysError(format("parse failed in `%1%'") % path); + if (SGisParseError(result)) + throw Error(format("parse error in `%1%': %2%") + % path % printTerm(result)); + + PT_ParseTree tree = PT_makeParseTreeFromTerm(result); + if (!tree) + throw Error(format("cannot create parse tree")); + + ATerm imploded = PT_implodeParseTree(tree, + ATtrue, + ATtrue, + ATtrue, + ATtrue, + ATtrue, + ATtrue, + ATfalse, + ATtrue, + ATtrue, + ATtrue, + ATfalse); + if (!imploded) + throw Error(format("cannot implode parse tree")); + + return imploded; +} diff --git a/src/fix-ng/parser.hh b/src/fix-ng/parser.hh new file mode 100644 index 000000000..80e266f2d --- /dev/null +++ b/src/fix-ng/parser.hh @@ -0,0 +1,15 @@ +#ifndef __PARSER_H +#define __PARSER_H + +#include +#include + +#include "util.hh" + + +typedef ATerm Expr; + +Expr parseExprFromFile(const Path & path); + + +#endif /* !__PARSER_H */