forked from lix-project/lix
de90fdf908
bad flex doesn't have lexical restrictions, the current solution isn't quite right...)
113 lines
2.9 KiB
Text
113 lines
2.9 KiB
Text
/* Reentrant scanner whose yylex() receives the yylval and yylloc
   pointers used by the rule actions and location hooks in this file. */
%option reentrant
%option bison-bridge
%option bison-locations

/* Do not call yywrap() when the end of the input is reached. */
%option noyywrap

/* The input is never treated as an interactive terminal. */
%option never-interactive

/* Exclusive start condition that is active inside a double-quoted
   string. */
%x STRING
|
|
|
|
|
|
%{
|
|
#include <string.h>
|
|
#include <aterm2.h>
|
|
#include "parser-tab.h"
|
|
|
|
/* Reset the position stored in `loc` to the start of the input:
   line 1, column 1. */
static void initLoc(YYLTYPE *loc)
{
    loc->first_column = 1;
    loc->first_line = 1;
}
|
|
|
|
/* Advance the position stored in `loc` over the `len` bytes of token
   text that start at `s`.

   A line break is "\n", a lone "\r", or the pair "\r\n" (counted as a
   single break).  Each line break increments first_line and resets
   first_column to 1; every other byte increments first_column.

   Exactly `len` bytes are examined and nothing beyond them is read, so
   `s` does not have to be NUL-terminated. */
static void adjustLoc(YYLTYPE *loc, const char *s, size_t len)
{
    const char *end = s + len;

    while (s < end) {
        switch (*s++) {
        case '\r':
            /* CR LF is one line break.  The LF is consumed here, within
               the bounds of the token, so that it is not counted again
               and the scan does not run past the end of the text. */
            if (s < end && *s == '\n')
                s++;
            /* fall through */
        case '\n':
            ++loc->first_line;
            loc->first_column = 1;
            break;
        default:
            ++loc->first_column;
        }
    }
}
|
|
|
|
/* Helpers called from the rule actions to build the semantic value of
   a token from its text. */
extern ATerm toATerm(const char *s);      /* identifier, path and URI text */
extern ATerm unescapeStr(const char *s);  /* text of a string fragment */
|
|
|
|
/* Flex hook run before the first scan: reset the location to line 1,
   column 1. */
#define YY_USER_INIT initLoc(yylloc)
|
|
/* Flex hook run before the action of every matched rule: advance the
   location over the matched text.  The trailing semicolon is part of
   the macro. */
#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng);
|
|
|
|
%}
|
|
|
|
|
|
/* Identifier: a letter or underscore, followed by letters, digits,
   underscores or apostrophes. */
ID          [a-zA-Z_][a-zA-Z0-9_']*

/* Unsigned decimal integer. */
INT         [0-9]+

/* Path: an optional leading component followed by one or more
   slash-prefixed components.  Components consist of letters, digits,
   dots, underscores, hyphens and plus signs. */
PATH        [a-zA-Z0-9._+-]*(\/[a-zA-Z0-9._+-]+)+

/* URI: a scheme (a letter, then letters, digits, plus, hyphen or dot),
   a colon, and a non-empty run of the characters accepted after it. */
URI         [a-zA-Z][a-zA-Z0-9+.-]*:[a-zA-Z0-9%/?:@&=+$,_.!~*'-]+
|
|
|
|
|
|
%%
|
|
|
|
|
|
"if"            return IF;      /* reserved words, one token each */
"then"          return THEN;
"else"          return ELSE;
"assert"        return ASSERT;
"with"          return WITH;
"let"           return LET;
"rec"           return REC;
"inherit"       return INHERIT;
|
|
|
|
"=="            return EQ;      /* two-character operators */
"!="            return NEQ;
"&&"            return AND;
"||"            return OR;
"->"            return IMPL;
"//"            return UPDATE;
"++"            return CONCAT;
|
|
|
|
{ID}            {
                    /* !!! alloc */
                    yylval->t = toATerm(yytext);
                    return ID;
                }
{INT}           {
                    /* !!! overflow: atoi() does not report values that
                       do not fit in an int */
                    yylval->t = ATmake("<int>", atoi(yytext));
                    return INT;
                }
\"              { BEGIN(STRING); return '"'; /* opening quote: enter the STRING state */ }
|
|
<STRING>([^\$\"\\]|\$[^\{\"]|\\.)+ {
                    /* !!! Not quite right: what is really wanted is a
                       follow restriction on "$" saying that it must not
                       be followed by "{".  Without one, a "$" directly
                       before the closing quote would be consumed as
                       string text instead of being a "$" followed by
                       the string terminator, so the pattern excludes
                       that sequence and it is disallowed for now. */
                    yylval->t = unescapeStr(yytext); /* !!! alloc */
                    return STR;
                }
<STRING>"${"    {
                    /* Start of an interpolation: its contents are
                       scanned in the INITIAL state. */
                    BEGIN(INITIAL);
                    return DOLLAR_CURLY;
                }
<STRING>\"      { BEGIN(INITIAL); return '"'; /* closing quote */ }
<STRING>.       { return yytext[0]; /* just in case: shouldn't be reached */ }
|
|
|
|
|
|
{PATH}          {
                    /* !!! alloc */
                    yylval->t = toATerm(yytext);
                    return PATH;
                }
{URI}           {
                    /* !!! alloc */
                    yylval->t = toATerm(yytext);
                    return URI;
                }
|
|
|
|
[ \t\r\n]+                      /* skip whitespace; no token is returned */
\#[^\r\n]*                      /* skip a single-line comment, up to the end of the line */
\/\*([^*]|\*+[^*\/])*\*+\/      /* skip a long comment; it ends at the first star-slash, even when more stars precede the slash */
|
|
|
|
. return yytext[0]; /* any other single character (except newline) is returned as its own token code */
|
|
|
|
|
|
%%
|
|
|
|
/* Horrible, disgusting hack: allow the parser to set the scanner
|
|
start condition back to STRING. Necessary in interpolations like
|
|
"foo${expr}bar"; after the close brace we have to go back to the
|
|
STRING state. */
|
|
void backToString(yyscan_t scanner)
|
|
{
|
|
struct yyguts_t * yyg = (struct yyguts_t*) scanner;
|
|
BEGIN(STRING);
|
|
}
|