forked from lix-project/lix
libexpr: Recognize newline in more places in lexer
Flex's regexes have an annoying feature: the dot matches everything except a newline. This causes problems for expressions like:

    "${0}\
    "

where the backslash-newline combination matches this rule instead of the intended one mentioned in the comment:

    <STRING>\$|\\|\$\\ {
      /* This can only occur when we reach EOF, otherwise the above
         (...|\$[^\{\"\\]|\\.|\$\\.)+ would have triggered.
         This is technically invalid, but we leave the problem to the
         parser who fails with exact location. */
      return STR;
    }

However, the parser actually accepts the resulting token sequence ('"' DOLLAR_CURLY 0 '}' STR '"'), which is a problem because the lexer rule didn't assign anything to yylval. Ultimately this leads to a crash when dereferencing a NULL pointer in ExprConcatStrings::bindVars().

The fix does change the syntax of the language in some corner cases, but I think it only turns previously invalid (or crashing) syntax into valid syntax. E.g.

    "a\
    b"

and

    ''a''\
    b''

were previously syntax errors, but now both result in "a\nb".

Found by afl-fuzz.
This commit is contained in:
parent
939cf4cceb
commit
a0e38c16bc
|
@ -85,6 +85,7 @@ static Expr * unescapeStr(SymbolTable & symbols, const char * s, size_t length)
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
|
||||||
|
ANY .|\n
|
||||||
ID [a-zA-Z\_][a-zA-Z0-9\_\'\-]*
|
ID [a-zA-Z\_][a-zA-Z0-9\_\'\-]*
|
||||||
INT [0-9]+
|
INT [0-9]+
|
||||||
FLOAT (([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?
|
FLOAT (([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?
|
||||||
|
@ -146,8 +147,8 @@ or { return OR_KW; }
|
||||||
<INITIAL,INSIDE_DOLLAR_CURLY>\" {
|
<INITIAL,INSIDE_DOLLAR_CURLY>\" {
|
||||||
PUSH_STATE(STRING); return '"';
|
PUSH_STATE(STRING); return '"';
|
||||||
}
|
}
|
||||||
<STRING>([^\$\"\\]|\$[^\{\"\\]|\\.|\$\\.)*\$/\" |
|
<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})*\$/\" |
|
||||||
<STRING>([^\$\"\\]|\$[^\{\"\\]|\\.|\$\\.)+ {
|
<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})+ {
|
||||||
/* It is impossible to match strings ending with '$' with one
|
/* It is impossible to match strings ending with '$' with one
|
||||||
regex because trailing contexts are only valid at the end
|
regex because trailing contexts are only valid at the end
|
||||||
of a rule. (A sane but undocumented limitation.) */
|
of a rule. (A sane but undocumented limitation.) */
|
||||||
|
@ -178,7 +179,7 @@ or { return OR_KW; }
|
||||||
yylval->e = new ExprIndStr("''");
|
yylval->e = new ExprIndStr("''");
|
||||||
return IND_STR;
|
return IND_STR;
|
||||||
}
|
}
|
||||||
<IND_STRING>\'\'\\. {
|
<IND_STRING>\'\'\\{ANY} {
|
||||||
yylval->e = unescapeStr(data->symbols, yytext + 2, yyleng - 2);
|
yylval->e = unescapeStr(data->symbols, yytext + 2, yyleng - 2);
|
||||||
return IND_STR;
|
return IND_STR;
|
||||||
}
|
}
|
||||||
|
@ -208,7 +209,7 @@ or { return OR_KW; }
|
||||||
\#[^\r\n]* /* single-line comments */
|
\#[^\r\n]* /* single-line comments */
|
||||||
\/\*([^*]|\*+[^*/])*\*+\/ /* long comments */
|
\/\*([^*]|\*+[^*/])*\*+\/ /* long comments */
|
||||||
|
|
||||||
. return yytext[0];
|
{ANY} return yytext[0];
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
1
tests/lang/eval-okay-backslash-newline-1.exp
Normal file
1
tests/lang/eval-okay-backslash-newline-1.exp
Normal file
|
@ -0,0 +1 @@
|
||||||
|
"a\nb"
|
2
tests/lang/eval-okay-backslash-newline-1.nix
Normal file
2
tests/lang/eval-okay-backslash-newline-1.nix
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
"a\
|
||||||
|
b"
|
1
tests/lang/eval-okay-backslash-newline-2.exp
Normal file
1
tests/lang/eval-okay-backslash-newline-2.exp
Normal file
|
@ -0,0 +1 @@
|
||||||
|
"a\nb"
|
2
tests/lang/eval-okay-backslash-newline-2.nix
Normal file
2
tests/lang/eval-okay-backslash-newline-2.nix
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
''a''\
|
||||||
|
b''
|
Loading…
Reference in a new issue